In [1]:
def independent_variable_model_collapse(model, independent_column_name="Frequency", **options):
    """Return a model with a single row per unique independent-variable value.

    Rows of ``model.data`` that share the same value in ``independent_column_name``
    are reduced column by column with the selected method, and the resulting rows
    are ordered by ascending independent-variable value.

    model is any object exposing ``options``, ``elements``, ``column_names``, ``data``
    and column access by name; a pandas.DataFrame is first converted with
    DataFrame_to_AsciiDataTable.

    independent_column_name is the column whose values define the groups. After the
    reduction its entry in each output row is reset to the group's value.

    options override the options and elements copied from the model. The ``method``
    option (default "mean") is matched case-insensitively as a substring, in this
    order: "geometric" (gmean), "mean"/"av" (np.mean), "median", "st" (np.std),
    "var", "rms", "rss" and "mad".
    Geometric means of odd number of negative values fails.

    Returns an AsciiDataTable built from the collapsed rows and the merged options.
    If a truthy "specific_descriptor" option is present it is prefixed with
    "<method>_". Raises ValueError when ``method`` matches none of the known
    reductions.
    """
    if isinstance(model, pandas.DataFrame):
        # The converted table has to replace `model`, otherwise the attribute
        # accesses below would run against the raw DataFrame.
        model = DataFrame_to_AsciiDataTable(model)

    defaults = {"method": "mean"}
    # Load other options from the model; begin_line/end_line options describe the
    # source layout and are not carried over.
    for option, value in model.options.items():
        if not re.search('begin_line|end_line', option):
            defaults[option] = value
    # Copy every non-empty element so the new table does not share them with `model`.
    for element in model.elements:
        if model.__dict__[element]:
            if re.search("meta", element, re.IGNORECASE):
                defaults["metadata"] = model.metadata.copy()
            else:
                defaults[element] = model.__dict__[element][:]

    # Caller-supplied options take precedence over everything read from the model.
    collapse_options = dict(defaults)
    collapse_options.update(options)

    # Ordered (pattern, reduction) pairs. "geometric" is tested before "mean|av" so
    # that a method such as "geometric mean" is not taken for the arithmetic mean.
    # gmean and mad are only looked up when their reduction is actually used.
    reductions = [
        ('geometric', lambda rows: gmean(rows, axis=0)),
        ('mean|av', lambda rows: np.mean(rows, axis=0)),
        ('median', lambda rows: np.median(rows, axis=0)),
        ('st', lambda rows: np.std(rows, axis=0)),
        ('var', lambda rows: np.var(rows, axis=0, dtype=np.float64)),
        ('rms', lambda rows: np.sqrt(np.mean(np.square(rows), axis=0, dtype=np.float64))),
        ('rss', lambda rows: np.sqrt(np.sum(np.square(rows), axis=0, dtype=np.float64))),
        ('mad', lambda rows: mad(rows, axis=0)),
    ]
    method = collapse_options["method"]
    reduce_rows = None
    for pattern, candidate in reductions:
        if re.search(pattern, method, re.IGNORECASE):
            reduce_rows = candidate
            break
    if reduce_rows is None:
        raise ValueError("Unknown collapse method {0!r}".format(method))

    unique_independent_variable_list = sorted(set(model[independent_column_name]))
    independent_variable_selector = model.column_names.index(independent_column_name)
    out_data = []
    for independent_variable in unique_independent_variable_list:
        # A list (not a lazy filter object) is required for np.array on Python 3.
        matching_rows = [row for row in model.data
                         if row[independent_variable_selector] == independent_variable]
        new_row = reduce_rows(np.array(matching_rows)).tolist()
        # We need to preserve the independent variable: the reduction would
        # otherwise replace it (e.g. std of identical values is 0).
        new_row[independent_variable_selector] = independent_variable
        out_data.append(new_row)

    collapse_options["data"] = out_data

    # .get() because the model options are not guaranteed to define a descriptor.
    if collapse_options.get("specific_descriptor"):
        collapse_options["specific_descriptor"] = method + "_" + collapse_options["specific_descriptor"]
    return AsciiDataTable(None, **collapse_options)


In [None]:
def independent_variable_model_difference(model_1, model_2, independent_column_name="Frequency", **options):
    """Return model_1 - model_2 at the independent-variable values they share.

    Both models must have the column ``independent_column_name``. Only rows of
    model_1 whose independent-variable value also occurs in model_2 are used, and
    each is paired with the first row of model_2 that has the same value. Only
    columns present in both models are considered, in model_1's column order.

    A shared column is subtracted when its entry in ``options["column_types"]`` of
    both models matches 'int' or 'float' (case-insensitive). Any other shared column
    is passed through with model_1's value when the ``columns`` option is "all"
    (the default) and is dropped from the result otherwise.
    NOTE: a list passed as ``columns`` is not used to pick a subset of columns here;
    any value other than "all" only causes the non-numeric columns to be dropped.

    models can be any object exposing ``column_names``, ``data``, ``options`` and
    column access by name (AsciiDataTable-like); a pandas.DataFrame is first
    converted with DataFrame_to_AsciiDataTable.

    options default to {"columns": "all", "interpolate": False, "average": True} and
    are all forwarded to the AsciiDataTable constructor together with the computed
    ``column_names``, ``column_types`` and ``data``.

    Returns the resulting AsciiDataTable, or None (after printing a message) when
    the models have no independent-variable value in common.
    """
    # Set up defaults and pass options
    difference_options = {"columns": "all", "interpolate": False, "average": True}
    difference_options.update(options)

    # A pandas data frame needs a little conversion; every other model is used as is.
    if isinstance(model_1, pandas.DataFrame):
        model_1 = DataFrame_to_AsciiDataTable(model_1)
    if isinstance(model_2, pandas.DataFrame):
        model_2 = DataFrame_to_AsciiDataTable(model_2)

    # All points must be in the intersection to be used. Sets keep the membership
    # tests in the row loop below constant-time.
    shared_independent_values = set(model_1[independent_column_name]).intersection(
        model_2[independent_column_name])
    shared_column_names = set(model_1.column_names).intersection(model_2.column_names)
    model_2_independent_variable_selector = model_2.column_names.index(independent_column_name)

    if not shared_independent_values:
        print("The models do not have any {0} points in common".format(independent_column_name))
        return None

    # Decide once, per shared column, whether it is subtracted, passed through or
    # dropped, so that column_names/column_types always line up with the rows.
    keep_non_numeric = difference_options["columns"] == "all"
    numeric_type = re.compile('int|float', re.IGNORECASE)
    new_column_names = [independent_column_name]
    column_types = ['float']
    column_plan = []  # (model_1 column index, model_2 column index, subtract?)
    for column_index, column in enumerate(model_1.column_names):
        if column not in shared_column_names or column == independent_column_name:
            continue
        model_2_column_selector = model_2.column_names.index(column)
        model_1_type = model_1.options["column_types"][column_index]
        subtract = bool(
            numeric_type.search(model_1_type)
            and numeric_type.search(model_2.options["column_types"][model_2_column_selector]))
        if not subtract and not keep_non_numeric:
            # Non-numeric column and columns != "all": leave it out of header and rows.
            continue
        new_column_names.append(column)
        column_types.append(model_1_type)
        column_plan.append((column_index, model_2_column_selector, subtract))

    # First model_2 row for each independent-variable value; setdefault keeps the
    # earliest row when a value is repeated.
    model_2_rows = {}
    for row in model_2.data:
        model_2_rows.setdefault(row[model_2_independent_variable_selector], row)

    difference_data = []
    for row_index, independent_variable in enumerate(model_1[independent_column_name]):
        if independent_variable not in shared_independent_values:
            continue
        model_1_row = model_1.data[row_index]
        model_2_row = model_2_rows[independent_variable]
        new_row = [independent_variable]
        for model_1_selector, model_2_selector, subtract in column_plan:
            if subtract:
                new_row.append(model_1_row[model_1_selector] - model_2_row[model_2_selector])
            else:
                new_row.append(model_1_row[model_1_selector])
        difference_data.append(new_row)

    difference_options["column_names"] = new_column_names
    difference_options["data"] = difference_data
    difference_options["column_types"] = column_types
    return AsciiDataTable(None, **difference_options)