In [2]:
import getopt, sys
import pandas as pd
import numpy as np
import qiime2
import time
import unittest

from qiime2 import Metadata




import nose.tools

from nose.tools import assert_almost_equal, assert_raises, assert_equals
from pandas.util.testing import assert_frame_equal

import match_controls

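## Testing get_user_input_query_lines
The first test confirms that correct input does not raise an exception. The second test makes sure that inputs that are all `None` are skipped without raising an exception. The third test confirms that the string `"None"` in place of a file path raises a `ValueError`.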

In [3]:
def test_get_user_input_query_lines(verbose, inputDict):
    '''
    Checks how match_controls.get_user_input_query_lines reacts to valid
        file arguments, to arguments that are all None and to the string
        "None" used in place of a file path.

    Parameters
    ----------
    verbose: boolean
        Passed straight through to get_user_input_query_lines.

    inputDict: dictionary
        Arguments that are expected to load without raising. The inputs
            built inside this test use the keys "inputdata", "keep",
            "control", "case", "nullvalues" and "match".

    Returns
    -------
    True when all three checks pass.

    Raises
    ------
    Exception
        If inputDict fails to load or if the all None input raises. The
            original error is chained as the cause.

    AssertionError
        If the string "None" does not make get_user_input_query_lines
            raise a ValueError.
    '''
    argument_names = ("inputdata", "keep", "control", "case", "nullvalues",
        "match")
    testInput_allNone = dict.fromkeys(argument_names, None)
    #same as above except that keep is the string "None" instead of None
    testInput_strNone = dict(testInput_allNone, keep="None")

    try:
        match_controls.get_user_input_query_lines(verbose, inputDict)
    except Exception as error:
        #chaining keeps the real reason visible in the traceback
        raise Exception('Files that were meant to load did not.') from error

    try:
        match_controls.get_user_input_query_lines(verbose, testInput_allNone)
    except Exception as error:
        raise Exception(
            'Not all None files were skipped like they should be.') from error

    assert_raises(ValueError, match_controls.get_user_input_query_lines,
        verbose, testInput_strNone)

    return True

        
        
        
     


## Testing Stable_Marriage

In [4]:
class Stable_Marriage:
    '''
    Pairs case samples with control samples so that every case and every
        control appears in at most one pair.
    '''

    def orderDict(self, verbose, dictionary, value_frequency):
        '''
        orders the elements of each array that is associated with a sample
            key by how often they get matched to other samples least to
            greatest. Ties are sorted alphanumerically. The arrays are
            replaced inside the dictionary that was passed in.

        Parameters
        ----------
        verbose: boolean
            Tells function if it should output print statements or not. True
                outputs print statements.

        dictionary: dictionary
            keys are linked to arrays that contain strings that correspond to
                samples that match to the sample the key represents

        value_frequency: dictionary
            keys are samples found in arrays of dictionary. Elements are
                numerical representation of how many samples element
                matches to

        Returns
        -------
        dictionary: dictionary
            the same dictionary object with the elements of each array
                ordered from least to greatest abundance

        Raises
        ------
        KeyError
            If an element of an array has no entry in value_frequency.
        '''
        for k in dictionary:
            #one sort on (frequency, name) gives the alphanumeric tie break
            dictionary[k] = sorted(dictionary[k],
                key=lambda x: (value_frequency[x], x))
        if verbose:
            print("Ordered dictionary is %s"%(dictionary))
        return dictionary

    def order_keys(self, verbose, dictionary):
        '''
        orders the keys of a dictionary so that they can be used properly as
            the freemen of stable marriage. In order greatest to least since
            pop is used to get least freeman and pop takes the right most
            entry.

        Parameters
        ----------
        verbose: boolean
            Tells function if it should output print statements or not. True
            outputs print statements.

        dictionary
            dictionary of cases or controls with their matching controls or
                cases ordered by rising abundance

        Return
        ------
        keys_greatest_to_least: list
            contains keys in order of greatest to least amount of samples they
                match to. Keys that match to the same amount of samples are
                in alphanumeric order.
        '''
        #sorted is stable even with reverse=True, so keys that tie on length
            #keep the alphanumeric order established by the inner sort
        keys_greatest_to_least = sorted(sorted(dictionary),
            key=lambda x: len(dictionary[x]), reverse=True)
        if verbose:
            print("Ordered samples are %s"%(keys_greatest_to_least))
        return keys_greatest_to_least

    def stableMarriageRunner(self, verbose, case_dictionary,
        pref_counts_control, pref_counts_case):
        '''
        based on code shown by Tyler Moore in his slides for Lecture 2 for
            CSE 3353, SMU, Dallas, TX these slides can be found at
            https://tylermoore.ens.utulsa.edu/courses/cse3353/slides/l02-handout.pdf

        Gets back a way to match samples to each other in a one to one
            manner. The case with the fewest remaining candidate controls
            is always served first, which favours getting back the most
            amount of one to one matches.

        Parameters
        ----------
        verbose: boolean
            Tells function if it should output print statements or not. True
                outputs print statements.

        case_dictionary: dictionary
            case_dictionary is a dictionary of cases with their matching
                controls. It is copied and is not modified.

        pref_counts_control: dictionary
            pref_counts_control is a dictionary with how many case samples
                each control sample matches to.

        pref_counts_case: dictionary
            pref_counts_case is a dictionary with the frequency cases match to
                something in case_dictionary. It is accepted so that existing
                callers keep working but is not consulted. A control is
                withdrawn from every remaining case as soon as it is matched,
                so two cases never compete for the same control.

        Returns
        -------
        one_to_one_match_dictionary: dictionary
            dictionary with keys representing control samples and their
                corresponding values representing the case sample each
                control is matched to. No case appears more than once.
        '''
        #private copy so that the lists of the caller are not emptied
        remaining = {case: list(controls)
            for case, controls in case_dictionary.items()}
        #orders control samples (elements) by rising abundance of how many
            #case samples they match to
        remaining = self.orderDict(verbose, remaining, pref_counts_control)
        free_keys = self.order_keys(verbose, remaining)

        one_to_one_match_dictionary = {}
        while free_keys:
            key = free_keys.pop()
            if verbose:
                print("Popped the key %s"%(key))
                print("Free keys are %s"%(free_keys))
            #a case leaves the pool once it is served, matched or not, so it
                #can never be paired with a second control
            candidates = remaining.pop(key)
            if not candidates:
                continue
            #the last control of the ordered list is taken, meaning the
                #candidate with the highest count in pref_counts_control
            entry = candidates.pop()
            if verbose:
                print("Popped the entry %s"%(entry))

            one_to_one_match_dictionary[entry] = key
            #the control is taken, so no other case may still propose to it
            for case_key in remaining:
                remaining[case_key] = [control
                    for control in remaining[case_key] if control != entry]
            #candidate counts changed so the keys have to be reordered
            free_keys = self.order_keys(verbose, remaining)
            if verbose:
                print("\tone_to_one_match_dictionary is %s"%(one_to_one_match_dictionary))

        if verbose:
            print("Dictionary of matches after solving stable marriage problem is %s"%(
                one_to_one_match_dictionary))

        return one_to_one_match_dictionary

In [None]:
# Worked example: 13 case ids, 7 of which list at least one candidate control.
stable = Stable_Marriage()

# case id -> ids of the controls that are candidates for that case
case_dictionary = {
    '14': ['15', '17'],
    '25': [],
    '19': ['20'],
    '21': [],
    '6': [],
    '9': ['10'],
    '3': [],
    '7': ['8'],
    '18': ['17'],
    '23': [],
    '16': ['13', '15'],
    '27': [],
    '11': ['12'],
}
# control id -> number of cases that list it as a candidate
control_match_count_dictionary = {
    '10': 1, '15': 2, '17': 2, '12': 1, '8': 1, '20': 1, '13': 1,
}
# case id -> number of candidate controls it lists
case_match_count_dictionary = {
    '23': 0, '6': 0, '14': 2, '21': 0, '16': 2, '11': 1, '19': 1,
    '9': 1, '27': 0, '7': 1, '3': 0, '25': 0, '18': 1,
}

verbose = True

case_to_control_match = stable.stableMarriageRunner(
    verbose,
    case_dictionary,
    control_match_count_dictionary,
    case_match_count_dictionary,
)
print(case_to_control_match)

# expected pairing, written as control id -> case id
test_output = {
    '20': '19', '10': '9', '12': '11', '8': '7',
    '15': '14', '17': '18', '13': '16',
}
assert_equals(case_to_control_match, test_output)

In [None]:
# Smaller example; it reuses `stable` and `verbose` from the previous cell.
# case id -> ids of the controls that are candidates for that case
case_dictionary = {
    '2': ['1', '10'],
    '4': ['3'],
    '6': [],
    '9': [],
}
# control id -> number of cases that list it as a candidate
control_match_count_dictionary = {'10': 1, '1': 1, '3': 1}
# case id -> number of candidate controls it lists
case_match_count_dictionary = {'9': 0, '4': 1, '6': 0, '2': 2}

case_to_control_match = stable.stableMarriageRunner(
    verbose,
    case_dictionary,
    control_match_count_dictionary,
    case_match_count_dictionary,
)
print(case_to_control_match)


In [6]:
def test_determine_cases_and_controls(unit, normal_input, normal_output,
    empty_output, normal_control, normal_case, noentries_case, empty_file):
    '''
    Runs determine_cases_and_controls against fixture files and checks
        both the labelled output and the error cases.

    A second function with this same name is defined further down in this
        notebook with different parameters. Whichever definition cell was
        run last is the one a call resolves to.

    Parameters
    ----------
    unit: string
        Directory, relative to the working directory, that holds the
            fixture files.

    normal_input: string
        File name of the metadata that gets labelled.

    normal_output: string
        File name of the metadata the labelled result has to equal.

    empty_output: string
        File name of a metadata fixture. It is loaded but not compared.

    normal_control: string
        File name of the control queries.

    normal_case: string
        File name of the case queries.

    noentries_case: string
        File name of case queries that are expected to make
            determine_cases_and_controls raise a ValueError.

    empty_file: string
        File name of an empty query file.

    Raises
    ------
    AssertionError
        If an expected ValueError is not raised or if the labelled
            metadata differs from normal_output.
    '''
    verbose = False

    def fixture_path(file_name):
        #all fixtures live in the same directory
        return "./%s/%s"%(unit, file_name)

    def read_query_lines(file_name):
        #with closes the file even when reading fails
        with open(fixture_path(file_name), "r") as query_file:
            return query_file.read().splitlines()

    norm_in = Metadata.load(fixture_path(normal_input))
    norm_out = Metadata.load(fixture_path(normal_output))
    #kept so that a missing or unreadable fixture still fails at this point
    Metadata.load(fixture_path(empty_output))

    norm_case = read_query_lines(normal_case)
    norm_control = read_query_lines(normal_control)
    noentry_case = read_query_lines(noentries_case)
    emp_file = read_query_lines(empty_file)

    case_control_dict = {"case":norm_case, "control":norm_control}
    unit_norm_out = determine_cases_and_controls(verbose,
        norm_in, case_control_dict)

    #query files without any lines have to be rejected
    case_control_dict = {"case":emp_file, "control":emp_file}
    assert_raises(ValueError, determine_cases_and_controls,
        verbose, norm_in, case_control_dict)

    case_control_dict = {"case":noentry_case, "control":norm_control}
    assert_raises(ValueError, determine_cases_and_controls,
        verbose, norm_in, case_control_dict)

    norm_out = norm_out.to_dataframe()
    unit_norm_out = unit_norm_out.to_dataframe()
    print(norm_out["case_control"])
    print(unit_norm_out["case_control"])
    print(unit_norm_out)
    assert_frame_equal(norm_out, unit_norm_out)

In [65]:
def determine_cases_and_controls(verbose, afterExclusion_MD, query_line_dict):
    '''
    Labels every sample as case, control or Unspecified using the queries
        in query_line_dict and returns the metadata with that label added
        as a case_control column.

    Parameters
    ----------
    verbose: boolean
        Tells function if it should output print statements or not. True
            outputs print statements.

    afterExclusion_MD : Metadata object
        Metadata object with unwanted samples filtered out

    query_line_dict : dictionary
        The keys "case" and "control" each hold an array of query strings.
            The queries of one key are joined with AND. Any other key is
            skipped.

    Returns
    -------
    mergedMD : Metadata object
        afterExclusion_MD merged with a case_control column that holds
            "case", "control" or "Unspecified" for every id

    Raises
    ------
    ValueError
        If the array of queries for "case" or "control" is empty.
    '''
    #every sample starts out as Unspecified
    all_ids = afterExclusion_MD.get_ids()
    label_series = pd.Series(["Unspecified"] * len(all_ids), all_ids)
    label_series.index.name = afterExclusion_MD.id_header
    case_controlDF = label_series.to_frame("case_control")

    if verbose:
        print("Metadata Object has %s samples"%(afterExclusion_MD.id_count))

    for label, query_lines in query_line_dict.items():
        if label not in ("case", "control"):
            if verbose:
                print("Wrong key used for query. Must be 'case' or 'control'.")
            continue

        if len(query_lines) < 1:
            raise ValueError("The %s query file is empty"%(label))

        #each label is filtered from the full metadata so that the case
            #queries and the control queries do not influence each other
        matching_ids = afterExclusion_MD.get_ids(" AND ".join(query_lines))
        labelled_MD = afterExclusion_MD.filter_ids(matching_ids)
        if verbose:
            print("%s %s samples "%(labelled_MD.id_count,label))

        case_controlDF.loc[labelled_MD.ids, "case_control"] = label

    #the labels become a metadata object so they can be merged back in
    return Metadata.merge(afterExclusion_MD, Metadata(case_controlDF))

In [67]:
# Calls the fixture-file version of test_determine_cases_and_controls.
test_determine_cases_and_controls(
    "unitTest_files",               # unit
    "unit_case_input.tsv",          # normal_input
    "unit_case_output.tsv",         # normal_output
    "unit_case_empty_output.tsv",   # empty_output
    "test_control_in.txt",          # normal_control
    "test_case.txt",                # normal_case
    "test_case_noentries.txt",      # noentries_case
    "empty_file.txt",               # empty_file
)

id
1            case
2         control
3            case
4     Unspecified
5         control
6            case
7            case
8         control
9            case
10        control
11    Unspecified
Name: case_control, dtype: object
id
1            case
2         control
3            case
4     Unspecified
5         control
6            case
7            case
8         control
9            case
10        control
11    Unspecified
Name: case_control, dtype: object
      age_years     sex        disease bmi_category    test_site  \
id                                                                 
1            30    male  schizophrenia   overweight        fecal   
2            26    male           none       normal        fecal   
3            30    male  schizophrenia   overweight         none   
4            26    male            flu       normal  Unspecified   
5   Unspecified    male           none        obese         oral   
6            20    male     depression        obese   

In [55]:
def test_orderDict(verbose):
    '''
    Checks that match_controls.Stable_Marriage.orderDict orders arrays by
        rising frequency, breaks ties the same way on every run and fails
        for an element that has no frequency.

    Parameters
    ----------
    verbose: boolean
        Passed straight through to orderDict.

    Returns
    -------
    True when all three checks pass and False otherwise.
    '''
    test_unorderedDict = {'key_1':['a','b','c','d','e'], 'key_2':['a','c','e','d','b'],  'key_3':['b','e','e','c','a']}
    correct_output = {'key_1':['a','b','c','d','e'], 'key_2':['a','b','c','d','e'], 'key_3':['a','b','c','e','e']}

    #'f' has no entry in test_frequencies
    test_error = {'key_4':['b','f','e','c','a']}

    test_equal_freq = {'key_5':['c','b','e','d','a']}
    correct_output_freq = {'key_5':['a','b','c','d','e']}

    test_frequencies = {'a':1,'b':2,'c':3,'d':4,'e':5}
    #b, c and d tie so their order depends on the tie break alone
    test_frequencies_equal = {'a':1,'b':3,'c':3,'d':3,'e':5}
    stableObject = match_controls.Stable_Marriage()

    ordered = stableObject.orderDict(verbose, test_unorderedDict, test_frequencies)
    if ordered != correct_output:
        print('orderDict is not properly matching the output')
        print(ordered)
        print('should be')
        print(correct_output)
        return False

    counter = 0
    for _ in range(100):
        #a fresh unordered copy per run, so a dictionary that an earlier run
            #may have sorted in place cannot hide a wrong tie break
        unordered_copy = {key: list(values)
            for key, values in test_equal_freq.items()}
        if stableObject.orderDict(verbose, unordered_copy, test_frequencies_equal) != correct_output_freq:
            counter = counter + 1
    if counter > 0:
        print('times out of 100 that the order was wrong = %s'%(counter))
        return False

    try:
        stableObject.orderDict(verbose, test_error, test_frequencies)
    except Exception:
        #an element without a frequency has to be rejected
        return True
    return False








In [4]:
def test_order_keys(verbose):
    '''
    Checks the key order that match_controls.Stable_Marriage.order_keys
        returns for arrays of distinct lengths and for arrays where two
        keys have the same length.

    Parameters
    ----------
    verbose: boolean
        Passed straight through to order_keys.

    Returns
    -------
    True when both orders are as expected. False, after printing the
        actual and the expected order, at the first mismatch.
    '''
    stableObject = match_controls.Stable_Marriage()

    #pairs of (dictionary to order, expected order of its keys)
    scenarios = (
        (
            {'2':['a','b'], '1':['a'],  '3':['a','b','c'],  '5':['a','b','c','d','e'], '4':['a','b','c','d']},
            ['5','4','3','2','1'],
        ),
        (
            #'2a' and '2b' both match two samples
            {'2b':['a','b'], '1':['a'], '2a':['a', 'c'],  '3':['a','b','c'],  '5':['a','b','c','d','e'], '4':['a','b','c','d']},
            ['5','4','3','2a', '2b', '1'],
        ),
    )

    for unordered, expected in scenarios:
        if stableObject.order_keys(verbose, unordered) == expected:
            continue
        print('order_keys is not properly matching the output')
        print(stableObject.order_keys(verbose, unordered))
        print('should be')
        print(expected)
        return False
    return True

In [5]:
def test_stable_marriage(verbose):
    '''
    Checks the pairing that match_controls.Stable_Marriage returns from
        stableMarriageRunner for a fixed set of cases and controls.

    Parameters
    ----------
    verbose: boolean
        Passed straight through to stableMarriageRunner.

    Returns
    -------
    True when the pairing equals the expected one. False, after printing
        both pairings, when it does not.
    '''
    matcher = match_controls.Stable_Marriage()

    #case id -> candidate control ids
    candidates_by_case = {'14': ['15', '17'], '25': [], '19': ['20'], '21':[], '6': [], '9': ['10'], '3': [], '7': ['8'], '18': ['17'], '23': [], '16': ['13', '15'], '27': [], '11': ['12']}
    #control id -> number of cases that list it
    cases_per_control = {'10': 1, '15': 2, '17': 2, '12': 1, '8': 1, '20': 1, '13': 1}
    #case id -> number of controls it lists
    controls_per_case = {'23': 0, '6': 0, '14': 2, '21': 0, '16': 2, '11': 1, '19': 1, '9': 1, '27': 0, '7': 1, '3': 0, '25': 0, '18': 1}

    observed = matcher.stableMarriageRunner(verbose, candidates_by_case, cases_per_control, controls_per_case)
    #expected pairing, written as control id -> case id
    expected  = {'8': '7', '10': '9', '13': '16', '15': '14', '20': '19', '12': '11', '17': '18'}

    if observed == expected:
        return True

    print("stable marriage fails. \nOutput should be %s \nOutput was"%(expected))
    print(observed)
    return False

## Testing individual functions


In [6]:
def test_mainControler(verbose, unit, inputdata, keep, control, case, nullvalues, match, output):
    '''
    Calls match_controls.mainControler with different combinations of
        real arguments and placeholder arguments and compares the label it
        returns with the label expected for that combination.

    Parameters
    ----------
    verbose: boolean
        Passed straight through to mainControler.

    unit: boolean
        Ignored. The test always passes True to mainControler.

    inputdata, keep, control, case, nullvalues, match, output
        The real arguments. Each of them is swapped for the integer 1 in
            some of the calls.

    Returns
    -------
    True when every call returned the expected label. False, after
        printing one message per mismatch, otherwise.
    '''
    #the caller's value is overridden on purpose
        #NOTE(review): presumably mainControler only returns these labels in
        #unit mode, confirm against match_controls
    unit = True
    #stands in for an argument that was not supplied
    absent = 1

    #(expected label, arguments after verbose and unit, message on mismatch)
    checks = [
        ("Everything",
            (inputdata, keep, control, case, nullvalues, match, output),
            "Error with calling Everything"),
        ("ExcludeControlCaseAndMatch",
            (inputdata, keep, control, case, absent, match, output),
            "Error with calling ExcludeControlCaseAndMatch"),
        #nullvalues and match are absent here. The original call passed the
            #same arguments as the Everything call, so the two checks could
            #never both pass.
            #NOTE(review): combination inferred from the other checks, confirm
        ("KeepAndControlCase",
            (inputdata, keep, control, case, absent, absent, output),
            "Error with calling KeepAndControlCase"),
        ("KeepOnly",
            (inputdata, keep, absent, absent, absent, absent, output),
            "Error with calling KeepOnly"),
        ("ControlCaseNullAndMatch",
            (inputdata, absent, control, case, nullvalues, match, output),
            "Error with calling ControlCaseNullAndMatch"),
        ("ControlCaseAndMatch",
            (inputdata, absent, control, case, absent, match, output),
            "Error with calling ControlCaseAndMatch"),
        ("ControlAndCaseOnly",
            (inputdata, absent, control, case, absent, absent, output),
            "Error with calling ControlAndCaseOnly"),

        ("Everything",
            (inputdata, keep, control, case, nullvalues, match, absent),
            "Error with calling Everything with int type output"),
        ("Everything",
            (absent, keep, control, case, nullvalues, match, output),
            "Error with calling Everything with int type inputdata"),

        ("KeepOnly",
            (inputdata, keep, absent, absent, absent, absent, output),
            "Error with calling KeepOnly with casw and control files being ints"),
        ("KeepOnly",
            (inputdata, keep, absent, case, absent, absent, output),
            "Error with calling KeepOnly with control file being int"),
        ("No Functions Called",
            (inputdata, absent, absent, case, nullvalues, match, output),
            "Error when not calling anything with keep and control files being ints"),

        ("No Functions Called",
            (absent, absent, absent, absent, absent, absent, absent),
            "Error when not calling anything with all file inputs being ints"),
        ("No Functions Called",
            (inputdata, absent, absent, absent, absent, absent, output),
            "Error when not calling anything with input and output files being ints"),
    ]

    all_passed = True
    for expected_label, arguments, message in checks:
        if expected_label != match_controls.mainControler(verbose, unit, *arguments):
            print(message)
            all_passed = False

    return all_passed

Checks that a normal keep query works.
Tests that a keep query that keeps nothing gives an error.

In [7]:
def test_keep_samples(verbose, inputData, keep, nokeep, csvdata_keep):
    '''
    Checks that match_controls.keep_samples fails for a keep query that
        keeps nothing and returns the expected metadata for a normal keep
        query.

    Parameters
    ----------
    verbose: boolean
        Passed straight through to keep_samples. True also prints
            progress messages from this test.

    inputData: Metadata object
        Metadata that gets filtered.

    keep: array of strings
        Keep queries that are expected to work.

    nokeep: array of strings
        Keep queries that are expected to make keep_samples raise.

    csvdata_keep: Metadata object
        What filtering inputData with keep has to return.

    Returns
    -------
    True when both checks pass, False when the filtered metadata differs
        from csvdata_keep and a string that describes the problem
        otherwise.
    '''
    try:
        match_controls.keep_samples(verbose, inputData, nokeep)
    except Exception:
        #Exception instead of a bare except so that an interrupt of the
            #notebook is not mistaken for a passed test
        if verbose:
            print("Null tests passed")
    else:
        return "Error no exception resulted like it should"

    try:
        afterExclusionMD = match_controls.keep_samples(verbose, inputData, keep)
    except Exception as error:
        if verbose:
            print("keep_samples raised %r"%(error))
        return "Exception error resulted"

    csvdata = afterExclusionMD.to_dataframe()
    csvdata_keep = csvdata_keep.to_dataframe()
    try:
        assert_frame_equal(csvdata, csvdata_keep)
    except Exception as error:
        if verbose:
            print("Filtered metadata differs from expected: %s"%(error))
        return False
    return True

In [8]:
def test_determine_cases_and_controls(verbose, inputData, badInputData, case, nocase, control, csvdata_case_control, csvdata_case_controlWithNoKeep):
    '''
    Checks match_controls.determine_cases_and_controls with case queries
        that are expected to fail and with working queries on two
        different inputs.

    An earlier cell of this notebook defines a function with this same
        name and different parameters. Whichever definition cell was run
        last is the one a call resolves to.

    Parameters
    ----------
    verbose: boolean
        Passed straight through to determine_cases_and_controls. True
            also prints progress messages from this test.

    inputData: Metadata object
        Input for the failing case queries and for the second comparison.

    badInputData: Metadata object
        Input for the first comparison.

    case: array of strings
        Case queries that are expected to work.

    nocase: array of strings
        Case queries that are expected to make
            determine_cases_and_controls raise.

    control: array of strings
        Control queries.

    csvdata_case_control: Metadata object
        Expected result for inputData.

    csvdata_case_controlWithNoKeep: Metadata object
        Expected result for badInputData.

    Returns
    -------
    True when every check passes, False when a result differs from the
        expected metadata and a string that describes the problem
        otherwise.
    '''
    def blank_label_frame(metadata):
        #one "Unspecified" row per id, indexed like the metadata
        ids = metadata.get_ids()
        case_control_Series = pd.Series( ['Unspecified'] * len(ids), ids)
        case_control_Series.index.name = metadata.id_header
        return case_control_Series.to_frame('case_control')

    def label_and_compare(metadata, expectedMD):
        #labels metadata with the working queries and compares the result
            #with expectedMD. Returns True, False or an error string.
        try:
            case_controlDF = blank_label_frame(metadata)
            case_control_dict = {'case':case, 'control':control }
        except Exception:
            return "Exception error resulted creating case_control_dict"
        try:
            case_controlMD = match_controls.determine_cases_and_controls(verbose, metadata, case_control_dict, case_controlDF)
        except Exception:
            return "Exception error resulted creating case_controlMD"

        csvdata = case_controlMD.to_dataframe()
        expected_frame = expectedMD.to_dataframe()
        try:
            assert_frame_equal(csvdata, expected_frame)
        except Exception:
            return False
        return True

    try:
        case_controlDF = blank_label_frame(inputData)
        nocase_control_dict = {'case':nocase, 'control':control }
    except Exception:
        return "Exception error resulted creating nocase_control_dict"

    try:
        match_controls.determine_cases_and_controls(verbose, inputData, nocase_control_dict, case_controlDF)
    except Exception:
        #Exception instead of a bare except so that an interrupt of the
            #notebook is not mistaken for a passed test
        if verbose:
            print("Null tests passed")
    else:
        return "Error no exception resulted like it should"

    comparisons = (
        (badInputData, csvdata_case_controlWithNoKeep),
        (inputData, csvdata_case_control),
    )
    for metadata, expectedMD in comparisons:
        outcome = label_and_compare(metadata, expectedMD)
        if outcome is not True:
            return outcome

    return True

Tests:
- normal input
- wrong column in the match query file
- TODO: no samples pass
- nothing filtered

In [13]:

def test_filter_prep_for_matchMD(verbose, inputData, match, match_wrongColumn, nullValues, nonullValues, csvdata_filter):
    '''
    Checks match_controls.filter_prep_for_matchMD with null queries and
        match queries that are expected to fail, with normal input and
        with input that is expected to come back unchanged.

    Parameters
    ----------
    verbose: boolean
        Passed straight through to filter_prep_for_matchMD. True also
            prints progress messages from this test.

    inputData: Metadata object
        Metadata that gets filtered.

    match: array of strings
        Match queries that are expected to work.

    match_wrongColumn: array of strings
        Match queries that are expected to make filter_prep_for_matchMD
            raise.

    nullValues: array of strings
        Null value entries that are expected to work.

    nonullValues: array of strings
        Null value entries that are expected to make
            filter_prep_for_matchMD raise.

    csvdata_filter: Metadata object
        What filtering inputData has to return. It is also used as the
            input of the nothing filtered check.

    Returns
    -------
    True when every check passes, False when a result differs from
        csvdata_filter and a string that describes the problem otherwise.
    '''
    try:
        match_controls.filter_prep_for_matchMD(verbose, inputData, match, nonullValues)
    except Exception:
        if verbose:
            print("First Null test passed")
    else:
        return "Error no exception resulted like it should when no samples pass"

    try:
        match_controls.filter_prep_for_matchMD(verbose, inputData, match_wrongColumn, nullValues)
    except Exception:
        if verbose:
            print("All Null tests passed")
    else:
        return "Error no exception resulted like it should when wrong column in match query file"

    #csvdata_filter itself has to stay a Metadata object because it is an
        #input of the second run below, so the frame gets its own name
    expected_frame = csvdata_filter.to_dataframe()

    #first the normal input, then the already filtered metadata, which
        #should come back unchanged
    for metadata in (inputData, csvdata_filter):
        try:
            prepped_for_matchMD = match_controls.filter_prep_for_matchMD(verbose, metadata, match, nullValues)
        except Exception:
            return "Exception error resulted"

        csvdata = prepped_for_matchMD.to_dataframe()
        try:
            assert_frame_equal(csvdata, expected_frame)
        except Exception:
            return False

    return True


Tests made:
- proper input with everything done before; this also tests that it realigns matches
- a match query that names a column that does not exist
- range values in the match query that cannot be turned into numbers
- case samples whose numbers are stored as strings
- control samples whose numbers are stored as strings
- no matches

In [14]:

def test_match_samples(verbose, inputData, strcaseInput, strcontrolInput, noMatch_inputData, match, matchrangeStr, badColumnMatch, csvdata_match, csvdata_nomatch):
    '''
    Checks match_controls.match_samples with four inputs that are
        expected to fail, with normal input and with input for which the
        expected output is csvdata_nomatch.

    Parameters
    ----------
    verbose: boolean
        Passed straight through to match_samples. True also prints
            progress messages from this test.

    inputData: Metadata object
        Metadata that gets matched.

    strcaseInput: Metadata object
        Metadata that is expected to make match_samples raise.

    strcontrolInput: Metadata object
        Metadata that is expected to make match_samples raise.

    noMatch_inputData: Metadata object
        Metadata whose matched result has to equal csvdata_nomatch.

    match: array of strings
        Match queries that are expected to work.

    matchrangeStr: array of strings
        Match queries that are expected to make match_samples raise.

    badColumnMatch: array of strings
        Match queries that are expected to make match_samples raise.

    csvdata_match: Metadata object
        What matching inputData has to return.

    csvdata_nomatch: Metadata object
        What matching noMatch_inputData has to return.

    Returns
    -------
    True when every check passes, False when a result differs from the
        expected metadata and a string that describes the problem
        otherwise.
    '''
    #(metadata, match queries, returned when nothing is raised,
        #printed in verbose mode when the call raises as expected)
    calls_that_must_raise = (
        (inputData, badColumnMatch,
            "Error no exception resulted like it should from including non existant column in match queries file",
            "Non existant column Null test passed"),
        (inputData, matchrangeStr,
            "Error no exception resulted like it should when a range number in match query file is in a format that can't be turned into a float",
            "Non float as range number test passed"),
        (strcaseInput, match,
            "Error no exception resulted like it should when a number in a case sample row is a string",
            "Number in a case sample row is a string test passed"),
        (strcontrolInput, match,
            "Error no exception resulted like it should when a number in a control sample row is a string",
            "All Null tests passed"),
    )
    for metadata, queries, problem, passed_message in calls_that_must_raise:
        try:
            match_controls.match_samples(verbose, metadata, queries)
        except Exception:
            #Exception instead of a bare except so that an interrupt of the
                #notebook is not mistaken for a passed test
            if verbose:
                print(passed_message)
        else:
            return problem

    try:
        matchedMD = match_controls.match_samples(verbose, inputData, match)
    except Exception:
        return "Exception error resulted"

    csvdata = matchedMD.to_dataframe()
    csvdata_match = csvdata_match.to_dataframe()

    try:
        assert_frame_equal(csvdata, csvdata_match)
    except Exception:
        return False

    try:
        matchedMD = match_controls.match_samples(verbose, noMatch_inputData, match)
    except Exception:
        return "Exception error resulted"

    csvdatano = matchedMD.to_dataframe()
    csvdata_nomatch = csvdata_nomatch.to_dataframe()

    try:
        assert_frame_equal(csvdatano, csvdata_nomatch)
    except Exception:
        return False

    #extra report on the age_years column of the first comparison
    print('test---------- age_years are the same = %s'%( csvdata_match["age_years"].equals(csvdata["age_years"] )))
    if csvdata_match["age_years"].equals(csvdata["age_years"]) != True:
        print("age_years columns don't match")
        print(csvdata["age_years"])
        print(csvdata_match["age_years"])

    return True

In [15]:
# File arguments that are all expected to load without error.
inputDict = {
    "inputdata": "unitTest_files/unit_keepinput.tsv",
    "keep": "unitTest_files/test_keep.txt",
    "control": "unitTest_files/test_control.txt",
    "case": "unitTest_files/test_case.txt",
    "nullvalues": "unitTest_files/test_nulls.txt",
    "match": "unitTest_files/test_match.txt",
}
# qr is read again by the cell that summarises all unit test results
qr = test_get_user_input_query_lines(False, inputDict)

In [16]:
def _read_query_lines(path):
    '''
    Returns the lines of the file at path as a list. Line endings are
        kept. The file is closed before returning.
    '''
    with open(path, 'r') as query_file:
        return query_file.readlines()


inputData = Metadata.load("unitTest_files/unit_keepinput.tsv")
output = "unitTest_files/testOutputfile.tsv"

# query files that are expected to make the tested function raise
nokeep = _read_query_lines("unitTest_files/test_keep_noentries.txt")
test_control_in = _read_query_lines("unitTest_files/test_control_in.txt")
nocase = _read_query_lines("unitTest_files/test_case_noentries.txt")
matchrangeStr = _read_query_lines("unitTest_files/test_matchErrorThree.txt")
match_wrongColumn = _read_query_lines("unitTest_files/test_matchErrorWrongColumn.txt")
nonullValues = _read_query_lines("unitTest_files/test_nulls_noentries.txt")

# query files that are expected to work
keep = _read_query_lines("unitTest_files/test_keep.txt")
control = _read_query_lines("unitTest_files/test_control.txt")
case = _read_query_lines("unitTest_files/test_case.txt")
nullValues = _read_query_lines("unitTest_files/test_nulls.txt")
match = _read_query_lines("unitTest_files/test_match.txt")

# expected metadata after each stage
csvdata_keepoutput = Metadata.load("unitTest_files/unit_keepoutput.tsv")
csvdata_keep_case_control_output = Metadata.load("unitTest_files/unit_keepccoutput.tsv")
csvdata_kccnull = Metadata.load("unitTest_files/unit_keepccnulloutput.tsv")
csvdata_kccnmatch = Metadata.load("unitTest_files/unit_keepccnullmatchoutput.tsv")

# inputs and expected outputs for the special cases
strcaseInput = Metadata.load("unitTest_files/unit_matchwithintasstringscase.tsv")
strcontrolInput = Metadata.load("unitTest_files/unit_matchwithintasstringscontrol.tsv")
unit_nokeepinput = Metadata.load("unitTest_files/unit_nokeepinput.tsv")
unit_nokeepccoutput = Metadata.load("unitTest_files/unit_nokeepccoutput.tsv")
noMatch_inputData = Metadata.load("unitTest_files/unit_withnomatches_input.tsv")
csvdata_nomatch = Metadata.load("unitTest_files/unit_withnomatches_output.tsv")

verbose = False
unit = False

#main = test_mainControler(verbose, unit, inputdata, keep, control, case, nullvalues, match, output)

k = test_keep_samples(verbose, inputData, keep, nokeep, csvdata_keepoutput)
cc = test_determine_cases_and_controls(verbose, csvdata_keepoutput, unit_nokeepinput, case, nocase, control, csvdata_keep_case_control_output, unit_nokeepccoutput)
nul = test_filter_prep_for_matchMD(verbose, csvdata_keep_case_control_output, match, match_wrongColumn, nullValues, nonullValues, csvdata_kccnull)
m = test_match_samples(verbose, csvdata_kccnull, strcaseInput, strcontrolInput, noMatch_inputData, match, matchrangeStr, match_wrongColumn, csvdata_kccnmatch, csvdata_nomatch)
od = test_orderDict(verbose)
ok = test_order_keys(verbose)
sm = test_stable_marriage(verbose)

# a test passes only when it returns True; the tests report problems by
# returning False or a string that describes what went wrong
if all(result is True for result in (k, cc, nul, m, od, ok, sm, qr)):
    print("All Unit Tests passed")
else:
    print(k)
    print(cc)
    print(nul)
    print(m)
    print(od)
    print(ok)
    print(sm)
    # qr is part of the pass condition, so it belongs in the report as well
    print(qr)





AssertionError: Attributes are different

Attribute "dtype" are different
[left]:  object
[right]: float64