In [86]:
import pandas as pd

# Script

In [87]:
def tablematch(
    df_tablematch_input, # Tablematch Rules
    df_main_table_input, # Input Table
    tm_result, # Result Column
    tm_active = None, # Name of Boolean Column to filter aktive Rules
    tm_order = None, # Name of Int Column to sort Rules
    tm_category = None, # String Column to filter Categories
    input_category = None, # List Input which Category to use
    tm_comment = None, # Comment of Rule
    case_sensitive = True, # Case Sensitive Data, otherwise everything will be upper case. Carefull with inplace
    inplace = False, # Replace existing Dataframe or add Result Column
    rule_delimiter = ',', # Delimiter for the rules
    and_delimiter = '&',
    debug = False,
    ): 

    # Create copy of dataframes
    df_main_table = df_main_table_input.copy()

    df_tablematch = df_tablematch_input.copy()

    # Keep only active rules
    if tm_active is not None:
        df_tablematch = df_tablematch.loc[df_tablematch[tm_active] == True,:].copy()
        df_tablematch.drop(columns = [tm_active], inplace = True)
    
    # Filter for rules category
    if input_category is not None and tm_category is not None:
        df_tablematch = df_tablematch.loc[df_tablematch[tm_category].isin(input_category),:].copy()
    if tm_category is not None:
        df_tablematch.drop(columns = [tm_category], inplace = True)

    if tm_order is not None and debug:
        df_tablematch[tm_result] = df_tablematch[tm_order].astype(str) + ' - ' + df_tablematch[tm_result].astype(str)

    # Sort by tm_order
    if tm_order is not None:
        df_tablematch.sort_values(by = tm_order, inplace = True)
        df_tablematch.drop(columns = [tm_order], inplace = True)

    # Drop Comment
    if tm_comment is not None:
        df_tablematch.drop(columns = [tm_comment], inplace = True)

    # Column names to list
    tablematch_columns = df_tablematch.columns.to_list()
    tablematch_columns.remove(tm_result)

    main_table_columns = df_main_table.columns.to_list()
    
    # Find overlapping columns
    column_inner = list(set(main_table_columns) & set(tablematch_columns))

    # Drop other columns
    df_tablematch.drop(labels=set(tablematch_columns).difference(set(column_inner)), axis=1, inplace=True)

    # Check if result_column already exists
    if tm_result in main_table_columns:
        raise ValueError('Column "' + tm_result + '" already exist in dataset!')
    else:
        # Dict for Mapping of Boolean
        boolean_to_string = {True: 'True', False: 'False'}

        # Change columns if needed
        for column in column_inner:
            if debug:
                print('df_tablematch at column "' + str(column) + '" at Start:')
                print(df_tablematch)
            
            # Replace Boolean with Strings
            mask_tablematch = df_tablematch[column].map(type) != bool
            df_tablematch[column] = df_tablematch[column].where(mask_tablematch, df_tablematch[column].map(boolean_to_string), axis = 0)

            # Change dtype to object
            if df_tablematch[column].dtype != object:
                df_tablematch[column] = df_tablematch[column].astype(object)
                df_tablematch[column] = df_tablematch[column].map(str)
            
            # Fill Null values with empty strings
            df_tablematch[column].fillna('', inplace = True)
            
            # If not case sensitive, make upper
            if not case_sensitive:
                df_tablematch[column] = df_tablematch[column].str.upper()

            # Split values on rule delimiter
            df_tablematch[column] = df_tablematch[column].str.split(pat = rule_delimiter)
            df_tablematch = df_tablematch.explode(column)
            df_tablematch[column] = df_tablematch[column].str.strip()

            # Reset Index
            df_tablematch.reset_index(drop = True, inplace = True)

            if debug:
                print('df_tablematch at column "' + str(column) + '" after column operations:')
                print(df_tablematch)

            # Transform rules
            for index, row in df_tablematch.iterrows():
                if row[column] == '':
                    df_tablematch.loc[index,column] = None
                elif row[column][0] == '(' and row[column][-1] == ')':
                    # print('Do something!')
                    and_row = row[column][1:-1].split(and_delimiter)
                    rule_full = []
                    for rule in and_row:
                        rule = rule.strip()
                        if rule[0:2] == '!=':
                            rule_full.append('df_main_table["' + column + '"] != ' + rule[2:])
                        elif rule[0:2] == '>=':
                            rule_full.append('df_main_table["' + column + '"] >= ' + rule[2:])
                        elif rule[0:2] == '<=':
                            rule_full.append(+ 'df_main_table["' + column + '"] <= ' + rule[2:])
                        elif rule[0] == '>':
                            rule_full.append('df_main_table["' + column + '"] > ' + rule[1:])
                        elif rule[0] == '<':
                            rule_full.append('df_main_table["' + column + '"] < ' + rule[1:])
                        elif rule[0:2] == '==':
                            rule_full.append('df_main_table["' + column + '"] == ' + rule[2:])
                        elif rule[0] == '=':
                            rule_full.append('df_main_table["' + column + '"] == ' + rule[1:])
                        elif rule[0:2] == '!*':
                            if rule[-1] == '*':
                                rule_full.append('~df_main_table["' + column + '"].str.contains("' + rule[2:-1] +'")')
                            else:
                                rule_full.append('~df_main_table["' + column + '"].str.endswith("' + rule[2:] +'")')
                        elif rule[0] == '*':
                            if rule[-1] == '*':
                                rule_full.append('df_main_table["' + column + '"].str.contains("' + rule[1:-1] +'")')
                            else:
                                rule_full.append('df_main_table["' + column + '"].str.endswith("' + rule[1:] +'")')
                        elif rule[-1] == '*':
                            if row[column][0] == '!':
                                rule_full.append('~df_main_table["' + column + '"].str.startswith("' + rule[1:-1] +'")')
                            else:
                                rule_full.append('df_main_table["' + column + '"].str.startswith("' + rule[:-1] +'")')
                        elif rule[0] == '!':
                            rule_full.append('df_main_table["' + column + '"] != "' + rule +'"')
                        else:
                            rule_full.append('df_main_table["' + column + '"] == "' + rule +'"')
                    rule_full = '((' + ') & ('.join(rule_full) + '))'
                    df_tablematch.loc[index,column] = rule_full
                elif row[column] == 'BLANK':
                    df_tablematch.loc[index,column] = 'df_main_table["' + column + '"].isna()'
                elif row[column] == '!BLANK':
                    df_tablematch.loc[index,column] = 'df_main_table["' + column + '"].notna()'
                elif row[column][0:2] == '!=':
                    df_tablematch.loc[index,column] = 'df_main_table["' + column + '"] != ' + row[column][2:]
                elif row[column][0:2] == '>=':
                    df_tablematch.loc[index,column] = 'df_main_table["' + column + '"] >= ' + row[column][2:]
                elif row[column][0:2] == '<=':
                    df_tablematch.loc[index,column] = 'df_main_table["' + column + '"] <= ' + row[column][2:]
                elif row[column][0] == '>':
                    df_tablematch.loc[index,column] = 'df_main_table["' + column + '"] > ' + row[column][1:]
                elif row[column][0] == '<':
                    df_tablematch.loc[index,column] = 'df_main_table["' + column + '"] < ' + row[column][1:]
                elif row[column][0:2] == '==':
                    df_tablematch.loc[index,column] = 'df_main_table["' + column + '"] == ' + row[column][2:]
                elif row[column][0] == '=':
                    df_tablematch.loc[index,column] = 'df_main_table["' + column + '"] == ' + row[column][1:]
                elif row[column][0:2] == '!*':
                    if row[column][-1] == '*':
                        df_tablematch.loc[index,column] = '~df_main_table["' + column + '"].str.contains("' + row[column][2:-1] +'")'
                    else:
                        df_tablematch.loc[index,column] = '~df_main_table["' + column + '"].str.endswith("' + row[column][2:] +'")'
                elif row[column][0] == '*':
                    if row[column][-1] == '*':
                        df_tablematch.loc[index,column] = 'df_main_table["' + column + '"].str.contains("' + row[column][1:-1] +'")'
                    else:
                        df_tablematch.loc[index,column] = 'df_main_table["' + column + '"].str.endswith("' + row[column][1:] +'")'
                elif row[column][-1] == '*':
                    if row[column][0] == '!':
                        df_tablematch.loc[index,column] = '~df_main_table["' + column + '"].str.startswith("' + row[column][1:-1] +'")'
                    else:
                        df_tablematch.loc[index,column] = 'df_main_table["' + column + '"].str.startswith("' + row[column][:-1] +'")'
                elif row[column][0] == '!':
                    df_tablematch.loc[index,column] = 'df_main_table["' + column + '"] != "' + row[column] +'"'
                else:
                    df_tablematch.loc[index,column] = 'df_main_table["' + column + '"] == "' + row[column] +'"'
        
        if debug:
            print('df_tablematch after transforming rules:')
            print(df_tablematch)

        # Transform data  
        for column in column_inner:
            # Replace Boolean with Strings
            mask_main_table = df_main_table[column].map(type) != bool
            df_main_table[column] = df_main_table[column].where(mask_main_table, df_main_table[column].map(boolean_to_string), axis = 0)
            
            # If not case sensitive, make upper
            if not case_sensitive and df_main_table[column].dtype == object:
                df_main_table[column] = df_main_table[column].str.upper()

        # Initial result column
        df_main_table[tm_result] = None

        if debug:
                print('Final queries:')

        # Create and execute statement
        for index, row in df_tablematch.iterrows():
            # Drop empty cells because no check needed
            row_clean = row.dropna()
            row_clean_columns = row_clean.index.to_list()
            row_clean_columns.remove(tm_result)

            # Start by checking if result column is empty
            query_start = "df_main_table.loc[(df_main_table['" + tm_result + "'].isna())"

            # End by inserting result
            query_end = ", '" + tm_result + "'] = '" + str(row_clean[tm_result]) + "'" # TODO other Dtype than String?

            # Initial mid part
            query_mid = ""
            for row_column in row_clean_columns:
                # Create rules as statement
                query_mid = query_mid + " & (" + row_clean[row_column] + ")"
            
            # Combine query
            query_full = query_start + query_mid + query_end

            if debug:
                print(query_full)
                
            # Execute Statement
            exec(query_full) # TODO RISK OF CODE INJECTION!?
        
        # Output
        if inplace:
            # Returns original Dataframe with the result column
            df_main_table_input[tm_result] = df_main_table[tm_result]
            return df_main_table_input
        else:
            # Returns a new Dataframe with the result column
            output = df_main_table_input.copy()
            output[tm_result] = df_main_table[tm_result]
            return output


# Test Cases

In [88]:
# Test Case Int
Input = pd.DataFrame([
    [1],
    [2],
    [3],
    [4],
    [5],
    [6]], 
    columns = ['Test Number'])
# print(Input)

Rules = pd.DataFrame([
    ['>5', 'Greater than 5'],
    ['<5', 'Smaller than 5'],
    ['>=5', 'Equal, Greater than 5'],
    ['<=5', 'Equal, Smaller than 5'],
    ['==5', 'Eqauls 5'],
    ['!=5', 'Not Equals 5'],
    [None, 'Fallback']], 
    columns = ['Test Number', 'Output'])
# print(Rules)

IntTest = tablematch(
    df_tablematch_input = Rules, 
    df_main_table_input = Input,
    tm_result = "Output",)
print(IntTest)

   Test Number                 Output
0            1         Smaller than 5
1            2         Smaller than 5
2            3         Smaller than 5
3            4         Smaller than 5
4            5  Equal, Greater than 5
5            6         Greater than 5


In [89]:
# Test Case Float
Input = pd.DataFrame([
    [1.1],
    [2.3],
    [3.5],
    [4.6],
    [5.0],
    [6.1]], 
    columns = ['Test Number'])
# print(Input)

Rules = pd.DataFrame([
    ['>5', 'Greater than 5'],
    ['<5', 'Smaller than 5'],
    # ['>=5', 'Equal, Greater than 5'],
    # ['<=5', 'Equal, Smaller than 5'],
    ['==5', 'Eqauls 5'],
    ['!=5', 'Not Equals 5'],
    [None, 'Fallback']], 
    columns = ['Test Number', 'Output'])
# print(Rules)

FloatTest = tablematch(
    df_tablematch_input = Rules, 
    df_main_table_input = Input,
    tm_result = "Output",)
print(FloatTest)

   Test Number          Output
0          1.1  Smaller than 5
1          2.3  Smaller than 5
2          3.5  Smaller than 5
3          4.6  Smaller than 5
4          5.0        Eqauls 5
5          6.1  Greater than 5


In [90]:
# Test Case String
Input = pd.DataFrame([
    ['Hey Hallo'],
    ['Hallo Hey'],
    ['Hey Hallo Hey'],
    ['Hey Ciao'],
    ['Ciao Hey'],
    ['Hey Ciao Hey']], 
    columns = ['Test String'])
# print(Input)

Rules = pd.DataFrame([
    ['*Hallo', 'Ends with *Hallo'],
    ['Hallo*', 'Starts with Hallo*'],
    ['*Hallo*', 'Contains *Hallo*'],
    ['!*Hallo', 'Not Ends with !*Hallo'],
    ['!Hallo*', 'Not Starts with !Hallo*'],
    ['!*Hallo*', 'Not Contains !*Hallo*'],
    [None, 'Fallback']], 
    columns = ['Test String', 'Output'])
# print(Rules)

StringTest = tablematch(
    df_tablematch_input = Rules, 
    df_main_table_input = Input,
    tm_result = "Output",)
print(StringTest)

StringTest2 = tablematch(
    df_tablematch_input = Rules, 
    df_main_table_input = Input,
    tm_result = "Output",
    case_sensitive = False)
print(StringTest2)

     Test String                 Output
0      Hey Hallo       Ends with *Hallo
1      Hallo Hey     Starts with Hallo*
2  Hey Hallo Hey       Contains *Hallo*
3       Hey Ciao  Not Ends with !*Hallo
4       Ciao Hey  Not Ends with !*Hallo
5   Hey Ciao Hey  Not Ends with !*Hallo
     Test String                 Output
0      Hey Hallo       Ends with *Hallo
1      Hallo Hey     Starts with Hallo*
2  Hey Hallo Hey       Contains *Hallo*
3       Hey Ciao  Not Ends with !*Hallo
4       Ciao Hey  Not Ends with !*Hallo
5   Hey Ciao Hey  Not Ends with !*Hallo


In [91]:
# Test Case Boolean
Input = pd.DataFrame([
    [True],
    [False],
    ['True'],
    ['False'],
    ['TRUE'],
    ['FALSE']], 
    columns = ['Boolean String'])
# print(Input)

Rules = pd.DataFrame([
    [True, 'True'],
    [False, 'False'],
    [None, 'Fallback']], 
    columns = ['Boolean String', 'Output'])
# print(Rules)

BooleanTest = tablematch(
    df_tablematch_input = Rules, 
    df_main_table_input = Input,
    tm_result = "Output",)
print(BooleanTest)

BooleanTest2 = tablematch(
    df_tablematch_input = Rules, 
    df_main_table_input = Input,
    tm_result = "Output",
    case_sensitive = False)
print(BooleanTest2)

  Boolean String    Output
0           True      True
1          False     False
2           True      True
3          False     False
4           TRUE  Fallback
5          FALSE  Fallback
  Boolean String Output
0           True   True
1          False  False
2           True   True
3          False  False
4           TRUE   True
5          FALSE  False


In [92]:
# Test Case Blank
Input = pd.DataFrame([
    [],
    [True],
    [False],
    ['True'],
    ['False']], 
    columns = ['Boolean String'])
# print(Input)

Rules = pd.DataFrame([
    ['BLANK', 'BLANK'],
    ['!BLANK', 'Not BLANK'],
    [None, 'Fallback']], 
    columns = ['Boolean String', 'Output'])
# print(Rules)

BooleanTest = tablematch(
    df_tablematch_input = Rules, 
    df_main_table_input = Input,
    tm_result = "Output",)
print(BooleanTest)

BooleanTest2 = tablematch(
    df_tablematch_input = Rules, 
    df_main_table_input = Input,
    tm_result = "Output",
    case_sensitive = False)
print(BooleanTest2)

  Boolean String     Output
0           None      BLANK
1           True  Not BLANK
2          False  Not BLANK
3           True  Not BLANK
4          False  Not BLANK
  Boolean String     Output
0           None      BLANK
1           True  Not BLANK
2          False  Not BLANK
3           True  Not BLANK
4          False  Not BLANK


In [93]:
# Test Case Date
Input = pd.DataFrame([
    ['2021-11-01'],
    ['2021-10-30'],
    ['2021-10-31'],
    ['2021-10-31'],
    ['2021-10-31'],
    ['2021-10-31']], 
    columns = ['Boolean String'], dtype = 'datetime64[ns]')
# print(Input)

Rules = pd.DataFrame([
    ['!="2021-10-31"', 'Not 2021-10-31'],
    ["<'2021-10-31'", 'Smaller than 2021-10-31'],
    ['>="2021-10-31"', 'Equal, Greater than 2021-10-31'],
    ['<="2021-10-31"', 'Equal, Smaller than 2021-10-31'],
    ['=="2021-10-31"', 'Eqauls 2021-10-31'],
    ['>"2021-10-31"', 'Greater than 2021-10-31']], 
    columns = ['Boolean String', 'Output'])
# print(Rules)

DateTest = tablematch(
    df_tablematch_input = Rules, 
    df_main_table_input = Input,
    tm_result = "Output",)
print(DateTest)

  Boolean String                          Output
0     2021-11-01                  Not 2021-10-31
1     2021-10-30                  Not 2021-10-31
2     2021-10-31  Equal, Greater than 2021-10-31
3     2021-10-31  Equal, Greater than 2021-10-31
4     2021-10-31  Equal, Greater than 2021-10-31
5     2021-10-31  Equal, Greater than 2021-10-31


In [94]:
# Test Case Category
Input = pd.DataFrame([
    ['Hey Hallo'],
    ['Hallo Hey'],
    ['Hey Hallo Hey'],
    ['Hey Ciao'],
    ['Ciao Hey'],
    ['Hey Ciao Hey']], 
    columns = ['Test String'], dtype = 'category')
# print(Input)

Rules = pd.DataFrame([
    ['*Hallo', 'Ends with *Hallo'],
    ['Hallo*', 'Starts with Hallo*'],
    ['*Hallo*', 'Contains *Hallo*'],
    ['!*Hallo', 'Not Ends with !*Hallo'],
    ['!Hallo*', 'Not Starts with !Hallo*'],
    ['!*Hallo*', 'Not Contains !*Hallo*'],
    [None, 'Fallback']], 
    columns = ['Test String', 'Output'])
# print(Rules)

StringTest = tablematch(
    df_tablematch_input = Rules, 
    df_main_table_input = Input,
    tm_result = "Output",)
print(StringTest)

StringTest2 = tablematch(
    df_tablematch_input = Rules, 
    df_main_table_input = Input,
    tm_result = "Output",
    case_sensitive = False)
print(StringTest2)

     Test String                 Output
0      Hey Hallo       Ends with *Hallo
1      Hallo Hey     Starts with Hallo*
2  Hey Hallo Hey       Contains *Hallo*
3       Hey Ciao  Not Ends with !*Hallo
4       Ciao Hey  Not Ends with !*Hallo
5   Hey Ciao Hey  Not Ends with !*Hallo
     Test String                 Output
0      Hey Hallo  Not Ends with !*Hallo
1      Hallo Hey  Not Ends with !*Hallo
2  Hey Hallo Hey  Not Ends with !*Hallo
3       Hey Ciao  Not Ends with !*Hallo
4       Ciao Hey  Not Ends with !*Hallo
5   Hey Ciao Hey  Not Ends with !*Hallo


In [95]:
# Test Case multiple rules
Input = pd.DataFrame([
    ['Hey Hallo'],
    ['Hallo Hey'],
    ['Hey Hallo Hey'],
    ['Hey Ciao'],
    ['Ciao Hey'],
    ['Hey Ciao Hey']], 
    columns = ['Test String'])
# print(Input)

Rules = pd.DataFrame([
    ['Hal*|He*', 'Hal OR He'],
    ['Hallo*', 'Starts with Hallo*'],
    ['*Hallo*', 'Contains *Hallo*'],
    ['!*Hallo', 'Not Ends with !*Hallo'],
    ['!Hallo*', 'Not Starts with !Hallo*'],
    ['!*Hallo*', 'Not Contains !*Hallo*'],
    [None, 'Fallback']], 
    columns = ['Test String', 'Output'])
# print(Rules)

StringTest = tablematch(
    df_tablematch_input = Rules, 
    df_main_table_input = Input,
    tm_result = "Output",
    rule_delimiter='|')
print(StringTest)


     Test String                 Output
0      Hey Hallo              Hal OR He
1      Hallo Hey              Hal OR He
2  Hey Hallo Hey              Hal OR He
3       Hey Ciao              Hal OR He
4       Ciao Hey  Not Ends with !*Hallo
5   Hey Ciao Hey              Hal OR He


# Advanced Test Cases

In [96]:
# Test Category
Input = pd.DataFrame([
    [100, 'RSF', 1000, '2010-05-03'],
    [200, 'RRT', 2000, '2010-05-08'],
    [300, 'CAC', 1500, '2010-05-08'],
    [400, 'CAC', 3500, '2010-05-12'],
    [500, 'SIS', 6000, '2010-05-18'],
    [600, 'SIS', 5000, '2010-05-21'],
    [900, 'ACD', 4500, '2010-05-21']
    ], 
    columns = ['Acc No', 'Code', 'Cost', 'Date'])
# print(Input)

Rules = pd.DataFrame([
    ['All', '=100', "RSF,RSG", "Software"],
    ['Servers', '=200', "RRT", "Network"],
    ['Servers', '=300', "CAC,CAD", "Servers"],
    ['Servers', '=400', "CAC,CAD", "Support"],
    ['Storage', '=500, =600', "SIS", "Data Centers"],
    ['All', None, None, 'Unknown']
    ], 
    columns = ['Category', 'Acc No','Code', 'Cost Center'])
# print(Rules)

StringTest = tablematch(
    df_tablematch_input = Rules, 
    df_main_table_input = Input,
    tm_result = 'Cost Center',
    tm_category = 'Category',
    input_category = ['All', 'Servers']
    )
print(StringTest)

   Acc No Code  Cost        Date Cost Center
0     100  RSF  1000  2010-05-03    Software
1     200  RRT  2000  2010-05-08     Network
2     300  CAC  1500  2010-05-08     Servers
3     400  CAC  3500  2010-05-12     Support
4     500  SIS  6000  2010-05-18     Unknown
5     600  SIS  5000  2010-05-21     Unknown
6     900  ACD  4500  2010-05-21     Unknown


In [97]:
# Dokumentation
Input = pd.DataFrame([
    [100, 'Hello World', True, None, '2021-05-03'],
    ], 
    columns = ['Numeric_Example', 'String_Example', 'Boolean_Example', 'Blank_Example', 'Date_Example'])
# print(Input)

Rules = pd.DataFrame([
    ['>0', "Hello*", True, '!BLANK', ">'2021-01-13'", "Hello World"],
    [None, "Hello*, *World", None, None, None, "Hello World"],
    ['(>0&<100)', "Hello*", None, None, None, "Hello World"],
    ], 
    columns = ['Numeric_Example', 'String_Example', 'Boolean_Example', 'Blank_Example', 'Date_Example', 'Result'])
# print(Rules)

StringTest = tablematch(
    df_tablematch_input = Rules, 
    df_main_table_input = Input,
    tm_result = 'Result',
    debug = False
    )
print(StringTest)

   Numeric_Example String_Example  Boolean_Example Blank_Example Date_Example  \
0              100    Hello World             True          None   2021-05-03   

        Result  
0  Hello World  


In [98]:
# Test Case Full
Input = pd.DataFrame([
    [100, 'RSF', 1000, '2010-05-03'],
    [200, 'RRT', 2000, '2010-05-08'],
    [300, 'CAC', 1500, '2010-05-08'],
    [400, 'CAC', 3500, '2010-05-12'],
    [500, 'SIS', 6000, '2010-05-18'],
    [600, 'SIS', 5000, '2010-05-21'],
    [900, 'ACD', 4500, '2010-05-21']
    ], 
    columns = ['Acc No', 'Code', 'Cost', 'Date'])
# print(Input)

Rules = pd.DataFrame([
    ['=100', "RSF,RSG", "Software"],
    ['=200', "RRT", "Network"],
    ['=300', "CAC,CAD", "Servers"],
    ['=400', "CAC,CAD", "Support"],
    ['=500, =600', "SIS", "Data Centers"],
    [None, None, 'Unknown']
    ], 
    columns = ['Acc No','Code', 'Cost Center'])
# print(Rules)

StringTest = tablematch(
    df_tablematch_input = Rules, 
    df_main_table_input = Input,
    tm_result = 'Cost Center',
    )
print(StringTest)

   Acc No Code  Cost        Date   Cost Center
0     100  RSF  1000  2010-05-03      Software
1     200  RRT  2000  2010-05-08       Network
2     300  CAC  1500  2010-05-08       Servers
3     400  CAC  3500  2010-05-12       Support
4     500  SIS  6000  2010-05-18  Data Centers
5     600  SIS  5000  2010-05-21  Data Centers
6     900  ACD  4500  2010-05-21       Unknown


In [99]:
# Test Case Error
Input = pd.DataFrame([
    [100, 'RSF', 1000, '2010-05-03'],
    [200, 'RRT', 2000, '2010-05-08'],
    [300, 'CAC', 1500, '2010-05-08'],
    [400, 'CAC', 3500, '2010-05-12'],
    [500, 'SIS', 6000, '2010-05-18'],
    [600, 'SIS', 5000, '2010-05-21'],
    [900, 'ACD', 4500, '2010-05-21']
    ], 
    columns = ['Acc No', 'Code', 'Cost', 'Date'])
# print(Input)

Rules = pd.DataFrame([
    [None, None, '(>"2010-05-02"&<"2010-05-13")', "DateTest"],
    ['(=100 & !=200)', "RSF,RSG", None, "Software"],
    ['(=100 & !=200)', "RSF,RSG", None, "Software"],
    ['=200', "RRT", None, "Network"],
    ['=300', "CAC,CAD", None, "Servers"],
    ['=400', "CAC,CAD", None, "Support"],
    ['=500, =600', "SIS", None, "Data Centers"],
    [None, None, None, 'Unknown']
    ], 
    columns = ['Acc No','Code', 'Date', 'Cost Center'])
# print(Rules)

StringTest = tablematch(
    df_tablematch_input = Rules, 
    df_main_table_input = Input,
    tm_result = 'Date',
    debug = False
    )
print(StringTest)

ValueError: Column "Date" already exist in dataset!