In [55]:
import pandas as pd
import datetime

class SOT(object):
    """Ledger held in a pandas DataFrame, with helpers to merge a bank export into it.

    The ledger is read from a CSV whose ``Date`` column is parsed day-first.
    Bank matching additionally reads the ``Value``, ``Description`` and
    ``budget_date`` columns of the ledger.
    """

    # A bank entry counts as already recorded when a ledger row has the same
    # value and description and a date strictly closer than this window.
    DATE_MATCH_WINDOW = pd.Timedelta(days=4)

    def __init__(self, sot_file):
        """Load the ledger.

        Args:
            sot_file: path or file-like object accepted by ``pandas.read_csv``.
                It must contain a ``Date`` column (parsed day-first).
        """
        self.sot_df = pd.read_csv(sot_file)
        self.sot_df['Date'] = pd.to_datetime(self.sot_df['Date'], dayfirst=True)

    def __call__(self):
        """Return the ledger DataFrame (the stored object, not a copy)."""
        return self.sot_df

    def save_csv(self):
        """Placeholder: prints the current timestamp; nothing is written yet."""
        print(datetime.datetime.now().strftime("%Y-%m-%d-%H:%M"))

    def update_budget(self, budget_records):
        """Placeholder: returns ``budget_records`` unchanged."""
        return budget_records

    def prep_second_bank(self, csv_location):
        """Read a bank export and normalise it for matching.

        Args:
            csv_location: path or file-like object accepted by ``pandas.read_csv``.

        Returns:
            DataFrame with whitespace-stripped column names, ``Date`` parsed
            day-first, sorted by index.
        """
        second_bank = pd.read_csv(csv_location)
        # Strip the headers *before* looking up 'Date'; a padded header such
        # as "Date " would otherwise raise KeyError on the next line.
        second_bank.columns = [col.strip() for col in second_bank.columns]
        second_bank['Date'] = pd.to_datetime(second_bank['Date'], dayfirst=True)
        return second_bank.sort_index()

    def update_bank(self, csv_location):
        """Merge every row of a bank export into a copy of the ledger.

        Rows are processed in order against the frame built so far, so a row
        appended earlier in the same export is seen by later rows.

        Args:
            csv_location: bank export, see ``prep_second_bank``.

        Returns:
            A new DataFrame; ``self.sot_df`` is left untouched.
        """
        second_bank_df = self.prep_second_bank(csv_location)
        # Work on a copy: unique-match updates write in place and must not
        # leak into the stored ledger while appended rows do not.
        updated_sot = self.sot_df.copy()
        for _, row in second_bank_df.iterrows():
            key = row.to_dict()  # column header -> value for this bank row
            row_type = self.bank_row_type(key, second_bank_df, updated_sot)
            updated_sot = self.bank_row_type_strategy(row_type, row, second_bank_df, updated_sot)
        return updated_sot

    def bank_row_type(self, key, bank_df, updated_sot):
        """Classify one bank row against the ledger.

        Args:
            key: mapping with at least ``Date``, ``Value`` and ``Description``.
            bank_df: the bank export the row came from (currently unused).
            updated_sot: ledger frame to match against.

        Returns:
            ``'existing bank match'``, ``'brand new'``, ``'multiple match'`` or
            the tuple ``('unique match', index_label)``.
        """
        date_match = (updated_sot['Date'] - key['Date']).abs() < self.DATE_MATCH_WINDOW
        value_match = updated_sot['Value'] == key['Value']
        description_match = updated_sot['Description'] == key['Description']
        # The account name is not part of the duplicate check.
        # TODO need to think about rare case where updated_sot.groupby(['Description','Date', 'Value']).size().sort_values(ascending=False)
        if (date_match & value_match & description_match).any():
            # Entry already found in bank section, no need to do anything
            return 'existing bank match'

        # Compare the budget date with the row's *date*, not the whole key dict.
        # NOTE(review): assumes budget_date holds day-first dates like 'Date';
        # unparseable values are treated as "no budget date" -- confirm.
        # NOTE(review): only the date is compared here; confirm whether the
        # value should be part of the budget match as well.
        budget_dates = pd.to_datetime(updated_sot['budget_date'], dayfirst=True, errors='coerce')
        matched_budget = (budget_dates == key['Date']).to_numpy()
        number_budget_matches = int(matched_budget.sum())
        if number_budget_matches == 0:
            # Need to make new row
            return 'brand new'
        if number_budget_matches == 1:
            return 'unique match', updated_sot.index[matched_budget][0]
        return 'multiple match'

    def bank_row_type_strategy(self, row_type, row, second_bank_df, updated_sot):
        """Apply the action for a classification produced by ``bank_row_type``.

        Args:
            row_type: classification string, or ``('unique match', index_label)``.
            row: the bank row as a Series labelled by the bank columns.
            second_bank_df: bank export; its columns are the fields copied on
                a unique match.
            updated_sot: ledger frame to update.

        Returns:
            The ledger frame: the same object (edited in place on a unique
            match) or a new frame with the row appended.

        Raises:
            Exception: if ``row_type`` is not a known classification.
        """
        if isinstance(row_type, (tuple, list)):
            label, match_index = row_type[0], row_type[1]
        else:
            label, match_index = row_type, None

        if label in ('existing bank match', 'multiple match'):
            return updated_sot  # Do nothing
        if label == 'brand new':
            # infer_objects restores datetime/float dtypes lost by the
            # transpose, so the ledger's 'Date' column stays datetime.
            new_row = row.to_frame().T.infer_objects()
            # ignore_index keeps labels unique; bank row labels would otherwise
            # collide with ledger labels and break label-based updates.
            return pd.concat([updated_sot, new_row], ignore_index=True)
        if label == 'unique match' and match_index is not None:
            # Overwrite the matched ledger row with the bank row's fields.
            for col in second_bank_df.columns:
                updated_sot.at[match_index, col] = row[col]
            return updated_sot
        raise Exception('unhandled case of bank row type')
                
    


# Load the ledger CSV into an SOT instance.
sot = SOT(sot_file='first_SOT.csv')



In [56]:
# Merge the bank export into the ledger; the merged frame is kept in `x`.
x = sot.update_bank(csv_location='25.11-25.12 -- readied.csv')


Description                                                                                        Date        Value  
'9285 30NOV15 , UDEMY INC. , +14156025987 IE                                                       2015-12-03  -8.00      2
'9285 11OCT15 , HARDEDGE READING , LIMIT , READING GB                                              2015-10-12  -55.00     1
'9285 11DEC15 , GIVEWELL , 6462332035 US , USD 5.00, VRATE 1.5105, FGN PUR FEE 1.00                2015-12-14  -4.31      1
'9285 11DEC15 C , SIMPLY FRESH , LONDON GB                                                         2015-12-14  -12.47     1
'9285 11NOV15 , BB *RAINFOREST , TRUST , 8004564930 US , USD 5.00, VRATE 1.5105, FGN PUR FEE 1.00  2015-11-12  -4.31      1
'9285 11NOV15 , GIVEWELL , 6462332035 US , USD 5.00, VRATE 1.5105, FGN PUR FEE 1.00                2015-11-12  -4.31      1
'9285 11OCT15 , BB *RAINFOREST , TRUST , 8004564930 US , USD 5.00, VRATE 1.5243, FGN PUR FEE 1.00  2015-10-12  -4.28      1
'9285 11OCT15