In [198]:
import numpy as np
from numpy import random as rnd
from matplotlib import pyplot as plt
import datetime,os

import torch
import pandas as pd

from fuzzywuzzy import fuzz

import re
from itertools import product

In [141]:
# Root folder of the input data: the raw transaction exports and the account
# tracker workbook are read from sub-folders of this path.
newdatapath = '..\\..\\..\\..\\TF_data\\ADMIN\\v2\\'
# Target folder for the generated Excel reports (a sub-folder of the data root).
storepath = newdatapath + '04_DATA\\'

In [140]:
# Translation table from the German column headers found in the transaction
# exports to the English column names used in the rest of this notebook.
# Note that two pairs of source headers map onto the same target name
# ('Kontonummer/IBAN' + 'Kontonummer' -> 'AccNum', 'BIC (SWIFT-Code)' + 'BLZ' -> 'BIC').
col_rename_dict = dict([
    # dates and free-text fields
    ('Valutadatum', 'Date_ordered'),
    ('Buchungstag', 'Date_booked'),
    ('Buchungstext', 'Text_transaction'),
    ('Verwendungszweck', 'Use'),
    # counterparty and amount
    ('Beguenstigter/Zahlungspflichtiger', 'Contact'),
    ('Kontonummer/IBAN', 'AccNum'),
    ('Kontonummer', 'AccNum'),
    ('Betrag', 'Value_transaction'),
    ('Auftragskonto', 'OrderAccount'),
    # reference / direct-debit detail columns
    ('Glaeubiger ID', 'LenderID'),
    ('Mandatsreferenz', 'MandateReference'),
    ('Kundenreferenz (End-to-End)', 'CustomerReference'),
    ('Sammlerreferenz', 'CollectorReference'),
    ('Lastschrift Ursprungsbetrag', 'Amount0'),
    ('Auslagenersatz Ruecklastschrift', 'Amount1'),
    # bank identification and remaining fields
    ('BIC (SWIFT-Code)', 'BIC'),
    ('BLZ', 'BIC'),
    ('Waehrung', 'Currency'),
    ('Info', 'Information'),
])

In [502]:
# Load every single-transaction export (Excel or CSV) found in the input
# folder into `df_list`, one DataFrame per file, with the unused detail
# columns removed and the remaining headers renamed via `col_rename_dict`.
df_list = []

c_path = newdatapath+'01_SingleTransactions\\'

# Columns that are discarded right after loading (ignored when a file lacks them)
_unused_cols = [
    'Glaeubiger ID',
    'Mandatsreferenz',
    'Kundenreferenz (End-to-End)',
    'Sammlerreferenz',
    'Lastschrift Ursprungsbetrag',
    'Auslagenersatz Ruecklastschrift'
]

# Sorted listing: os.listdir() returns entries in arbitrary order, and the
# later drop_duplicates(keep='first') keeps whichever copy was loaded first,
# so a fixed load order keeps the result reproducible across machines.
for c_file in sorted(os.listdir(c_path)):
    f_term = c_file.split('.')[-1].lower()
    if f_term in ('xls', 'xlsx'):
        c_frame = pd.read_excel(c_path+c_file)
    elif f_term == 'csv':
        c_frame = pd.read_csv(c_path+c_file,sep=';',encoding='latin-1')
    else:
        # Not a supported export format -> skip
        continue
    c_frame = c_frame.drop(columns=_unused_cols,errors='ignore').rename(columns=col_rename_dict)
    if c_file=='20191215.xlsx':
        # Special case for this one file: a '.' is inserted before the last two
        # characters of each amount (presumably the export lost its decimal
        # separator -- TODO confirm against the raw file).
        # The same check used to be repeated in the CSV branch, where it could
        # never match, so it now lives in a single place.
        c_frame['Value_transaction'] = c_frame['Value_transaction'].apply(
            lambda x: float(str(x)[:-2]+'.'+str(x)[-2:])
        )
    df_list.append(c_frame)

In [503]:
# Stack all loaded files into one frame with a fresh 0..n-1 row index
df = pd.concat(df_list, ignore_index=True)

In [504]:
# Parse both date columns from 'DD.MM.YY' text (two-digit year) into datetimes
for date_col in ('Date_booked', 'Date_ordered'):
    df[date_col] = pd.to_datetime(df[date_col], format='%d.%m.%y')

In [505]:
# Amounts may use a decimal comma: stringify every cell, swap ',' for '.'
# (literal replacement, no regex) and convert the result to float64.
amount_text = df['Value_transaction'].map(str)
df['Value_transaction'] = amount_text.str.replace(',', '.', regex=False).astype(np.float64)

In [506]:
# Normalise the free-text columns: every run of non-word characters is
# collapsed into a single blank. Cells are passed through str() first, so a
# NaN cell ends up as the text 'nan'.
non_word_run = re.compile(r'\W+')
for colname in ('Text_transaction', 'Use', 'Contact'):
    df[colname] = df[colname].map(lambda cell: non_word_run.sub(' ', str(cell)))

In [507]:
# Columns compared when looking for duplicate bookings; rows agreeing on all
# of them are collapsed to their first occurrence.
dedup_keys = [
    'OrderAccount',
    'Date_booked',
    'Date_ordered',
    'Text_transaction',
    'Use',
    #'Contact',   # currently excluded from the duplicate check
    'AccNum',
    'BIC',
    'Value_transaction',
    'Currency',
    'Information',
]
df = df.drop_duplicates(subset=dedup_keys, keep='first')
# Renumber the surviving rows 0..n-1
df = df.reset_index(drop=True)

In [508]:
# Persist the cleaned, de-duplicated transaction table
stf_file = storepath+'STF.xlsx'
df.to_excel(stf_file)

In [509]:
# Keep only booking date and amount, and of those only the rows whose amount
# is not >= 0 (i.e. the negative bookings; NaN amounts are kept as well,
# since NaN >= 0 evaluates to False).
tf = df[['Date_booked','Value_transaction']]
non_negative = tf['Value_transaction']>=0
tf = tf.loc[~non_negative].reset_index(drop=True)

In [510]:
# Generate interesting time indices directly from the datetime column
# (no detour through formatted strings and re-parsing).
# Month bucket: timestamp of the first day of the booking month
tf['YYYY-MM'] = tf['Date_booked'].dt.to_period('M').dt.to_timestamp()
# Day bucket: booking date truncated to midnight
tf['YYYY-MM-DD'] = tf['Date_booked'].dt.normalize()
# Drop unformatted column
tf = tf.drop(columns=['Date_booked'])

In [511]:
# Sum the amounts per month bucket and per day bucket; the sums are turned
# into absolute values so the aggregates are reported as positive numbers.
monthly_groups = tf.groupby('YYYY-MM')
daily_groups = tf.groupby('YYYY-MM-DD')
rf_monthly = monthly_groups[['Value_transaction']].sum().abs()
rf_daily = daily_groups[['Value_transaction']].sum().abs()

In [512]:
# Expose the datetime index of both aggregates as regular columns.
# The values are read straight from the index instead of via DataFrame.apply:
# the former column-wise apply only worked because each frame had exactly one
# column, and the row-wise apply is slow and fails on an empty frame.
# Extract full date
rf_monthly['Date'] = rf_monthly.index
rf_daily['Date'] = rf_daily.index
# Extract years
rf_daily['Year'] = rf_daily.index.year
rf_monthly['Year'] = rf_monthly.index.year
# Extract months
rf_daily['Month'] = rf_daily.index.month
rf_monthly['Month'] = rf_monthly.index.month
# Extract days
rf_daily['Day'] = rf_daily.index.day
rf_monthly['Day'] = rf_monthly.index.day

In [513]:
# Reference dates: the latest booked day in the data, plus the same date
# shifted back by one, two and three calendar months.
c_date = tf['YYYY-MM-DD'].max()
p_date, pp_date, ppp_date = (
    c_date-pd.DateOffset(months=months_back) for months_back in (1, 2, 3)
)

In [514]:
# Current month: daily sums laid out over every calendar day of the month
# of c_date, with 0 for days that have no entry in rf_daily.
cur_year, cur_month = c_date.year, c_date.month
# First day of the following month (rolls over into January of the next year)
nxt_year = cur_year+1 if cur_month==12 else cur_year
nxt_month = cur_month%12+1
start_date = '{}-{:02d}-01'.format(cur_year, cur_month)
end_date = '{}-{:02d}-01'.format(nxt_year, nxt_month)
# date_range includes its end point, so the trailing first-of-next-month is cut off
c_index = pd.date_range(start=start_date, end=end_date)[:-1]
in_month = (rf_daily['Year']==cur_year)&(rf_daily['Month']==cur_month)
tt = rf_daily.loc[in_month, ['Value_transaction']]
m0 = tt.reindex(c_index, fill_value=0)

In [515]:
# Current month - 1: daily sums laid out over every calendar day of the
# month of p_date, with 0 for days that have no entry in rf_daily.
p_year, p_month = p_date.year, p_date.month
# First day of the following month (rolls over into January of the next year)
p_next_year = p_year+1 if p_month==12 else p_year
p_next_month = p_month%12+1
start_date = '{}-{:02d}-01'.format(p_year, p_month)
end_date = '{}-{:02d}-01'.format(p_next_year, p_next_month)
# date_range includes its end point, so the trailing first-of-next-month is cut off
c_index = pd.date_range(start=start_date, end=end_date)[:-1]
in_month = (rf_daily['Year']==p_year)&(rf_daily['Month']==p_month)
tt = rf_daily.loc[in_month, ['Value_transaction']]
m1 = tt.reindex(c_index, fill_value=0)

In [516]:
# Current month - 2: daily sums laid out over every calendar day of the
# month of pp_date, with 0 for days that have no entry in rf_daily.
pp_year, pp_month = pp_date.year, pp_date.month
# First day of the following month (rolls over into January of the next year)
pp_next_year = pp_year+1 if pp_month==12 else pp_year
pp_next_month = pp_month%12+1
start_date = '{}-{:02d}-01'.format(pp_year, pp_month)
end_date = '{}-{:02d}-01'.format(pp_next_year, pp_next_month)
# date_range includes its end point, so the trailing first-of-next-month is cut off
c_index = pd.date_range(start=start_date, end=end_date)[:-1]
in_month = (rf_daily['Year']==pp_year)&(rf_daily['Month']==pp_month)
tt = rf_daily.loc[in_month, ['Value_transaction']]
m2 = tt.reindex(c_index, fill_value=0)

In [517]:
# Current month - 3: daily sums laid out over every calendar day of the
# month of ppp_date, with 0 for days that have no entry in rf_daily.
ppp_year, ppp_month = ppp_date.year, ppp_date.month
# First day of the following month (rolls over into January of the next year)
ppp_next_year = ppp_year+1 if ppp_month==12 else ppp_year
ppp_next_month = ppp_month%12+1
start_date = '{}-{:02d}-01'.format(ppp_year, ppp_month)
end_date = '{}-{:02d}-01'.format(ppp_next_year, ppp_next_month)
# date_range includes its end point, so the trailing first-of-next-month is cut off
c_index = pd.date_range(start=start_date, end=end_date)[:-1]
in_month = (rf_daily['Year']==ppp_year)&(rf_daily['Month']==ppp_month)
tt = rf_daily.loc[in_month, ['Value_transaction']]
m3 = tt.reindex(c_index, fill_value=0)

In [518]:
# Write the four month windows into one workbook, one sheet per window
# (p0 = month of c_date, p1..p3 = one to three months earlier).
# The writer is used as a context manager so the workbook is saved and the
# file handle released when the block ends; a bare ExcelWriter that is never
# closed does not reliably flush the sheets to disk.
with pd.ExcelWriter(storepath+'RunningMonthlyExp.xlsx',engine='openpyxl') as writer:
    m0.to_excel(writer,sheet_name='p0')
    m1.to_excel(writer,sheet_name='p1')
    m2.to_excel(writer,sheet_name='p2')
    m3.to_excel(writer,sheet_name='p3')

In [519]:
# Split the bookings into positive amounts (pf) and negative amounts (nf);
# zero-valued bookings end up in neither frame.
bf = df[['Date_booked','Value_transaction']]

pf = bf[bf['Value_transaction']>0].reset_index(drop=True)
nf = bf[bf['Value_transaction']<0].reset_index(drop=True)

# Month bucket = timestamp of the first day of the booking month.
# Computed from the datetime column directly: the previous 'MM-YYYY' string
# round trip relied on the date parser guessing an ambiguous format and did
# not match the 'YYYY-MM' layout the column name promises.
pf['YYYY-MM'] = pf['Date_booked'].dt.to_period('M').dt.to_timestamp()
nf['YYYY-MM'] = nf['Date_booked'].dt.to_period('M').dt.to_timestamp()

In [520]:
# The raw booking date is no longer needed once the month bucket exists
pf, nf = (frame.drop(columns=['Date_booked']) for frame in (pf, nf))

In [521]:
# Collapse both frames to one summed row per month bucket
pf, nf = (frame.groupby(['YYYY-MM']).sum() for frame in (pf, nf))

In [522]:
# Combine monthly expenses (nf) and revenue (pf) into one table indexed by month.
# An outer join on the month index is used so that a month appearing in only
# one of the two frames is kept (the missing side is reported as 0) instead
# of being silently dropped, as an inner merge would do.
mf = nf.join(pf,how='outer',lsuffix='_x',rsuffix='_y').rename(columns={
    'Value_transaction_x':'expenses',
    'Value_transaction_y':'revenue'
}).fillna(0)
# nf holds negative sums; report expenses as positive numbers
mf['expenses'] = mf['expenses'].abs()

In [523]:
# Persist the monthly expenses/revenue table
monthly_aggregate_file = storepath+'MonthlyExpRevAggregate.xlsx'
mf.to_excel(monthly_aggregate_file)

In [524]:
# Load the account tracker workbook; its 'Date' column is parsed as datetime
# and used as the row index.
account_tracker_file = newdatapath+'02_AccountTotal\\AccountTracker.xlsx'
at = pd.read_excel(
    account_tracker_file,
    index_col='Date',
    parse_dates=['Date'],
)

In [525]:
# Store a copy of the account tracker next to the other generated reports
atr_file = storepath+'ATR.xlsx'
at.to_excel(atr_file)

In [526]:
dp = 

Unnamed: 0_level_0,Account Total,Min Month,Max Month
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-12-31,1359.33,1359.33,1413.99
2019-01-31,1016.79,231.83,1659.33
2019-02-28,1093.33,676.47,1487.39
2019-03-31,1198.54,796.99,1529.94
2019-04-30,-16.39,-16.39,1508.56
2019-05-31,1259.92,-83.17,1546.77
2019-06-30,2858.6,521.28,3007.05
2019-07-31,3754.34,1378.07,3783.22
2019-08-31,4853.12,2802.13,5106.48
2019-09-30,6230.5,4146.89,6686.24
