In [1]:
### RUN EVERY TIME: MONEY BACK VALUES COMPARISON

In [2]:
### RUN EVERY TIME: INITIALIZATION

import pandas as pd
import numpy as np
from datetime import date, datetime

In [3]:
### RUN EVERY TIME: GENERAL DATA PREPARATION

### Constants:
# Full-range slice object, i.e. the programmatic form of a bare ":" in an indexer.
All = slice(None, None, None)
### Paths:
# Input files, given relative to the notebook's working directory.
str_mtb_mb_list_source = "Test_Files/mtb_mb.html"                       # MTBank money back list (HTML)
str_alpha_valued_mb_reg_source = "Test_Files/alpha_valued_mb_reg.xlsx"  # Alpha Bank valued MCC categories registry
str_alpha_zero_mb_reg_source = "Test_Files/alpha_zero_mb_reg.xlsx"      # Alpha Bank zero money back MCC registry

In [4]:
### RUN EVERY TIME: MTBANK MONEY BACK LIST EXPORT

# Target column names, listed in the positional order of the source columns picked below,
# mapped to the dtype each one is cast to.
dict_mtb_mb_dtypes = {'Transaction_Date': 'datetime64[ns]',
                      'Account_Date': 'datetime64[ns]',
                      'Retail_Name': 'str',
                      'MCC': 'int',
                      'Purchase_Sum': 'float',
                      'MTB_MB_Percent': 'float',
                      'MTB_MB_Sum': 'float',
                     }
# Take the first HTML table that matches 'MCC' (18 leading rows skipped, header taken from row 1)
# and discard its last row.
df_mtb_raw = pd.read_html(str_mtb_mb_list_source,
                          match = 'MCC',
                          skiprows = list(range(18)),
                          header = 1,
                          parse_dates = True)[0].iloc[: -1]
# Keep seven of the source columns (chosen by position) and give them readable names.
df_mtb_mb = df_mtb_raw.loc[:, df_mtb_raw.columns[[0, 1, 3, 4, 7, 9, 10]]]
df_mtb_mb.columns = list(dict_mtb_mb_dtypes)
df_mtb_mb = df_mtb_mb.astype(dict_mtb_mb_dtypes)
# Both money columns are scaled down by 100 and a percent value of 5 is rewritten as 0.5.
# NOTE(review): presumably the parser drops the decimal separator of the source numbers - confirm against the file.
df_mtb_mb[['Purchase_Sum', 'MTB_MB_Sum']] = df_mtb_mb[['Purchase_Sum', 'MTB_MB_Sum']].div(100)
df_mtb_mb['MTB_MB_Percent'] = df_mtb_mb['MTB_MB_Percent'].mask(df_mtb_mb['MTB_MB_Percent'].eq(5), 0.5)
#df_mtb_mb

In [5]:
### RUN EVERY TIME: ALPHA BANK MONEY BACK ZERO MCC REGISTRY

# Load the first sheet of the zero money back registry; its two columns hold
# a list of MCC codes and a textual description of that list.
df_alpha_zero_mb_reg = pd.read_excel(str_alpha_zero_mb_reg_source, sheet_name = 0)
df_alpha_zero_mb_reg.columns = ['MCC_lists', 'MCC_Description']
df_alpha_zero_mb_reg = df_alpha_zero_mb_reg.astype({'MCC_lists': 'str',
                                                    'MCC_Description': 'str',
                                                   })
# Make every list uniformly ', '-separated: a separator is appended and ',<newline>' becomes ', '.
# The appended separator produces a trailing empty string after the split; it is mapped to NaN below.
df_alpha_zero_mb_reg['MCC_lists'] = (df_alpha_zero_mb_reg['MCC_lists'] + ', ').replace({r',\n': ', '}, regex = True)
# Flatten multi-line descriptions and collapse double spaces.
df_alpha_zero_mb_reg['MCC_Description'] = df_alpha_zero_mb_reg['MCC_Description'].replace({r'\n': ' ', '  ': ' '}, regex = True)
# One column per MCC code next to the description.
# np.nan is used instead of the np.NaN alias, which was removed in NumPy 2.0.
df_alpha_zero_mb_reg = \
        pd.concat([df_alpha_zero_mb_reg['MCC_Description'], df_alpha_zero_mb_reg['MCC_lists'].str.split(', ', expand = True).replace({'': np.nan, None: np.nan})], axis = 1)
# Long format: one integer MCC per row, indexed by description (stack() drops the NaN padding).
ser_alpha_zero_mb_reg = df_alpha_zero_mb_reg.set_index('MCC_Description').stack().reset_index(1, drop = True).astype('int')
ser_alpha_zero_mb_reg.name = 'MCC'
# Invert into a lookup: index = MCC, values = description.
# Explicit column selection is used instead of squeeze(), which would return a scalar for a one-row registry.
ser_alpha_zero_mb_reg = ser_alpha_zero_mb_reg.to_frame().reset_index().set_index('MCC')['MCC_Description']
#ser_alpha_zero_mb_reg

In [6]:
### RUN EVERY TIME: ALPHA BANK MONEY BACK VALUED MCC CATEGORIES REGISTRY

# Load the first sheet of the valued categories registry: category name,
# list of MCC codes and a category description.
df_alpha_valued_mb_reg = pd.read_excel(str_alpha_valued_mb_reg_source, sheet_name = 0)
df_alpha_valued_mb_reg.columns = ['Category_Name', 'MCC_lists', 'Category_Description']
df_alpha_valued_mb_reg = df_alpha_valued_mb_reg.astype({'Category_Name': 'str',
                                                        'MCC_lists': 'str',
                                                        'Category_Description': 'str',
                                                       })
# Normalise the lists: ',<newline>' becomes ', ' and each MCC range written in the sheet
# is replaced by an explicit ', '-separated enumeration.
# NOTE(review): unlike the other ranges, '3000 - 4000' is collapsed to the single code 4000
# rather than expanded - confirm this is intended.
df_alpha_valued_mb_reg['MCC_lists'] = df_alpha_valued_mb_reg['MCC_lists'].replace({r',\n': ', ',
                                                                                   '5531 - 5533': '5531, 5532, 5533',
                                                                                   '7511- 7513': '7511, 7512, 7513',
                                                                                   '5712-5714': '5712, 5713, 5714',
                                                                                   '5044-5046': '5044, 5045, 5046',
                                                                                   '5697 - 5699': '5697, 5698, 5699',
                                                                                   '3000 - 4000': '4000',
                                                                                   '5812-5816': '5812, 5813, 5814, 5815, 5816',
                                                                                   '8041 - 8044': '8041, 8042, 8043, 8044'
                                                                                  }, regex = True)
# One column per MCC code next to the category name.
# np.nan is used instead of the np.NaN alias, which was removed in NumPy 2.0.
df_alpha_valued_mb_reg = \
        pd.concat([df_alpha_valued_mb_reg['Category_Name'], df_alpha_valued_mb_reg['MCC_lists'].str.split(', ', expand = True).replace({None: np.nan})], axis = 1)
# Long format: one integer MCC per row, indexed by category name (stack() drops the NaN padding).
ser_alpha_valued_mb_reg = df_alpha_valued_mb_reg.set_index('Category_Name').stack().reset_index(1, drop = True).astype('int')
ser_alpha_valued_mb_reg.name = 'MCC'
# Invert into a lookup: index = MCC, values = category name, series named 'Category'.
# Explicit column selection is used instead of squeeze(), which would return a scalar for a one-row registry.
ser_alpha_valued_mb_reg = ser_alpha_valued_mb_reg.to_frame().reset_index().set_index('MCC')['Category_Name'].rename('Category')
#ser_alpha_valued_mb_reg

In [25]:
###  RUN EVERY TIME: ALTERNATIVE VALUATION

# Alpha_MB_Percent is expressed in percent: it is divided by 100 when Alpha_MB_Sum is computed below.
# MCC codes from the zero registry get a rate of 0.0.
df_aplha_zero = ser_alpha_zero_mb_reg.to_frame().rename(columns = {'MCC_Description' : 'Alpha_Description'})
df_aplha_zero['Alpha_MB_Percent'] = 0.0
# MCC codes from the valued categories registry get a rate of 1.0.
df_alpha_valued = ser_alpha_valued_mb_reg.to_frame().rename(columns = {'Category' : 'Alpha_Description'})
df_alpha_valued['Alpha_MB_Percent'] = 1.0
# Single MCC-indexed registry holding both groups.
df_alpha_reg = pd.concat([df_aplha_zero, df_alpha_valued], axis = 0, sort = False)
# Explicit rate for the 'Автомобиль' rows (currently the same 1.0 that valued categories get by default).
df_alpha_reg.loc[df_alpha_reg['Alpha_Description'] == 'Автомобиль', 'Alpha_MB_Percent'] = 1.0
# Attach the Alpha Bank description and rate to every MTBank purchase by MCC.
# NOTE(review): an MCC that occurs more than once in df_alpha_reg would duplicate the purchase rows
# in this left join and inflate the totals - confirm the registries do not overlap.
df_compare_mb = df_mtb_mb.join(df_alpha_reg, on = 'MCC', how = 'left').drop('Account_Date', axis = 1)
# Purchases whose MCC is in neither registry: rate 0 and the 'Не обнаружено' label.
df_compare_mb['Alpha_MB_Percent'] = df_compare_mb['Alpha_MB_Percent'].fillna(0)
df_compare_mb['Alpha_Description'] = df_compare_mb['Alpha_Description'].fillna('Не обнаружено')
df_compare_mb['Alpha_MB_Sum'] = (df_compare_mb['Purchase_Sum'] * df_compare_mb['Alpha_MB_Percent'] / 100).round(2)
# Overall totals for both banks.
print(df_compare_mb[['MTB_MB_Sum', 'Alpha_MB_Sum']].sum())
#df_compare_mb.to_excel('Test_Files/MB_Comparing_Results.xlsx', merge_cells = False)
# Totals per Alpha Bank description; the built-in aggregation replaces groupby.apply with a lambda,
# which gives the same table without the grouping-columns deprecation warning of recent pandas.
df_compare_mb.groupby('Alpha_Description')[['MTB_MB_Sum', 'Alpha_MB_Sum']].sum()

MTB_MB_Sum      16.65
Alpha_MB_Sum    14.41
dtype: float64


Unnamed: 0_level_0,MTB_MB_Sum,Alpha_MB_Sum
Alpha_Description,Unnamed: 1_level_1,Unnamed: 2_level_1
Автомобиль,0.56,0.55
Не обнаружено,0.25,0.0
Покупки,2.85,2.85
Путешествия,0.24,0.24
Развлечения,1.45,1.45
Семья и личные расходы,7.29,7.29
Спорт,2.03,2.03
"операции, связанные с покупкой продуктов питания",1.98,0.0
