# Preppin' Data 2023W05

#### Load data

In [15]:
import pandas as pd
import datetime as dt

In [51]:
# Load the raw transaction extract for the 2023 week 5 challenge.
df = pd.read_csv('2023W05_input.csv')

In [52]:
df.head()

Unnamed: 0,Transaction Code,Value,Customer Code,Online or In-Person,Transaction Date
0,DTB-716-679-576,1448,100001,2,20/03/2023 00:00:00
1,DS-795-814-303,7839,100001,2,15/11/2023 00:00:00
2,DSB-807-592-406,5520,100005,1,14/07/2023 00:00:00
3,DS-367-545-264,7957,100007,2,18/08/2023 00:00:00
4,DSB-474-374-857,5375,100000,2,26/08/2023 00:00:00


#### Create the bank code by splitting off the letters from the Transaction Code

In [53]:
# The bank code is the text before the first hyphen,
# e.g. 'DTB-716-679-576' -> 'DTB'. Only the first piece is used, so a
# single split (n=1) is enough.
df['Bank'] = df['Transaction Code'].str.split('-', n=1).str.get(0)

In [54]:
df.head()

Unnamed: 0,Transaction Code,Value,Customer Code,Online or In-Person,Transaction Date,Bank
0,DTB-716-679-576,1448,100001,2,20/03/2023 00:00:00,DTB
1,DS-795-814-303,7839,100001,2,15/11/2023 00:00:00,DS
2,DSB-807-592-406,5520,100005,1,14/07/2023 00:00:00,DSB
3,DS-367-545-264,7957,100007,2,18/08/2023 00:00:00,DS
4,DSB-474-374-857,5375,100000,2,26/08/2023 00:00:00,DSB


#### Change Transaction Date to be just the month of the transaction

In [55]:
# The input dates are day-first (dd/mm/yyyy hh:mm:ss); parse them as such,
# then keep only the full month name ('March', 'November', ...).
df['Transaction Date'] = (
    pd.to_datetime(df['Transaction Date'], dayfirst=True)
    .dt.strftime('%B')
)

In [56]:
df.head()

Unnamed: 0,Transaction Code,Value,Customer Code,Online or In-Person,Transaction Date,Bank
0,DTB-716-679-576,1448,100001,2,March,DTB
1,DS-795-814-303,7839,100001,2,November,DS
2,DSB-807-592-406,5520,100005,1,July,DSB
3,DS-367-545-264,7957,100007,2,August,DS
4,DSB-474-374-857,5375,100000,2,August,DSB


#### Drop additional columns

In [57]:
# Discard the columns that play no part in the bank/month aggregation.
df = df.drop(
    columns=['Transaction Code', 'Customer Code', 'Online or In-Person'],
)

#### Total up the transaction values so you have one row for each bank and month combination

In [None]:
# One row per (Bank, month) holding the summed transaction Value.
# as_index=False keeps the grouping keys as ordinary columns.
df2 = df.groupby(['Bank', 'Transaction Date'], as_index=False)['Value'].sum()

In [None]:
df2.head()

Unnamed: 0,Bank,Transaction Date,Value
0,DS,April,40785
1,DS,August,102237
2,DS,December,33952
3,DS,February,31204
4,DS,January,50207


#### Rank each bank by its total transaction value within each month

In [102]:
# Rank the banks within each month by total Value; the highest value is rank 1.
# method='min' gives tied banks the same whole-number rank (1, 1, 3, ...).
# The default 'average' method yields fractional ranks on ties, which
# astype(int) would silently truncate (a 3-way tie for first would become
# rank 2 for everyone, leaving no bank ranked 1).
df2['Bank Rank per Month'] = (
    df2.groupby('Transaction Date')['Value']
    .rank(method='min', ascending=False)
    .astype(int)
)

In [104]:
df2.head()

Unnamed: 0,Bank,Transaction Date,Value,Bank Rank per Month
0,DS,April,40785,2
1,DS,August,102237,1
2,DS,December,33952,2
3,DS,February,31204,2
4,DS,January,50207,2


#### Average rank per bank

In [122]:
# Mean monthly rank of each bank, repeated on every row of that bank.
# transform('mean') returns a result aligned to df2's own row index. A plain
# groupby(...).mean() is indexed by Bank instead, so assigning it to a column
# fails to align with df2's integer index and produces NaN.
df2['Avg Rank per Bank'] = (
    df2.groupby('Bank')['Bank Rank per Month']
    .transform('mean')
)

In [123]:
df2.head()

Unnamed: 0,Bank,Transaction Date,Value,Bank Rank per Month,Avg Rank per Bank
0,DS,April,40785,2,1.916667
1,DS,August,102237,1,1.916667
2,DS,December,33952,2,1.916667
3,DS,February,31204,2,1.916667
4,DS,January,50207,2,1.916667


#### Average transaction value per rank

In [127]:
# For each rank position, average the Value of every row holding that rank
# and broadcast the result back onto those rows.
# NOTE(review): the column name is spelled 'Tranasction'; it is left as-is
# because it becomes a header in the output CSV. Confirm with consumers of
# that file before renaming.
df2['Avg Tranasction Value per Rank'] = (
    df2.groupby('Bank Rank per Month')['Value']
    .transform('mean')
)

In [128]:
df2.head()

Unnamed: 0,Bank,Transaction Date,Value,Bank Rank per Month,Avg Rank per Bank,Avg Tranasction Value per Rank
0,DS,April,40785,2,1.916667,48633.666667
1,DS,August,102237,1,1.916667,66967.75
2,DS,December,33952,2,1.916667,48633.666667
3,DS,February,31204,2,1.916667,48633.666667
4,DS,January,50207,2,1.916667,48633.666667


#### Output

In [130]:
# Write the final table to disk; index=False leaves the integer row index out of the file.
df2.to_csv('2023W05_output.csv', index=False)