# Preppin Data 2023 Week 1

source: https://preppindata.blogspot.com/2023/01/2023-week-1-data-source-bank.html

### Load file

In [188]:
import pandas as pd
import datetime as dt

In [174]:
# Read the raw transactions for the week-1 challenge into a DataFrame.
df = pd.read_csv(filepath_or_buffer='2023W01_input.csv')

In [175]:
# Preview the first five rows of the freshly loaded data.
df.head(n=5)

Unnamed: 0,Transaction Code,Value,Customer Code,Online or In-Person,Transaction Date
0,DTB-716-679-576,1448,100001,2,20/03/2023 00:00:00
1,DS-795-814-303,7839,100001,2,15/11/2023 00:00:00
2,DSB-807-592-406,5520,100005,1,14/07/2023 00:00:00
3,DS-367-545-264,7957,100007,2,18/08/2023 00:00:00
4,DSB-474-374-857,5375,100000,2,26/08/2023 00:00:00


### Create new column for Bank

#### Split the Transaction Code on the hyphen and store the leading letters in a new Bank column

In [176]:
# The bank identifier is the text before the first hyphen of each code.
df['Bank'] = df['Transaction Code'].str.split(pat='-').str.get(0)

In [177]:
# Preview the first five rows to confirm the Bank column was added.
df.head(n=5)

Unnamed: 0,Transaction Code,Value,Customer Code,Online or In-Person,Transaction Date,Bank
0,DTB-716-679-576,1448,100001,2,20/03/2023 00:00:00,DTB
1,DS-795-814-303,7839,100001,2,15/11/2023 00:00:00,DS
2,DSB-807-592-406,5520,100005,1,14/07/2023 00:00:00,DSB
3,DS-367-545-264,7957,100007,2,18/08/2023 00:00:00,DS
4,DSB-474-374-857,5375,100000,2,26/08/2023 00:00:00,DSB


### Rename Online or In-Person values. 1 = Online and 2 = In-Person

In [178]:
# Translate the numeric channel codes into readable labels; any value that
# is not listed in the mapping is left unchanged by replace().
df['Online or In-Person'] = df['Online or In-Person'].replace(
    to_replace={1: 'Online', 2: 'In-Person'},
)

In [179]:
# Preview the first five rows to confirm the channel labels were applied.
df.head(n=5)

Unnamed: 0,Transaction Code,Value,Customer Code,Online or In-Person,Transaction Date,Bank
0,DTB-716-679-576,1448,100001,In-Person,20/03/2023 00:00:00,DTB
1,DS-795-814-303,7839,100001,In-Person,15/11/2023 00:00:00,DS
2,DSB-807-592-406,5520,100005,Online,14/07/2023 00:00:00,DSB
3,DS-367-545-264,7957,100007,In-Person,18/08/2023 00:00:00,DS
4,DSB-474-374-857,5375,100000,In-Person,26/08/2023 00:00:00,DSB


### Change the date to be the day of the week

#### `dayfirst=True` is needed because the dates are in DD/MM/YYYY format; without it pandas may read the day as the month and return the wrong day name

In [180]:
# Parse the timestamps day-first, then keep only the weekday name.
# NOTE: this overwrites the original dates with strings such as 'Monday'.
df['Transaction Date'] = (
    pd.to_datetime(arg=df['Transaction Date'], dayfirst=True)
    .dt.strftime('%A')
)

In [181]:
# Preview the first five rows to confirm the dates became weekday names.
df.head(n=5)

Unnamed: 0,Transaction Code,Value,Customer Code,Online or In-Person,Transaction Date,Bank
0,DTB-716-679-576,1448,100001,In-Person,Monday,DTB
1,DS-795-814-303,7839,100001,In-Person,Wednesday,DS
2,DSB-807-592-406,5520,100005,Online,Friday,DSB
3,DS-367-545-264,7957,100007,In-Person,Friday,DS
4,DSB-474-374-857,5375,100000,In-Person,Saturday,DSB


## Three outputs required

### Output 1: total values of transactions by each bank

In [182]:
# Output 1: sum of Value for every Bank (a Series indexed by Bank).
df1 = df.groupby(by='Bank')['Value'].agg('sum')

In [183]:
# Echo the per-bank totals so the notebook renders them.
df1

Bank
DS     653940
DSB    530489
DTB    618238
Name: Value, dtype: int64

In [54]:
# Write output 1 to disk; the Bank index is written as the first column.
df1.to_csv(path_or_buf='2023W01_output_1.csv')

### Output 2: Total Values by Bank, Day of the Week and Type of Transaction

In [184]:
# Output 2: sum of Value for each bank / transaction type / weekday
# combination (a Series with a three-level index).
df2 = df.groupby(
    by=['Bank', 'Online or In-Person', 'Transaction Date'],
)['Value'].agg('sum')

In [185]:
# Echo the bank / transaction type / weekday totals so the notebook renders them.
df2

Bank  Online or In-Person  Transaction Date
DS    In-Person            Friday              58599
                           Monday              42806
                           Saturday            34867
                           Sunday              51301
                           Thursday            75582
                           Tuesday             32607
                           Wednesday           63686
      Online               Friday              58731
                           Monday              33563
                           Saturday            71357
                           Sunday              21761
                           Thursday            13337
                           Tuesday             36639
                           Wednesday           59104
DSB   In-Person            Friday               9402
                           Monday              43546
                           Saturday            72679
                           Sunday              37755
  

In [118]:
# Write output 2 to disk; the three index levels are written as leading columns.
df2.to_csv(path_or_buf='2023W01_output_2.csv')

### Output 3: total values by Bank and Customer Code

In [186]:
# Output 3: sum of Value for each bank / customer pair
# (a Series with a two-level index).
df3 = df.groupby(by=['Bank', 'Customer Code'])['Value'].agg('sum')

In [187]:
# Echo the bank / customer totals so the notebook renders them.
df3

Bank  Customer Code
DS    100000           57909
      100001           53063
      100002           69803
      100003           25482
      100004           63315
      100005           39668
      100006           77636
      100007           76190
      100008           56400
      100009           56581
      100010           77893
DSB   100000           27585
      100001           67856
      100002           27936
      100003           58154
      100004           39003
      100005           56396
      100006           32333
      100007           29702
      100008           47121
      100009           51749
      100010           92654
DTB   100000           77252
      100001           60675
      100002           48616
      100003           84574
      100004           44435
      100005           37795
      100006           41909
      100007           29308
      100008           69352
      100009           52926
      100010           71396
Name: Value, dtype: int64

In [91]:
# Write output 3 to disk; both index levels are written as leading columns.
df3.to_csv(path_or_buf='2023W01_output_3.csv')