# Data Science – Skills Assessment 
### Rohit Garg

---
# 1. import libraries

In [1]:
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
pd.set_option('display.max_rows', None)
warnings.filterwarnings("ignore")
plt.style.use('seaborn-white')
plt.rcParams['figure.figsize'] = 15,5
%matplotlib inline

---
# 2. import dataset
* On each successful transfer, the user credits the transfer amount to the sending country and debits the net amount (transfer amount − transfer fee) at the receiving country.
* A transfer can only be made if the receiving country holds at least the transfer amount, e.g. if a user is sending 500 X from the USA to the UK, it will only be executed if the UK has at least 500 X to give to the user.

In [3]:
# Read the cleaned transfers file, keep the four columns used below and
# expose the 'To' column under the name 'Amount'.
df = (
    pd.read_csv('1_clean_data.csv')[['Country', 'Currency', 'Time', 'To']]
    .rename(columns={'To': 'Amount'})
)
df.shape

(204, 4)

---
# 3. all combinations

In [4]:
# Build the full Country x Currency x Time grid from the distinct values seen
# in df, so that combinations with no recorded transfer still get a row.
countries = df['Country'].unique()
currencies = df['Currency'].unique()
hours = df['Time'].unique()

# Country varies slowest and Time fastest, matching a nested loop over
# countries -> currencies -> hours.
grid = [
    (country, currency, hour)
    for country in countries
    for currency in currencies
    for hour in hours
]

df1 = pd.DataFrame({
    'Country': [row[0] for row in grid],
    'Currency': [row[1] for row in grid],
    'Time': [row[2] for row in grid],
})
df1.shape

(225, 3)

In [5]:
# Attach the observed amounts to the full grid; combinations that have no
# matching row in df come back as NaN from the left join and are set to 0.
df2 = df1.merge(df, how='left', on=['Country', 'Currency', 'Time']).fillna(0)
df2.shape

(225, 4)

---
# 4. amounts

In [6]:
# Print the column dtypes and non-null counts of the filled grid.
df2.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 225 entries, 0 to 224
Data columns (total 4 columns):
Country     225 non-null object
Currency    225 non-null object
Time        225 non-null int64
Amount      225 non-null float64
dtypes: float64(1), int64(1), object(2)
memory usage: 8.8+ KB


In [7]:
# Label every row with the six-hour window its hour belongs to.
# Any hour not listed in the first three windows receives the default label.
# NOTE(review): the recorded shapes (225 grid rows, 36 groups) imply 25 distinct
# Time values, so at least one hour lies outside 0-23 and silently lands in the
# catch-all 'd' window - confirm that this is intended.
hour_of_day = df2['Time']
six_hour_windows = [
    (list(range(4, 10)), 'a. 4 am to 9 am'),
    (list(range(10, 16)), 'b. 10 am to 3 pm'),
    (list(range(16, 22)), 'c. 4 pm to 9 pm'),
]
df2['Time_06'] = np.select(
    [hour_of_day.isin(window_hours).values for window_hours, window_label in six_hour_windows],
    [window_label for window_hours, window_label in six_hour_windows],
    default='d. 10 pm to 3 am',
)

# Total Amount per (Country, Currency, six-hour window).
df3 = (
    df2.groupby(['Country', 'Currency', 'Time_06'])['Amount']
    .sum()
    .reset_index(name='Amount_06')
)
df3.shape

(36, 4)

In [8]:
# Split the day into two twelve-hour windows: hours 10..21 versus everything
# else (the second label is the fallback for any hour not in 10..21).
daytime_hours = list(range(10, 22))
in_daytime = df2['Time'].isin(daytime_hours)
df2['Time_12'] = np.where(in_daytime, 'a. 10 am to 9 pm', 'b. 10 pm to 9 am')

# Total Amount per (Country, Currency, twelve-hour window).
df4 = (
    df2.groupby(['Country', 'Currency', 'Time_12'])['Amount']
    .sum()
    .reset_index(name='Amount_12')
)
df4.shape

(18, 4)

---
# 5. export dataset

In [9]:
# Bring the six-hour and twelve-hour window totals back onto the hourly grid,
# so each hourly row carries its own Amount plus both window totals.
df5 = (
    df2
    .merge(df3, how='left', on=['Country', 'Currency', 'Time_06'])
    .merge(df4, how='left', on=['Country', 'Currency', 'Time_12'])
)

# Write the combined table to disk without the row index.
df5.to_csv('2_time_series.csv', index=False)
df5.shape

(225, 8)