In [1]:
import os
from datetime import datetime
from pathlib import Path
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine

## EXTRACT

In [2]:
# Location of the raw exchange-rate CSV. The default keeps the original path;
# set the EXRATES_CSV environment variable to run the notebook on another machine.
EXRATES_CSV = Path(os.environ.get('EXRATES_CSV', 'D:/binar/platinum_challenge/Exchange_Rates.csv'))
df_exrates = pd.read_csv(EXRATES_CSV)

In [3]:
df_exrates

Unnamed: 0,Date,Currency,Exchange
0,1/1/2015,USD,1.0000
1,1/1/2015,CAD,1.1583
2,1/1/2015,AUD,1.2214
3,1/1/2015,EUR,0.8237
4,1/1/2015,GBP,0.6415
...,...,...,...
11210,2/20/2021,USD,1.0000
11211,2/20/2021,CAD,1.2610
11212,2/20/2021,AUD,1.2723
11213,2/20/2021,EUR,0.8238


## PROFILING

In [4]:
df_exrates.columns

Index(['Date', 'Currency', 'Exchange'], dtype='object')

In [5]:
df_exrates.duplicated()

0        False
1        False
2        False
3        False
4        False
         ...  
11210    False
11211    False
11212    False
11213    False
11214    False
Length: 11215, dtype: bool

In [6]:
# Count fully duplicated rows with the vectorised Series.sum() instead of
# iterating the boolean Series in Python; int() keeps the plain-integer display.
int(df_exrates.duplicated().sum())

0

In [7]:
#null check
df_exrates.isna().sum()


Date        0
Currency    0
Exchange    0
dtype: int64

In [9]:
df_exrates[df_exrates.isna().any(axis=1)]

Unnamed: 0,Date,Currency,Exchange


In [10]:
df_exrates.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11215 entries, 0 to 11214
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Date      11215 non-null  object 
 1   Currency  11215 non-null  object 
 2   Exchange  11215 non-null  float64
dtypes: float64(1), object(2)
memory usage: 263.0+ KB


In [11]:
# Distinct currency codes present in the raw data, as a plain Python list.
df_exrates['Currency'].unique().tolist()

['USD', 'CAD', 'AUD', 'EUR', 'GBP']

## CLEANSING

In [12]:
df_exrates_clean = df_exrates.copy()

In [13]:
df_exrates_clean

Unnamed: 0,Date,Currency,Exchange
0,1/1/2015,USD,1.0000
1,1/1/2015,CAD,1.1583
2,1/1/2015,AUD,1.2214
3,1/1/2015,EUR,0.8237
4,1/1/2015,GBP,0.6415
...,...,...,...
11210,2/20/2021,USD,1.0000
11211,2/20/2021,CAD,1.2610
11212,2/20/2021,AUD,1.2723
11213,2/20/2021,EUR,0.8238


In [14]:
# Normalise every column header to lowercase.
df_exrates_clean.columns = [str.lower(column_name) for column_name in df_exrates_clean.columns]

In [15]:
df_exrates_clean

Unnamed: 0,date,currency,exchange
0,1/1/2015,USD,1.0000
1,1/1/2015,CAD,1.1583
2,1/1/2015,AUD,1.2214
3,1/1/2015,EUR,0.8237
4,1/1/2015,GBP,0.6415
...,...,...,...
11210,2/20/2021,USD,1.0000
11211,2/20/2021,CAD,1.2610
11212,2/20/2021,AUD,1.2723
11213,2/20/2021,EUR,0.8238


In [16]:
# The raw values are month/day/year strings (e.g. '2/20/2021'). Pinning the
# format removes day/month guessing and makes malformed rows fail loudly.
df_exrates_clean['date'] = pd.to_datetime(df_exrates_clean['date'], format='%m/%d/%Y')

In [17]:
df_exrates_clean

Unnamed: 0,date,currency,exchange
0,2015-01-01,USD,1.0000
1,2015-01-01,CAD,1.1583
2,2015-01-01,AUD,1.2214
3,2015-01-01,EUR,0.8237
4,2015-01-01,GBP,0.6415
...,...,...,...
11210,2021-02-20,USD,1.0000
11211,2021-02-20,CAD,1.2610
11212,2021-02-20,AUD,1.2723
11213,2021-02-20,EUR,0.8238


In [18]:
# Reset the time-of-day component so every timestamp sits at midnight.
parsed_dates = pd.to_datetime(df_exrates_clean['date'])
df_exrates_clean['date'] = parsed_dates.dt.normalize()

In [19]:
df_exrates_clean.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11215 entries, 0 to 11214
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   date      11215 non-null  datetime64[ns]
 1   currency  11215 non-null  object        
 2   exchange  11215 non-null  float64       
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 263.0+ KB


In [21]:
# Build a per-row key of the form '<YYYY-MM-DD>_<currency>'.
iso_dates = df_exrates_clean['date'].dt.strftime('%Y-%m-%d')
currency_codes = df_exrates_clean['currency']
df_exrates_clean['currency_detail_code'] = iso_dates + '_' + currency_codes

print("\nDataFrame dengan kolom 'currency_detail_code':")
print(df_exrates_clean)


DataFrame dengan kolom 'currency_detail_code':
            date currency  exchange    currency_key currency_detail_code
0     2015-01-01      USD    1.0000  2015-01-01_USD       2015-01-01_USD
1     2015-01-01      CAD    1.1583  2015-01-01_CAD       2015-01-01_CAD
2     2015-01-01      AUD    1.2214  2015-01-01_AUD       2015-01-01_AUD
3     2015-01-01      EUR    0.8237  2015-01-01_EUR       2015-01-01_EUR
4     2015-01-01      GBP    0.6415  2015-01-01_GBP       2015-01-01_GBP
...          ...      ...       ...             ...                  ...
11210 2021-02-20      USD    1.0000  2021-02-20_USD       2021-02-20_USD
11211 2021-02-20      CAD    1.2610  2021-02-20_CAD       2021-02-20_CAD
11212 2021-02-20      AUD    1.2723  2021-02-20_AUD       2021-02-20_AUD
11213 2021-02-20      EUR    0.8238  2021-02-20_EUR       2021-02-20_EUR
11214 2021-02-20      GBP    0.7126  2021-02-20_GBP       2021-02-20_GBP

[11215 rows x 5 columns]


In [23]:
# No visible cell in this notebook creates 'currency_key' (it appears to be left
# over from a removed cell), so on a fresh kernel the column does not exist.
# errors='ignore' keeps Restart & Run All working while still removing the
# column when it is present.
df_exrates_clean = df_exrates_clean.drop(columns='currency_key', errors='ignore')
print(df_exrates_clean)

            date currency  exchange currency_detail_code
0     2015-01-01      USD    1.0000       2015-01-01_USD
1     2015-01-01      CAD    1.1583       2015-01-01_CAD
2     2015-01-01      AUD    1.2214       2015-01-01_AUD
3     2015-01-01      EUR    0.8237       2015-01-01_EUR
4     2015-01-01      GBP    0.6415       2015-01-01_GBP
...          ...      ...       ...                  ...
11210 2021-02-20      USD    1.0000       2021-02-20_USD
11211 2021-02-20      CAD    1.2610       2021-02-20_CAD
11212 2021-02-20      AUD    1.2723       2021-02-20_AUD
11213 2021-02-20      EUR    0.8238       2021-02-20_EUR
11214 2021-02-20      GBP    0.7126       2021-02-20_GBP

[11215 rows x 4 columns]


In [24]:
df_exrates_clean

Unnamed: 0,date,currency,exchange,currency_detail_code
0,2015-01-01,USD,1.0000,2015-01-01_USD
1,2015-01-01,CAD,1.1583,2015-01-01_CAD
2,2015-01-01,AUD,1.2214,2015-01-01_AUD
3,2015-01-01,EUR,0.8237,2015-01-01_EUR
4,2015-01-01,GBP,0.6415,2015-01-01_GBP
...,...,...,...,...
11210,2021-02-20,USD,1.0000,2021-02-20_USD
11211,2021-02-20,CAD,1.2610,2021-02-20_CAD
11212,2021-02-20,AUD,1.2723,2021-02-20_AUD
11213,2021-02-20,EUR,0.8238,2021-02-20_EUR


In [25]:
# Move the key column to the front: take it out of the frame, then re-insert
# it at position 0.
key_column_name = 'currency_detail_code'
first_column = df_exrates_clean.pop(key_column_name)
df_exrates_clean.insert(loc=0, column=key_column_name, value=first_column)

print("\nDataFrame dengan kolom 'currency_detail_code' di posisi pertama:")
print(df_exrates_clean)


DataFrame dengan kolom 'currency_detail_code' di posisi pertama:
      currency_detail_code       date currency  exchange
0           2015-01-01_USD 2015-01-01      USD    1.0000
1           2015-01-01_CAD 2015-01-01      CAD    1.1583
2           2015-01-01_AUD 2015-01-01      AUD    1.2214
3           2015-01-01_EUR 2015-01-01      EUR    0.8237
4           2015-01-01_GBP 2015-01-01      GBP    0.6415
...                    ...        ...      ...       ...
11210       2021-02-20_USD 2021-02-20      USD    1.0000
11211       2021-02-20_CAD 2021-02-20      CAD    1.2610
11212       2021-02-20_AUD 2021-02-20      AUD    1.2723
11213       2021-02-20_EUR 2021-02-20      EUR    0.8238
11214       2021-02-20_GBP 2021-02-20      GBP    0.7126

[11215 rows x 4 columns]


In [26]:
df_exrates_clean

Unnamed: 0,currency_detail_code,date,currency,exchange
0,2015-01-01_USD,2015-01-01,USD,1.0000
1,2015-01-01_CAD,2015-01-01,CAD,1.1583
2,2015-01-01_AUD,2015-01-01,AUD,1.2214
3,2015-01-01_EUR,2015-01-01,EUR,0.8237
4,2015-01-01_GBP,2015-01-01,GBP,0.6415
...,...,...,...,...
11210,2021-02-20_USD,2021-02-20,USD,1.0000
11211,2021-02-20_CAD,2021-02-20,CAD,1.2610
11212,2021-02-20_AUD,2021-02-20,AUD,1.2723
11213,2021-02-20_EUR,2021-02-20,EUR,0.8238


## INGESTION

In [27]:
from sqlalchemy import create_engine

In [28]:
# Credentials must not live in the notebook: the password is read from the
# environment. The other settings fall back to the values used previously.
# NOTE(review): a password was hard-coded in this cell before; rotate it.
db_password = os.environ.get('PGPASSWORD')
if db_password is None:
    raise RuntimeError("Set the PGPASSWORD environment variable before running this cell")

db_user = os.environ.get('PGUSER', 'postgres')
db_host = os.environ.get('PGHOST', 'localhost')
db_port = os.environ.get('PGPORT', '5432')
db_name = os.environ.get('PGDATABASE', 'postgres')

# quote_plus escapes characters such as '@' or '/' that would otherwise break the URL.
connection = f'postgresql://{db_user}:{quote_plus(db_password)}@{db_host}:{db_port}/{db_name}'
engine = create_engine(connection)
conn2 = engine.connect()
print(conn2)

<sqlalchemy.engine.base.Connection object at 0x000001C9BBC3AAE0>


In [29]:
# Load the cleaned frame into platinum_base_layer.exchange_rates, replacing any
# existing table. The DataFrame index is not written (index=False), so no
# index_label is passed. chunksize bounds the size of each multi-row INSERT
# instead of sending all rows in a single statement.
df_exrates_clean.to_sql(
    "exchange_rates",
    con=conn2,
    schema="platinum_base_layer",
    if_exists="replace",
    index=False,
    chunksize=1000,
    method="multi",
)

11215

In [31]:
from sqlalchemy import text

In [32]:
# DDL statements applied after the load; each one runs in its own transaction.
query_pk = [
    "alter table platinum_base_layer.exchange_rates add primary key(currency_detail_code)"
]
for query in query_pk:
    # engine.begin() commits on success, rolls back on error, and always
    # returns the connection to the pool, so a failing statement cannot leak it.
    with engine.begin() as conn_2:
        conn_2.execute(text(query))