In [6]:
#Import dependencies
import os
from datetime import datetime as dt

import pandas as pd
from sqlalchemy import create_engine, inspect, text

In [27]:
###EXTRACT
# Relative paths of the four retailer CSV exports that are read in the next cell
wmt, amz, tgt, cos = (
    "WMT.csv",
    "AMZN.csv",
    "TGT.csv",
    "COST.csv",
)

In [28]:
# Load each CSV: skiprows=2 discards the first two lines of the file and
# index_col=0 makes the first column (the metric names) the row index.
wmt_df, amz_df, tgt_df, cos_df = (
    pd.read_csv(csv_path, skiprows=2, index_col=0)
    for csv_path in (wmt, amz, tgt, cos)
)
wmt_df

Unnamed: 0,2010-01,2011-01,2012-01,2013-01,2014-01,2015-01,2016-01,2017-01,2018-01,2019-01,TTM
Revenue USD Mil,408214,421849,446950,469162,476294,485651,482130,485873,500343,514405,521086
Gross Margin %,25.4,25.3,25.0,24.9,24.8,24.8,25.1,25.6,25.4,25.1,24.8
Operating Income USD Mil,23950,25542,26558,27801,26872,27147,24105,22764,20437,21957,21313
Operating Margin %,5.9,6.1,5.9,5.9,5.6,5.6,5.0,4.7,4.1,4.3,4.1
Net Income USD Mil,14335,16389,15699,16999,16022,16363,14694,13643,9862,6670,14427
...,...,...,...,...,...,...,...,...,...,...,...
Cash Conversion Cycle,8.61,7.16,8.24,10.03,11.86,12.14,11.12,8.14,3.84,1.81,5.13
Receivables Turnover,101.43,91.38,81.07,73.85,70.85,72.19,77.75,84.80,87.40,86.48,91.44
Inventory Turnover,9.00,9.08,8.70,8.34,8.08,8.11,8.06,8.26,8.60,8.75,7.69
Fixed Assets Turnover,4.12,4.01,4.06,4.10,4.06,4.14,4.14,4.21,4.37,4.55,4.40


In [29]:
###TRANSFORM
# Transpose every frame so that each period becomes a row and each metric a column
wmt_df = wmt_df.transpose()
amz_df = amz_df.transpose()
tgt_df = tgt_df.transpose()
cos_df = cos_df.transpose()

In [30]:
# Move the period labels out of the index into an ordinary column
# (pandas names it 'index'; it is renamed to 'Date' in the next cell)
wmt_df = wmt_df.reset_index()
amz_df = amz_df.reset_index()
tgt_df = tgt_df.reset_index()
cos_df = cos_df.reset_index()

In [31]:
# Label the period column 'Date' in every frame
wmt_df = wmt_df.rename({'index': 'Date'}, axis='columns')
amz_df = amz_df.rename({'index': 'Date'}, axis='columns')
tgt_df = tgt_df.rename({'index': 'Date'}, axis='columns')
cos_df = cos_df.rename({'index': 'Date'}, axis='columns')

In [32]:
# Remove extraneous trailing rows: one from Walmart, Amazon and Costco, two from Target.
# For Walmart the last row is the 'TTM' entry visible in the extract output above.
# NOTE(review): the second row dropped from Target is presumably a period the
# other files do not have -- confirm against TGT.csv.
wmt_df = wmt_df.iloc[:-1]
amz_df = amz_df.iloc[:-1]
tgt_df = tgt_df.iloc[:-2]
cos_df = cos_df.iloc[:-1]

In [33]:
# Drop every category (column) that contains a null value in any remaining row
wmt_df = wmt_df.dropna(axis='columns', how='any')
amz_df = amz_df.dropna(axis='columns', how='any')
cos_df = cos_df.dropna(axis='columns', how='any')
tgt_df = tgt_df.dropna(axis='columns', how='any')

In [34]:
# Reduce the 'YYYY-MM' period labels to the four-digit year, stored as a string
for frame in (wmt_df, amz_df, tgt_df, cos_df):
    frame['Date'] = pd.to_datetime(frame['Date'], format='%Y-%m').dt.strftime('%Y')

In [35]:
# Keep only the period column plus the three metrics compared across retailers
wmt_df, amz_df, tgt_df, cos_df = (
    frame.loc[:, ['Date', 'Revenue USD Mil', 'Net Income USD Mil', 'Earnings Per Share USD']]
    for frame in (wmt_df, amz_df, tgt_df, cos_df)
)

In [36]:
# Inner-join Walmart and Amazon on 'Date'. The shared metric columns receive
# pandas' default suffixes: _x for Walmart (left) and _y for Amazon (right).
merge1 = wmt_df.merge(amz_df, on='Date')

In [37]:
# Replace the _x/_y merge suffixes with explicit retailer names
# (_x columns came from Walmart, _y columns from Amazon)
merge1 = merge1.rename(
    columns={
        'Revenue USD Mil_x': 'Walmart Revenue USD Mil',
        'Revenue USD Mil_y': 'Amazon Revenue USD Mil',
        'Net Income USD Mil_x': 'Walmart Net Income USD Mil',
        'Net Income USD Mil_y': 'Amazon Net Income USD Mil',
        'Earnings Per Share USD_x': 'Walmart Earnings Per Share USD',
        'Earnings Per Share USD_y': 'Amazon Earnings Per Share USD',
    }
)

In [38]:
# Outer-join Target onto the Walmart/Amazon table so that 2010, which has no
# Target values, is kept (its Target columns become NaN) instead of being dropped
merge2 = merge1.merge(tgt_df, on='Date', how='outer')

In [39]:
# The columns that still carry bare metric names came from Target; prefix them accordingly
merge2 = merge2.rename(
    columns={
        'Revenue USD Mil': 'Target Revenue USD Mil',
        'Net Income USD Mil': 'Target Net Income USD Mil',
        'Earnings Per Share USD': 'Target Earnings Per Share USD',
    }
)

In [40]:
# Inner-join Costco on 'Date' to produce the final merged table
merge_df = merge2.merge(cos_df, on='Date')

In [41]:
# The remaining bare metric names came from Costco; prefix them and display the result
merge_df = merge_df.rename(
    columns={
        'Revenue USD Mil': 'Costco Revenue USD Mil',
        'Net Income USD Mil': 'Costco Net Income USD Mil',
        'Earnings Per Share USD': 'Costco Earnings Per Share USD',
    }
)
merge_df

Unnamed: 0,Date,Walmart Revenue USD Mil,Walmart Net Income USD Mil,Walmart Earnings Per Share USD,Amazon Revenue USD Mil,Amazon Net Income USD Mil,Amazon Earnings Per Share USD,Target Revenue USD Mil,Target Net Income USD Mil,Target Earnings Per Share USD,Costco Revenue USD Mil,Costco Net Income USD Mil,Costco Earnings Per Share USD
0,2010,408214,14335,3.71,34204,1152,2.53,,,,77946,1303,2.92
1,2011,421849,16389,4.47,48077,631,1.37,67390.0,2920.0,4.0,88915,1462,3.3
2,2012,446950,15699,4.52,61093,-39,-0.09,69865.0,2929.0,4.28,99137,1709,3.89
3,2013,469162,16999,5.02,74452,274,0.59,73301.0,2999.0,4.52,105156,2039,4.63
4,2014,476294,16022,4.88,88988,-241,-0.52,72596.0,1971.0,3.07,112640,2058,4.65
5,2015,485651,16363,5.05,107006,596,1.25,72618.0,-1636.0,-2.58,116199,2377,5.37
6,2016,482130,14694,4.57,135987,2371,4.9,73785.0,3363.0,5.31,118719,2350,5.33
7,2017,485873,13643,4.38,177866,3033,6.15,69495.0,2737.0,4.69,129025,2679,6.08
8,2018,500343,9862,3.28,232887,10073,20.14,71879.0,2934.0,5.29,141576,3134,7.09
9,2019,514405,6670,2.26,280522,11588,23.01,75356.0,2937.0,5.51,152703,3659,8.26


In [42]:
#Create tables for each catgory of interest
revenue_df = merge_df[['Date','Walmart Revenue USD Mil','Amazon Revenue USD Mil','Target Revenue USD Mil','Costco Revenue USD Mil']]
netincome_df = merge_df[['Date', 'Walmart Net Income USD Mil', 'Amazon Net Income USD Mil', 'Target Net Income USD Mil', 'Costco Net Income USD Mil']]
earnings_df = merge_df[['Date', 'Walmart Earnings Per Share USD', 'Amazon Earnings Per Share USD', 'Target Earnings Per Share USD', 'Costco Earnings Per Share USD']]

In [43]:
#Set index for final tables
revenue_df = revenue_df.set_index('Date')
netincome_df = netincome_df.set_index('Date')
earnings_df = earnings_df.set_index('Date')

In [412]:
###LOAD
# Create engine to connect to database.
# The credentials are read from the ELEPHANTSQL_URL environment variable rather
# than being hardcoded, so the password is never saved with the notebook.
# A missing variable raises KeyError.
# NOTE(review): the password that used to be embedded in this cell should be rotated.
# The variable may hold either "user:password@host:port/dbname" or a full URL;
# any "<scheme>://" prefix is stripped so connection_string keeps its original shape.
connection_string = os.environ["ELEPHANTSQL_URL"].split("://", 1)[-1]
# "postgresql" is the dialect name SQLAlchemy supports; the legacy "postgres"
# alias was removed in SQLAlchemy 1.4.
engine = create_engine(f'postgresql://{connection_string}')

In [420]:
# Check database table names to confirm connection.
# Engine.table_names() is deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the inspector returns the same list of table names.
inspect(engine).get_table_names()

['CAfoods',
 'walmart_state_cat',
 'state_store_sale',
 'state_category',
 'walmart_stores',
 'spatial_ref_sys',
 'revenue',
 'net_income',
 'operating_income',
 'shares',
 'earnings']

In [414]:
# Load revenue_df to elephantSQL.
# if_exists='replace' drops and recreates the table, so re-running the notebook
# neither duplicates rows nor collides with the primary key added in a later cell.
revenue_df.to_sql(name='revenue_df', con=engine, if_exists='replace', index=True)

In [415]:
# Load netincome_df to elephantSQL.
# if_exists='replace' drops and recreates the table, so re-running the notebook
# neither duplicates rows nor collides with the primary key added in a later cell.
netincome_df.to_sql(name='net_income_df', con=engine, if_exists='replace', index=True)

In [418]:
# Load earnings_df to elephantSQL.
# if_exists='replace' drops and recreates the table, so re-running the notebook
# neither duplicates rows nor collides with the primary key added in a later cell.
earnings_df.to_sql(name='earnings_df', con=engine, if_exists='replace', index=True)

In [None]:
# Recheck database to ensure successful load.
# Engine.table_names() is deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the inspector returns the same list of table names.
inspect(engine).get_table_names()

In [None]:
# Add primary key constraint to table revenue_df.
# Engine.execute() was removed in SQLAlchemy 2.0; engine.begin() opens a
# transaction that commits on success and rolls back if the statement fails.
with engine.begin() as connection:
    connection.execute(text('ALTER TABLE "revenue_df" ADD PRIMARY KEY ("Date")'))

In [None]:
# Add primary key constraint to table net_income_df.
# Engine.execute() was removed in SQLAlchemy 2.0; engine.begin() opens a
# transaction that commits on success and rolls back if the statement fails.
with engine.begin() as connection:
    connection.execute(text('ALTER TABLE "net_income_df" ADD PRIMARY KEY ("Date")'))

In [None]:
# Add primary key constraint to table earnings_df.
# Engine.execute() was removed in SQLAlchemy 2.0; engine.begin() opens a
# transaction that commits on success and rolls back if the statement fails.
with engine.begin() as connection:
    connection.execute(text('ALTER TABLE "earnings_df" ADD PRIMARY KEY ("Date")'))