In [59]:
import pandas as pd
from datetime import datetime
import datetime as dt


## GDP

In [296]:

# Read the GDP data
df = pd.read_csv("./GDP_full.csv")

## select only all industries
df = df.loc[df['North American Industry Classification System (NAICS)'].values == 'All industries [T001]']
# Add dummy day as first day of the month
df['REF_DATE'] = (df['REF_DATE'] + "-01")
# Convert the column to datetime type from string type
df['REF_DATE'] = df['REF_DATE'].apply(lambda x: datetime.strptime(x, "%Y-%m-%d"))
#Scale adjusted to millions (so multiply by 1 million)
df['VALUE'] = df['VALUE']*1000000
# Create new dataframe with only required columns : date and gdp value
gdp_df =  pd.concat([df['REF_DATE'], df['VALUE']], axis=1, keys=['date', 'value'])
# Create a new column for indicator
gdp_df.insert(loc=1, column='indicator', value="GDP")
gdp_df.head()

Unnamed: 0,date,indicator,value
0,2018-04-01,GDP,1930135000000
1,2018-05-01,GDP,1938797000000
2,2018-06-01,GDP,1939974000000
3,2018-07-01,GDP,1945241000000
4,2018-08-01,GDP,1947286000000


In [297]:
gdp_df['percentage_change'] = 0.0

In [301]:
for i in range(1,gdp_df.shape[0]):
    gdp_df['percentage_change'][i] = ( gdp_df['value'][i]-gdp_df['value'][i-1]/gdp_df['value'][i])*100

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [302]:
gdp_df

Unnamed: 0,date,indicator,value,percentage_change
0,2018-04-01,GDP,1930135000000,0.0
1,2018-05-01,GDP,1938797000000,193879700000000.0
2,2018-06-01,GDP,1939974000000,193997400000000.0
3,2018-07-01,GDP,1945241000000,194524100000000.0
4,2018-08-01,GDP,1947286000000,194728600000000.0
5,2018-09-01,GDP,1948264000000,194826400000000.0
6,2018-10-01,GDP,1951927000000,195192700000000.0
7,2018-11-01,GDP,1947407000000,194740700000000.0
8,2018-12-01,GDP,1947565000000,194756500000000.0
9,2019-01-01,GDP,1952558000000,195255800000000.0


## TSX

In [84]:
df = pd.read_csv('./tsx.csv')
# Convert the column to datetime type from string type
df['Date'] = df['Date'].apply(lambda x: datetime.strptime(x, "%Y-%m-%d"))
# Create new dataframe with only required columns : date and opening value
tsx_df = pd.concat([df['Date'], df['Open']], axis=1, keys=['date', 'value'])
# Create a new column for indicator
tsx_df.insert(loc=1, column='indicator', value="TSX")

In [231]:
tsx_df.head()

Unnamed: 0,date,indicator,value
0,2019-01-01,TSX,14163.900391
1,2019-02-01,TSX,15562.299805
2,2019-03-01,TSX,16086.299805
3,2019-04-01,TSX,16187.900391
4,2019-05-01,TSX,16580.599609


## Mortgage rates (%)

In [148]:
df = pd.read_csv('./mortgage_rates.csv')
## select only "Interest rates" and "Total, funds advanced, residential mortgages, insured"
df = df[(df['Unit of measure'] == "Interest rate" )& (df['Components'] == "Total, funds advanced, residential mortgages, insured")]
# Add dummy day as first day of the month
df['REF_DATE'] = (df['REF_DATE'] + "-01")
# Convert the column to datetime type from string type
df['REF_DATE'] = df['REF_DATE'].apply(lambda x: datetime.strptime(x, "%Y-%m-%d"))
# Create new dataframe with only required columns : date and gdp value
mortgage_rate_df =  pd.concat([df['REF_DATE'], df['VALUE']], axis=1, keys=['date', 'value'])
# Create a new column for indicator
mortgage_rate_df.insert(loc=1, column='indicator', value="mortgage_rate")
mortgage_rate_df.shape

(15, 3)

In [186]:
mortgage_rate_df.head()

Unnamed: 0,date,indicator,value
15,2019-01-01,mortgage_rate,3.84
16,2019-02-01,mortgage_rate,3.82
17,2019-03-01,mortgage_rate,3.76
18,2019-04-01,mortgage_rate,3.59
19,2019-05-01,mortgage_rate,3.45


## Interest rates (%)

In [245]:
df = pd.read_csv("./interest_rates.csv")
df = df[(df["Financial market statistics"] == "Overnight money market financing" ) ]
df['REF_DATE'] = df['REF_DATE'].apply(lambda x: datetime.strptime(x, "%Y-%m-%d"))
# Replace NaNs with 0
df = df.fillna(0)
# Create new dataframe with only required columns 
interest_rates_df =  pd.concat([df['REF_DATE'], df['VALUE']], axis=1, keys=['date', 'value'])
interest_rates_df.head()
# Create a new column for indicator
interest_rates_df.insert(loc=1, column='indicator', value="interest_rate")
interest_rates_df.head()

Unnamed: 0,date,indicator,value
0,2019-01-01,interest_rate,0.0
1,2019-01-02,interest_rate,1.7529
2,2019-01-03,interest_rate,1.7518
3,2019-01-04,interest_rate,1.75
4,2019-01-05,interest_rate,0.0


## Employment rate(%)

In [187]:
df = pd.read_csv('./employment.csv')
df = df[(df["Sex"] == "Both sexes" )& (df['Age group'] == "15 years and over") & (df['GEO'] == "Canada")]

## select only "Both sexes" 
# df = df.loc[df['Sex'].values == "Both sexes" ]
# Add dummy day as first day of the month
df['REF_DATE'] = (df['REF_DATE'] + "-01")
# Convert the column to datetime type from string type
df['REF_DATE'] = df['REF_DATE'].apply(lambda x: datetime.strptime(x, "%Y-%m-%d"))
# Create new dataframe with only required columns : date and gdp value
employment_df =  pd.concat([df['REF_DATE'], df['VALUE']], axis=1, keys=['date', 'value'])
# Create a new column for indicator
employment_df.insert(loc=1, column='indicator', value="employment")
employment_df.shape

(16, 3)

In [233]:
employment_df

Unnamed: 0,date,indicator,value
0,2019-01-01,employment,60.8
1,2019-02-01,employment,61.1
2,2019-03-01,employment,61.0
3,2019-04-01,employment,61.6
4,2019-05-01,employment,62.7
5,2019-06-01,employment,63.2
6,2019-07-01,employment,62.7
7,2019-08-01,employment,62.7
8,2019-09-01,employment,62.4
9,2019-10-01,employment,62.3


## Housing

In [284]:
df = pd.read_csv('./housing_prices.csv')


In [285]:
df['Date'] = pd.to_datetime(df['Date'], format='%b %Y')
housing_price_df =  pd.concat([df['Date'], df['Composite_HPI']], axis=1, keys=['date', 'value'])
housing_price_df.insert(loc=1, column='indicator', value="housing_price")
housing_price_df['year'] = housing_price_df['date'].apply(lambda x: x.year)


In [286]:
housing_price_df = housing_price_df[(housing_price_df["year"] == 2019 ) | (housing_price_df["year"] == 2020 ) ]

In [287]:
housing_price_df.shape

(16, 4)

In [288]:
housing_price_df =housing_price_df.drop(columns = ['year'])

In [289]:
housing_price_df.reset_index(drop=True, inplace=True)
housing_price_df.head()

Unnamed: 0,date,indicator,value
0,2019-01-01,housing_price,227.4
1,2019-02-01,housing_price,228.2
2,2019-03-01,housing_price,229.8
3,2019-04-01,housing_price,231.3
4,2019-05-01,housing_price,232.0


In [290]:
housing_price_df['percentage_change'] = 0.0

In [291]:
for i in range(1,housing_price_df.shape[0]):
    housing_price_df['percentage_change'][i] = ((housing_price_df['value'][i] - housing_price_df['value'][i-1])/housing_price_df['value'][i])*100

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [293]:
housing_price_df

Unnamed: 0,date,indicator,value,percentage_change
0,2019-01-01,housing_price,227.4,0.0
1,2019-02-01,housing_price,228.2,0.35057
2,2019-03-01,housing_price,229.8,0.696258
3,2019-04-01,housing_price,231.3,0.648508
4,2019-05-01,housing_price,232.0,0.301724
5,2019-06-01,housing_price,232.5,0.215054
6,2019-07-01,housing_price,232.6,0.042992
7,2019-08-01,housing_price,232.8,0.085911
8,2019-09-01,housing_price,233.3,0.214316
9,2019-10-01,housing_price,234.1,0.341734
