In [59]:
import pandas as pd
from datetime import datetime
import datetime as dt


## GDP

In [86]:

# Build the tidy GDP frame (date, indicator, value) from the raw GDP extract.
df = pd.read_csv("./GDP.csv")

## select only all industries
# .copy() detaches the filtered rows from the raw frame, so the column
# assignments below write to an independent frame instead of a slice
# (which would raise SettingWithCopyWarning).
df = df.loc[
    df['North American Industry Classification System (NAICS)'] == 'All industries [T001]'
].copy()
# Add a dummy first-of-month day to REF_DATE and parse the result as a datetime
df['REF_DATE'] = pd.to_datetime(df['REF_DATE'] + "-01", format="%Y-%m-%d")
# Scale adjusted to millions (so multiply by 1 million)
df['VALUE'] = df['VALUE'] * 1_000_000
# Create new dataframe with only required columns : date and gdp value
gdp_df = pd.concat([df['REF_DATE'], df['VALUE']], axis=1, keys=['date', 'value'])
# Create a new column for indicator, placed between date and value
gdp_df.insert(loc=1, column='indicator', value="GDP")
gdp_df.head()

Unnamed: 0,date,indicator,value
0,2019-01-01,GDP,1952558000000
1,2019-02-01,GDP,1948783000000
2,2019-03-01,GDP,1961298000000
3,2019-04-01,GDP,1966131000000
4,2019-05-01,GDP,1971712000000


In [88]:
# Sanity check: (rows, columns) of the tidy GDP frame.
gdp_df.shape

(15, 3)

## TSX

In [84]:
# Load the TSX extract and parse its Date strings into datetimes.
df = pd.read_csv('./tsx.csv')
df['Date'] = df['Date'].map(lambda raw: datetime.strptime(raw, "%Y-%m-%d"))
# Tidy frame in the shared (date, indicator, value) layout; value is the opening price.
tsx_df = pd.DataFrame({
    'date': df['Date'],
    'indicator': "TSX",
    'value': df['Open'],
})

In [231]:
# Preview the first rows of the tidy TSX frame.
tsx_df.head()

Unnamed: 0,date,indicator,value
0,2019-01-01,TSX,14163.900391
1,2019-02-01,TSX,15562.299805
2,2019-03-01,TSX,16086.299805
3,2019-04-01,TSX,16187.900391
4,2019-05-01,TSX,16580.599609


## Mortgage rates (%)

In [148]:
# Build the tidy mortgage-rate frame (date, indicator, value).
df = pd.read_csv('./mortgage_rates.csv')
## select only "Interest rate" rows for "Total, funds advanced, residential mortgages, insured"
# .copy() detaches the filtered rows from the raw frame, so the column
# assignment below does not raise SettingWithCopyWarning.
df = df[
    (df['Unit of measure'] == "Interest rate")
    & (df['Components'] == "Total, funds advanced, residential mortgages, insured")
].copy()
# Add a dummy first-of-month day to REF_DATE and parse the result as a datetime
df['REF_DATE'] = pd.to_datetime(df['REF_DATE'] + "-01", format="%Y-%m-%d")
# Create new dataframe with only required columns : date and mortgage rate value
mortgage_rate_df = pd.concat([df['REF_DATE'], df['VALUE']], axis=1, keys=['date', 'value'])
# Create a new column for indicator, placed between date and value
mortgage_rate_df.insert(loc=1, column='indicator', value="mortgage_rate")
mortgage_rate_df.shape

(15, 3)

In [186]:
# Preview the first rows of the tidy mortgage-rate frame.
mortgage_rate_df.head()

Unnamed: 0,date,indicator,value
15,2019-01-01,mortgage_rate,3.84
16,2019-02-01,mortgage_rate,3.82
17,2019-03-01,mortgage_rate,3.76
18,2019-04-01,mortgage_rate,3.59
19,2019-05-01,mortgage_rate,3.45


## Interest rates (%)

In [245]:
# Build the tidy interest-rate frame (date, indicator, value).
df = pd.read_csv("./interest_rates.csv")
# Keep only the overnight money market financing series. .copy() detaches the
# filtered rows from the raw frame, so the column assignments below do not
# raise SettingWithCopyWarning.
df = df[df["Financial market statistics"] == "Overnight money market financing"].copy()
# Convert the column to datetime type from string type
df['REF_DATE'] = pd.to_datetime(df['REF_DATE'], format="%Y-%m-%d")
# Replace NaNs with 0 -- only in VALUE, the one column carried forward, so
# unrelated columns are not zero-filled as a side effect.
# NOTE(review): a missing observation becomes a 0.0 rate in the output;
# confirm that zero (rather than dropping or forward-filling) is intended.
df['VALUE'] = df['VALUE'].fillna(0)
# Create new dataframe with only required columns : date and rate value
interest_rates_df = pd.concat([df['REF_DATE'], df['VALUE']], axis=1, keys=['date', 'value'])
# Create a new column for indicator, placed between date and value
interest_rates_df.insert(loc=1, column='indicator', value="interest_rate")
interest_rates_df.head()

Unnamed: 0,date,indicator,value
0,2019-01-01,interest_rate,0.0
1,2019-01-02,interest_rate,1.7529
2,2019-01-03,interest_rate,1.7518
3,2019-01-04,interest_rate,1.75
4,2019-01-05,interest_rate,0.0


## Employment rate (%)

In [187]:
# Build the tidy employment-rate frame (date, indicator, value).
df = pd.read_csv('./employment.csv')
# Keep the national series for both sexes, aged 15 and over. .copy() detaches
# the filtered rows from the raw frame, so the column assignment below does
# not raise SettingWithCopyWarning.
df = df[
    (df["Sex"] == "Both sexes")
    & (df['Age group'] == "15 years and over")
    & (df['GEO'] == "Canada")
].copy()
# Add a dummy first-of-month day to REF_DATE and parse the result as a datetime
df['REF_DATE'] = pd.to_datetime(df['REF_DATE'] + "-01", format="%Y-%m-%d")
# Create new dataframe with only required columns : date and employment value
employment_df = pd.concat([df['REF_DATE'], df['VALUE']], axis=1, keys=['date', 'value'])
# Create a new column for indicator, placed between date and value
employment_df.insert(loc=1, column='indicator', value="employment")
employment_df.shape

(16, 3)

In [233]:
# Display the tidy employment frame.
employment_df

Unnamed: 0,date,indicator,value
0,2019-01-01,employment,60.8
1,2019-02-01,employment,61.1
2,2019-03-01,employment,61.0
3,2019-04-01,employment,61.6
4,2019-05-01,employment,62.7
5,2019-06-01,employment,63.2
6,2019-07-01,employment,62.7
7,2019-08-01,employment,62.7
8,2019-09-01,employment,62.4
9,2019-10-01,employment,62.3


## Housing

In [223]:
# Load the raw housing price table and preview its first rows.
df = pd.read_csv('./housing_prices.csv')
df.head()

Unnamed: 0,Date,Composite_HPI,Single_Family_HPI,One_Storey_HPI,Two_Storey_HPI,Townhouse_HPI,Apartment_HPI,Composite_Benchmark,Single_Family_Benchmark,One_Storey_Benchmark,Two_Storey_Benchmark,Townhouse_Benchmark,Apartment_Benchmark
0,Jan 2005,100.0,100.0,100.0,100.0,100.0,100.0,256900.0,281400.0,229100.0,322300.0,211800.0,191000.0
1,Feb 2005,101.0,101.1,101.2,101.0,100.6,100.9,259500.0,284500.0,231900.0,325500.0,213100.0,192800.0
2,Mar 2005,102.0,102.2,102.3,102.2,101.2,101.4,262100.0,287600.0,234400.0,329400.0,214400.0,193700.0
3,Apr 2005,102.9,103.3,103.4,103.2,101.9,102.1,264400.0,290700.0,236900.0,332600.0,215800.0,195000.0
4,May 2005,103.6,104.0,104.4,103.7,102.6,102.9,266200.0,292600.0,239200.0,334200.0,217300.0,196600.0


In [206]:
# Inspect the Python type of the first Date entry before parsing.
# NOTE(review): looks like an exploratory probe -- consider removing from the final notebook.
type(df['Date'][0])

str

In [224]:
# Parse the "Mon YYYY" date labels (e.g. "Jan 2005") into datetimes.
df['Date'] = pd.to_datetime(df['Date'], format='%b %Y')
# Tidy frame in the shared (date, indicator, value) layout, using the composite HPI as the value.
housing_price_df = pd.DataFrame({
    'date': df['Date'],
    'indicator': "housing_price",
    'value': df['Composite_HPI'],
})
# Helper column holding each row's calendar year.
housing_price_df['year'] = housing_price_df['date'].map(lambda stamp: stamp.year)


In [227]:
# Restrict the housing series to the 2019 and 2020 observations.
housing_price_df = housing_price_df[housing_price_df["year"].isin([2019, 2020])]

In [229]:
# Sanity check: (rows, columns) of the filtered housing frame (still includes the helper 'year' column).
housing_price_df.shape

(16, 4)

In [230]:
# Display the housing frame without the helper 'year' column.
# drop() returns a new frame and the result is not assigned here, so
# housing_price_df itself still carries 'year'.
# NOTE(review): assign the result if the column should be removed for later steps.
housing_price_df.drop(columns = ['year'])

Unnamed: 0,date,indicator,value
168,2019-01-01,housing_price,227.4
169,2019-02-01,housing_price,228.2
170,2019-03-01,housing_price,229.8
171,2019-04-01,housing_price,231.3
172,2019-05-01,housing_price,232.0
173,2019-06-01,housing_price,232.5
174,2019-07-01,housing_price,232.6
175,2019-08-01,housing_price,232.8
176,2019-09-01,housing_price,233.3
177,2019-10-01,housing_price,234.1
