In [1]:
# importing packages
import numpy as np
import matplotlib.pyplot as plt
import scipy
import seaborn as sns
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from numpy.linalg import cond, matrix_rank, norm
from scipy.linalg import inv, solve, det, eig, lu, eigvals
from sklearn.metrics import confusion_matrix

In [138]:
#importing data
BankLend = pd.read_csv('Data/BankLend.csv')
Consumption = pd.read_csv('Data/Consumption.csv')
gdp_exp = pd.read_csv('Data/GDP_ExpenditureBased.csv')
housing_starts = pd.read_csv('Data/HousingStarts.csv')
ex_rate = pd.read_csv('Data/CEER_MONTHLY_NOMINAL-sd-1990-01-01.csv') #Canadian Effective Exchange Rate - monthly nominal
SP_tbills = pd.read_csv('Data/s&p, tbills.csv')
mls_xlsx = pd.ExcelFile('Data/MLS_HPI_November_2023 (1)/Seasonally Adjusted.xlsx')
mls_agg = pd.read_excel(mls_xlsx, sheet_name='AGGREGATE')
house_prices = pd.read_csv('Data/OECD Data.csv')

In [144]:
# BANK LENDING

# Summing over rows of 3 to get quarterly data
def sum_quarter(group):
    return pd.Series({'ReferencePeriod': group['ReferencePeriod'].iloc[0],
                      'Total, funds advanced, residential mortgages, insured 7': group['Total, funds advanced, residential mortgages, insured 7'].sum(),
                      'Total, funds advanced, residential mortgages, uninsured 7': group['Total, funds advanced, residential mortgages, uninsured 7'].sum(),
                      'Total, funds advanced, variable rate mortgages': group['Total, funds advanced, variable rate mortgages'].sum(),
                      'Total, funds advanced for non-mortgage loans, consumer credit': group['Total, funds advanced for non-mortgage loans, consumer credit'].sum(),
                      'Total, funds advanced, business loans 8': group['Total, funds advanced, business loans 8'].sum()})

# Group by sets of 3 rows and average using the custom function
BL_quarter = BankLend.groupby(BankLend.index // 3).apply(sum_quarter)

# Function to generate quarter-year labels
def generate_quarter_labels(year, num_entries):
    quarters = ['Q1', 'Q2', 'Q3', 'Q4']
    return [f"{quarters[i % 4]} {year}" for i in range(num_entries)]

# Bank Lending 
BL_quarter['ReferencePeriod'] = generate_quarter_labels(2013, len(BL_quarter['ReferencePeriod']))
BL_quarter = BL_quarter.rename(columns = {"ReferencePeriod":"Quarters"})

BL_quarter.head()

Unnamed: 0,Quarters,"Total, funds advanced, residential mortgages, insured 7","Total, funds advanced, residential mortgages, uninsured 7","Total, funds advanced, variable rate mortgages","Total, funds advanced for non-mortgage loans, consumer credit","Total, funds advanced, business loans 8"
0,Q1 2013,19334,31399,0.0,40205,145993
1,Q2 2013,31104,49731,0.0,50881,181034
2,Q3 2013,27995,49720,0.0,50063,177620
3,Q4 2013,20997,37640,0.0,44350,200198
4,Q1 2013,19096,32194,0.0,42025,163412


In [78]:
# CONSUMPTION

# keeping specific columns
filtered_cons = Consumption[Consumption['Quarters'].isin(['Household final consumption expenditure'])].reset_index(drop=True)

# transposing the dataset
cons = filtered_cons.set_index('Quarters').T
cons.head()

Quarters,Household final consumption expenditure
Q1 2016,285296
Q2 2016,286760
Q3 2016,289033
Q4 2016,292579
Q1 2017,296967


In [82]:
# GDP 

# keeping specific columns
filtered_gdp = gdp_exp[gdp_exp['Quarters'].isin(['Gross domestic product at market prices'])].reset_index(drop=True)

# transposing the dataset
gdp = filtered_gdp.set_index('Quarters').T
gdp.head()

Quarters,Gross domestic product at market prices
Q1 2016,1949923
Q2 2016,1940335
Q3 2016,1960344
Q4 2016,1971351
Q1 2017,1992778


In [69]:
# EXCHANGE RATE

# keeping one column
filtered_ex_rate = ex_rate[['date','CEER_BROADNM']]
ceer = filtered_ex_rate.reset_index(drop=True)

# Averaging over rows of 3 to get quarterly data
def average_quarter(group):
    return pd.Series({'date': group['date'].iloc[0],
                      'CEER_BROADNM': group['CEER_BROADNM'].mean()})

# Group by sets of 3 rows and average using the custom function
ceer_quarter = ceer.groupby(ceer.index // 3).apply(average_quarter).reset_index(drop=True)

# Exchange Rate
ceer_quarter['date'] = generate_quarter_labels(2013, len(ceer_quarter['date']))
ceer_quarter = ceer_quarter.rename(columns = {"date":"Quarters"})
ceer_quarter.head()

Unnamed: 0,Quarters,CEER_BROADNM
0,Q1 2013,98.053333
1,Q2 2013,101.366667
2,Q3 2013,100.096667
3,Q4 2013,100.486667
4,Q1 2013,102.433333


In [68]:
# FINANCIAL INDICATORS - S&P 500 & Treasury Bills

# Averaging over rows of 3 to get quarterly data
def average_quarter(group):
    return pd.Series({'nummonth': group['nummonth'].iloc[0],
                      'sp': group['sp'].mean(), 't_bills': group['t_bills'].mean()})

# Group by sets of 3 rows and average using the custom function
fin_ind_quarter = SP_tbills.groupby(SP_tbills.index // 3).apply(average_quarter).reset_index(drop=True)

# creating the time series column
data = {'date': pd.date_range(start='2013-01-01', end='2022-10-01', freq='M')}
df = pd.DataFrame(data)

# Turning it into a quarterly periods
df['quarterly_period'] = df['date'].dt.to_period('Q')
df['formatted_quarter'] = df['quarterly_period'].dt.strftime('Q%q %Y')

# concatinating the dataframe with the financial indicators dataframe
fin_ind = pd.concat([df, fin_ind_quarter], axis=1)

# cleaning up the dataframe
drop_columns = ['date', 'quarterly_period', 'nummonth']
fin_ind = fin_ind.drop(drop_columns, axis=1)
fin_ind = fin_ind.rename(columns = {"formatted_quarter":"Quarters"})
fin_ind.head()

Unnamed: 0,Quarters,sp,t_bills
0,Q1 2013,36782.44,0.953333
1,Q1 2013,36089.766667,1.01
2,Q1 2013,37059.5,0.99
3,Q2 2013,39750.513333,0.916667
4,Q2 2013,41900.943333,0.876667


In [95]:
# HOUSING PRICES

# Convert the 'TIME' column to datetime format
house_prices['TIME'] = pd.to_datetime(house_prices['TIME'])

# Format the 'TIME' column as "Q1 2001", "Q2 2001", etc.
house_prices['TIME'] = house_prices['TIME'].dt.to_period('Q').dt.strftime('Q%q %Y')
house_prices = house_prices.rename(columns = {"TIME":"Quarters"})
house_prices.head()

Unnamed: 0,Quarters,Value
0,Q1 2001,40.844646
1,Q2 2001,41.20098
2,Q3 2001,41.875943
3,Q4 2001,42.620664
4,Q1 2002,43.340875


In [149]:
# combining the dataframes
#merge1 = pd.merge(BL_quarter, cons, on = 'Quarters')
#merge2 = pd.merge(merge1, gdp, on = 'Quarters')
#merge3 = pd.merge(merge2, ceer_quarter, on = 'Quarters')
#merge4 = pd.merge(merge3, fin_ind, on = 'Quarters')
#final_df = pd.merge(merge4, house_prices, on = 'Quarters')



KeyError: 'Quarters'