In [185]:
# importing packages
import numpy as np
import matplotlib.pyplot as plt
import scipy
import seaborn as sns
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from numpy.linalg import cond, matrix_rank, norm
from scipy.linalg import inv, solve, det, eig, lu, eigvals
from sklearn.metrics import confusion_matrix

In [391]:
#importing data
BankLend = pd.read_csv('Data/BankLend.csv')
Consumption = pd.read_csv('Data/Consumption.csv')
gdp_exp = pd.read_csv('Data/GDP_ExpenditureBased.csv')
housing_starts = pd.read_csv('Data/HousingStarts.csv')
ex_rate = pd.read_csv('Data/CEER_MONTHLY_NOMINAL-sd-1990-01-01.csv') #Canadian Effective Exchange Rate - monthly nominal
SP_tbills = pd.read_csv('Data/s&p, tbills.csv')
mls_xlsx = pd.ExcelFile('Data/MLS_HPI_November_2023 (1)/Seasonally Adjusted.xlsx')
mls_agg = pd.read_excel(mls_xlsx, sheet_name='AGGREGATE')
house_prices = pd.read_csv('Data/OECD Data.csv')

In [389]:
# BANK LENDING

# Summing over rows of 3 to get quarterly data
def sum_quarter(group):
    return pd.Series({'ReferencePeriod': group['ReferencePeriod'].iloc[0],
                      'Total, funds advanced, residential mortgages, insured 7': group['Total, funds advanced, residential mortgages, insured 7'].sum(),
                      'Total, funds advanced, residential mortgages, uninsured 7': group['Total, funds advanced, residential mortgages, uninsured 7'].sum(),
                      'Total, funds advanced, variable rate mortgages': group['Total, funds advanced, variable rate mortgages'].sum(),
                      'Total, funds advanced for non-mortgage loans, consumer credit': group['Total, funds advanced for non-mortgage loans, consumer credit'].sum(),
                      'Total, funds advanced, business loans 8': group['Total, funds advanced, business loans 8'].sum()})

# Group by sets of 3 rows and average using the custom function
BL_quarter = BankLend.groupby(BankLend.index // 3).apply(average_quarter).reset_index(drop=True)

KeyError: 'nummonth'

In [382]:
# CONSUMPTION

# keeping specific columns
filtered_cons = Consumption[Consumption['Estimates'].isin(['Household final consumption expenditure'])].reset_index(drop=True)

# transposing the dataset
cons = filtered_cons.set_index('Estimates').T
cons.head()

Estimates,Household final consumption expenditure
Q1 2016,285296
Q2 2016,286760
Q3 2016,289033
Q4 2016,292579
Q1 2017,296967


In [383]:
# GDP 

# keeping specific columns
filtered_gdp = gdp_exp[gdp_exp['Estimates'].isin(['Gross domestic product at market prices'])].reset_index(drop=True)

# transposing the dataset
gdp = filtered_gdp.set_index('Estimates').T
gdp.head()

Estimates,Gross domestic product at market prices
Q1 2016,1949923
Q2 2016,1940335
Q3 2016,1960344
Q4 2016,1971351
Q1 2017,1992778


In [384]:
# EXCHANGE RATE

# keeping one column
filtered_ex_rate = ex_rate[['date','CEER_BROADNM']]
ceer = filtered_ex_rate.reset_index(drop=True)

# Averaging over rows of 3 to get quarterly data
def average_quarter(group):
    return pd.Series({'date': group['date'].iloc[0],
                      'CEER_BROADNM': group['CEER_BROADNM'].mean()})

# Group by sets of 3 rows and average using the custom function
ceer_quarter = ceer.groupby(ceer.index // 3).apply(average_quarter).reset_index(drop=True)
ceer_quarter.head()

Unnamed: 0,date,CEER_BROADNM
0,1999-01-01,98.053333
1,1999-04-01,101.366667
2,1999-07-01,100.096667
3,1999-10-01,100.486667
4,2000-01-01,102.433333


In [393]:
# FINANCIAL INDICATORS - S&P 500 & Treasury Bills

# Averaging over rows of 3 to get quarterly data
def average_quarter(group):
    return pd.Series({'nummonth': group['nummonth'].iloc[0],
                      'sp': group['sp'].mean(), 't_bills': group['t_bills'].mean()})

# Group by sets of 3 rows and average using the custom function
fin_ind_quarter = SP_tbills.groupby(SP_tbills.index // 3).apply(average_quarter).reset_index(drop=True)
fin_ind_quarter.head()

Unnamed: 0,nummonth,sp,t_bills
0,2013m1,36782.44,0.953333
1,2013m4,36089.766667,1.01
2,2013m7,37059.5,0.99
3,2013m10,39750.513333,0.916667
4,2014m1,41900.943333,0.876667


In [386]:
# renaming to have consistent quarter names

# Function to generate quarter-year labels
def generate_quarter_labels(year, num_entries):
    quarters = ['Q1', 'Q2', 'Q3', 'Q4']
    return [f"{quarters[i % 4]} {year}" for i in range(num_entries)]

# Bank Lending 
BL_quarter['ReferencePeriod'] = generate_quarter_labels(2013, len(BL_quarter['ReferencePeriod']))

# Exchange Rate
ceer_quarter['date'] = generate_quarter_labels(2013, len(ceer_quarter['date']))


In [402]:
SP_tbills['quarter_year'] = pd.to_datetime(SP_tbills['nummonth'].str.replace('m', ''), format='%Y%m').dt.to_period('Q').astype(str)
SP_tbills['quarter_year'] = SP_tbills['quarter_year'].apply(lambda x: f"Q{x.split('Q')[1]} {x.split('Q')[0]}")
# Rename the 'nummonth' column
#SP_tbills.rename(columns={'nummonth': 'original_column'}, inplace=True)
SP_tbills.drop(columns=['nummonth'], inplace=True)


KeyError: 'nummonth'