In [34]:
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
import hvplot.pandas

from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from statsmodels.tsa.seasonal import seasonal_decompose
from sklearn.metrics import mean_squared_error

In [None]:
# Load the three source tables used in this notebook and merge them on their
# shared 'series_description' column:
#   * Assets and Liabilities of Commercial Banks (weekly estimated aggregated
#     balance sheet for U.S. commercial banks)
#   * Money Stock Measures
#   * Consumer Credit
# NOTE(review): the variable names suggest these are the H.8, H.6 and G.19
# releases respectively -- confirm against the data source.


def _load_release(csv_path):
    """Read one release CSV and normalise its column names.

    Column names are stripped of surrounding whitespace, lower-cased, and have
    spaces replaced by underscores. Rows are returned exactly as read.

    Parameters
    ----------
    csv_path : str or pathlib.Path
        Location of the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        The loaded table with normalised column names.
    """
    frame = pd.read_csv(csv_path, skiprows=0)
    frame.columns = frame.columns.str.strip().str.lower().str.replace(' ', '_')
    return frame


csv_H8 = Path("Assets and Liabilities of Commercial Banks.csv")
as_Liab_df = _load_release(csv_H8)

csv_H6 = Path("Money Stock Measures.csv")
money_st_df = _load_release(csv_H6)

csv_G19 = Path("Consumer Credit.csv")
credit_Cr_df = _load_release(csv_G19)

# The original cell called `.dropna()` on each frame without assigning the
# result. `DataFrame.dropna` returns a new frame, so those calls had no effect
# and have been removed; missing values are still present at this stage.

# Inner joins keep only the 'series_description' values present in all three tables.
merged_df = pd.merge(as_Liab_df, money_st_df, on='series_description', how='inner')
final_df = pd.merge(merged_df, credit_Cr_df, on='series_description', how='inner')

# Display the merged rows that contain no missing values.
# This is a preview only: `final_df` itself is not modified.
final_df.dropna()

In [None]:
# Map the long source column names (after normalisation) to short analysis names.
column_mapping = {
    'bank_credit,_all_commercial_banks,_not_seasonally_adjusted': 'bank_credit',
    'securities_in_bank_credit,_all_commercial_banks,_not_seasonally_adjusted': 'securities_in_bank_credit',
    'treasury_and_agency_securities,_all_commercial_banks,_not_seasonally_adjusted': 'treasury_agency_securities',
    'other_securities,_all_commercial_banks,_not_seasonally_adjusted': 'other_securities',
    'real_estate_loans,_all_commercial_banks,_not_seasonally_adjusted': 'real_estate_loans',
    'total_assets,_all_commercial_banks,_not_seasonally_adjusted': 'total_assets',
    'currency;_seasonally_adjusted': 'currency_sa',
    'demand_deposits;_seasonally_adjusted': 'demand_deposits_sa',
    'monetary_base;_currency_in_circulation;_not_seasonally_adjusted': 'monetary_base_currency_nsa',
    'monetary_base;_total;_not_seasonally_adjusted': 'monetary_base_total_nsa',
    'percent_change_of_total_consumer_credit,_seasonally_adjusted_at_an_annual_rate': 'pct_change_consumer_credit_sa',
    'total_consumer_credit_owned_and_securitized,_seasonally_adjusted_level': 'consumer_credit_owned_securitized_sa',
    'total_consumer_credit_owned_by_nonfinancial_business,_not_seasonally_adjusted_level': 'consumer_credit_nonfinancial_owned_nsa',
    'total_consumer_credit_securitized_by_nonfinancial_business,_not_seasonally_adjusted_level': 'consumer_credit_nonfinancial_securitized_nsa',
    'total_consumer_credit_owned_by_credit_unions,_not_seasonally_adjusted_level': 'consumer_credit_credit_unions_owned_nsa',
}

# Columns retained for the analysis: the date key followed by every renamed
# series, in the order declared in `column_mapping`. Deriving the list from the
# mapping keeps the two from drifting apart.
columns_to_keep = ['date', *column_mapping.values()]

# Rename the merge key to 'date', shorten the series names, and keep only the
# selected columns. The explicit `.copy()` makes `final_df` an independent
# frame, so later column assignments do not trigger SettingWithCopyWarning.
# A KeyError here means one of the expected source columns is missing.
final_df = (
    final_df
    .rename(columns={'series_description': 'date'})
    .rename(columns=column_mapping)
    .loc[:, columns_to_keep]
    .copy()
)

# Verify the result
final_df.head()



In [None]:
# Parse the 'date' column using the year-month format '%Y-%m'.
# `.assign` builds a new frame instead of writing into `final_df` in place, so
# this works even when `final_df` is a slice of another frame.
final_df = final_df.assign(date=pd.to_datetime(final_df['date'], format='%Y-%m'))

# Persist the cleaned table. The output directory is created first because
# `to_csv` does not create missing parent directories.
output_csv = Path('output data') / 'final_data.csv'
output_csv.parent.mkdir(parents=True, exist_ok=True)
final_df.to_csv(output_csv, index=False)

In [7]:
# Keep only the numeric columns and discard any row that has a missing value.
numeric_df = final_df.select_dtypes(include='number').dropna()

# Standardise the numeric columns with StandardScaler, then wrap the resulting
# array back into a DataFrame that carries the original column labels and index.
scaler = StandardScaler()
scaled_data = scaler.fit_transform(numeric_df)
scaled_df = pd.DataFrame(
    scaled_data,
    index=numeric_df.index,
    columns=numeric_df.columns,
)
        

In [None]:


# Candidate numbers of clusters to evaluate: 1 through 10.
k = list(range(1, 11))

# Fit one K-Means model per candidate value (fixed random_state=2) and record
# the inertia of each fitted model.
inertia = [
    KMeans(n_clusters=n_clusters, random_state=2).fit(scaled_df).inertia_
    for n_clusters in k
]

# Tabulate k against inertia so the elbow curve can be plotted.
df_elbow = pd.DataFrame({"k": k, "inertia": inertia})

df_elbow.hvplot.line(
    x='k',
    y='inertia',
    title='Elbow Curve',
    xlabel='Number of clusters (k)',
    ylabel='Inertia',
)


In [14]:
# Group the standardised observations into 4 K-Means clusters.
# NOTE(review): presumably k=4 was read off the elbow curve -- confirm.
model = KMeans(n_clusters=4, random_state=1)
model.fit(scaled_df)

# Cluster label assigned to each row of the scaled data.
k_lower = model.predict(scaled_df)

# New frame holding the scaled features plus a 'clusters_lower' label column;
# `assign` returns a new DataFrame, so `scaled_df` itself is left unchanged.
bank_predictions_df = scaled_df.assign(clusters_lower=k_lower)

In [None]:
# List the columns of the clustered frame.
# The original referenced `home_sales_predictions_df`, which is never defined in
# this notebook and raises NameError on a fresh kernel; the frame that carries
# the cluster labels is `bank_predictions_df`.
print(bank_predictions_df.columns)

In [None]:
# Scatter of consumer credit (owned and securitized) against total assets,
# coloured by K-Means cluster label.
# The original referenced the undefined name `home_sales_predictions_df`; the
# clustered frame in this notebook is `bank_predictions_df`.
# NOTE(review): `bank_predictions_df` holds standardised values, so the "%.0f"
# y-axis formatter rounds tick labels to whole numbers -- confirm this is intended.
bank_predictions_df.hvplot.scatter(
    x="consumer_credit_owned_securitized_sa",
    y="total_assets",
    by="clusters_lower"
).opts(yformatter="%.0f")

In [None]:
# Scatter of real-estate loans against demand deposits, coloured by K-Means
# cluster label.
# The original referenced the undefined name `home_sales_predictions_df`; the
# clustered frame in this notebook is `bank_predictions_df`.
# NOTE(review): `bank_predictions_df` holds standardised values, so the "%.0f"
# y-axis formatter rounds tick labels to whole numbers -- confirm this is intended.
bank_predictions_df.hvplot.scatter(
    x="real_estate_loans",
    y="demand_deposits_sa",
    by="clusters_lower"
).opts(yformatter="%.0f")

In [None]:
# Build the frame used for seasonal decomposition: rename 'date' to 'month',
# make sure it is a datetime column, and use it as the index.
# The original aliased `final_df` and modified it in place, which silently
# changed `final_df` for every other cell and made this cell fail with a
# KeyError on 'month' when run a second time. Chaining non-mutating calls
# leaves `final_df` untouched and keeps the cell safe to re-run.
month_decomp = (
    final_df
    .rename(columns={'date': 'month'})
    .assign(month=lambda frame: pd.to_datetime(frame['month']))
    .set_index('month')
)

# Preview the first rows rather than dumping the whole frame.
month_decomp.head()

In [None]:
# Multiplicative seasonal decomposition of the credit-union consumer credit series.
credit_union_series = month_decomp['consumer_credit_credit_unions_owned_nsa']
components = seasonal_decompose(credit_union_series, model='multiplicative')

# Draw the decomposition panels produced by the result's plot() method.
components.plot()
plt.show()

In [None]:
# Multiplicative seasonal decomposition of the demand deposits series.
demand_deposit_series = month_decomp['demand_deposits_sa']
componentstwo = seasonal_decompose(demand_deposit_series, model='multiplicative')

# Draw the decomposition panels produced by the result's plot() method.
componentstwo.plot()
plt.show()