In [1]:
#Import Dependencies
import pandas as pd
import matplotlib.pyplot as plt

In [2]:

# Load every input CSV from Resources/Output into its own DataFrame.
df_layoffs = pd.read_csv("Resources/Output/layoffs_bydate.csv")
df_fed_interest = pd.read_csv("Resources/Output/fed_interest_rate_bydate.csv")
df_mortgage = pd.read_csv("Resources/Output/mortgage_rates_bydate.csv")
df_unemployment = pd.read_csv("Resources/Output/unemployment_rate.csv")
df_stocks = pd.read_csv("Resources/Output/stock_data.csv")
# NOTE: append further read_csv calls here as additional datasets become available.

# Preview the layoffs data to confirm the load worked.
df_layoffs.head()

Unnamed: 0,period,laid_off_by_month,funds_raised_by_month
0,2020/03,7850.0,15530.2
1,2020/04,19821.0,43862.0
2,2020/05,14674.0,74191.0
3,2020/06,3926.0,11724.1
4,2020/07,1612.0,4447.0


In [3]:
# Preview the first rows of the fed interest rate data (DATE is already in yyyy/mm form).
df_fed_interest.head()

Unnamed: 0,DATE,FEDFUNDS
0,2019/08,2.13
1,2019/09,2.04
2,2019/10,1.83
3,2019/11,1.55
4,2019/12,1.55


In [4]:
# Preview the first rows of the mortgage rate data (YearMonth is yyyy-mm here and is reformatted in a later cell).
df_mortgage.head()

Unnamed: 0,YearMonth,MORTGAGE30US
0,2018-01,4.0325
1,2018-02,4.33
2,2018-03,4.444
3,2018-04,4.4675
4,2018-05,4.586


In [5]:
# Preview the first rows of the unemployment data (DATE is m/d/yyyy here and is reformatted in a later cell).
df_unemployment.head()


Unnamed: 0,DATE,UNRATE
0,8/1/2019,3.6
1,9/1/2019,3.5
2,10/1/2019,3.6
3,11/1/2019,3.6
4,12/1/2019,3.6


In [6]:
# Preview the first rows of the stock data (Date is already in yyyy/mm form).
df_stocks.head()

Unnamed: 0,Date,stock_open,stock_high,stock_low,stock_close,stock_adj_close,stock_volume
0,2024/08,5537.84,5651.62,5119.26,5648.4,5648.4,81097300000.0
1,2024/07,5471.08,5669.67,5390.95,5522.3,5522.3,80160390000.0
2,2024/06,5297.15,5523.64,5234.32,5460.48,5460.48,76025620000.0
3,2024/05,5029.03,5341.88,5011.05,5277.51,5277.51,86849720000.0
4,2024/04,5257.97,5263.95,4953.56,5035.69,5035.69,81747170000.0


In [7]:
# Convert DATE from m/d/yyyy strings to the yyyy/mm format used as the join key
# by the other frames: parse to datetime first, then format back to text.
df_unemployment = df_unemployment.assign(
    DATE=df_unemployment["DATE"].astype("datetime64[ns]").dt.strftime("%Y/%m")
)
df_unemployment.head()

Unnamed: 0,DATE,UNRATE
0,2019/08,3.6
1,2019/09,3.5
2,2019/10,3.6
3,2019/11,3.6
4,2019/12,3.6


In [8]:
# Reformat YearMonth from yyyy-mm to yyyy/mm so it matches the layoffs 'period' key:
# parse the strings to datetime, then render them back as yyyy/mm text.
df_mortgage = df_mortgage.assign(
    YearMonth=df_mortgage["YearMonth"].astype("datetime64[ns]").dt.strftime("%Y/%m")
)
df_mortgage.head()

Unnamed: 0,YearMonth,MORTGAGE30US
0,2018/01,4.0325
1,2018/02,4.33
2,2018/03,4.444
3,2018/04,4.4675
4,2018/05,4.586


In [9]:
# Join layoffs (left) with mortgage rates on the month key. The inner join keeps
# only months present in both frames, so no separate date filtering is needed.
# The duplicate key column is dropped and the rate column gets a readable name.
df2 = (
    df_layoffs
    .merge(df_mortgage, left_on='period', right_on='YearMonth', how='inner')
    .drop(columns='YearMonth')
    .rename(columns={'MORTGAGE30US': 'mortgage_rate'})
)
df2.head()

Unnamed: 0,period,laid_off_by_month,funds_raised_by_month,mortgage_rate
0,2020/03,7850.0,15530.2,3.45
1,2020/04,19821.0,43862.0,3.306
2,2020/05,14674.0,74191.0,3.2325
3,2020/06,3926.0,11724.1,3.1625
4,2020/07,1612.0,4447.0,3.016


In [10]:
# Add the fed interest rate to the merged frame, again with the layoffs-based
# frame on the left of an inner join so no separate date filtering is needed.
df3 = (
    df2
    .merge(df_fed_interest, left_on='period', right_on='DATE', how='inner')
    .drop(columns='DATE')
    .rename(columns={'FEDFUNDS': 'fed_interest_rate'})
)
df3.head()

Unnamed: 0,period,laid_off_by_month,funds_raised_by_month,mortgage_rate,fed_interest_rate
0,2020/03,7850.0,15530.2,3.45,0.65
1,2020/04,19821.0,43862.0,3.306,0.05
2,2020/05,14674.0,74191.0,3.2325,0.05
3,2020/06,3926.0,11724.1,3.1625,0.08
4,2020/07,1612.0,4447.0,3.016,0.09


In [11]:
# Add the unemployment rate, joining on the yyyy/mm month key (inner join),
# then drop the duplicate key column and give the rate a readable name.
df4 = (
    df3
    .merge(df_unemployment, left_on='period', right_on='DATE', how='inner')
    .drop(columns='DATE')
    .rename(columns={'UNRATE': 'unemployment_rate'})
)
df4.head()

Unnamed: 0,period,laid_off_by_month,funds_raised_by_month,mortgage_rate,fed_interest_rate,unemployment_rate
0,2020/03,7850.0,15530.2,3.45,0.65,4.4
1,2020/04,19821.0,43862.0,3.306,0.05,14.8
2,2020/05,14674.0,74191.0,3.2325,0.05,13.2
3,2020/06,3926.0,11724.1,3.1625,0.08,11.0
4,2020/07,1612.0,4447.0,3.016,0.09,10.2


In [12]:
# Add the stock columns, joining on the yyyy/mm month key (inner join).
# The stock columns already carry a 'stock_' prefix, so only the duplicate
# key column needs to be dropped.
df5 = (
    df4
    .merge(df_stocks, left_on='period', right_on='Date', how='inner')
    .drop(columns='Date')
)
df5.head()

Unnamed: 0,period,laid_off_by_month,funds_raised_by_month,mortgage_rate,fed_interest_rate,unemployment_rate,stock_open,stock_high,stock_low,stock_close,stock_adj_close,stock_volume
0,2020/03,7850.0,15530.2,3.45,0.65,4.4,2974.28,3136.72,2191.86,2584.59,2584.59,162185400000.0
1,2020/04,19821.0,43862.0,3.306,0.05,14.8,2498.08,2954.86,2447.49,2912.43,2912.43,123608200000.0
2,2020/05,14674.0,74191.0,3.2325,0.05,13.2,2869.09,3068.67,2766.64,3044.31,3044.31,107135200000.0
3,2020/06,3926.0,11724.1,3.1625,0.08,11.0,3038.78,3233.13,2965.66,3100.29,3100.29,131458900000.0
4,2020/07,1612.0,4447.0,3.016,0.09,10.2,3105.92,3279.99,3101.17,3271.12,3271.12,96928130000.0


Note: the cells below this point are older code.

In [13]:
# final_df is the frame that gets exported below. It currently points at df5, the
# result of the last merge; update this assignment whenever another merge is added.
final_df = df5


In [14]:
import os

# Step 1: Create the output folder if it doesn't exist.
# The path is built with os.path.join instead of the literal 'Resources\Output':
# that literal contains the invalid escape sequence '\O' and hard-codes the
# Windows separator, so on other platforms it would create a directory literally
# named 'Resources\Output' rather than writing into the Resources/Output folder
# the input CSVs are read from.
folder_path = os.path.join('Resources', 'Output')
if not os.path.exists(folder_path):
    # exist_ok guards against the folder appearing between the check and the call.
    os.makedirs(folder_path, exist_ok=True)
    print(f"Folder created: {folder_path}")
else:
    print(f"Folder already exists: {folder_path}")

# Step 2: Define the CSV file path
csv_path = os.path.join(folder_path, 'collated_data.csv')

# Step 3: Save the combined DataFrame as a CSV file.
# index=False keeps the row index out of the file, so reading it back does not
# produce an extra 'Unnamed: 0' column.
final_df.to_csv(csv_path, index=False)
print(f"DataFrame saved as CSV at {csv_path}")


Folder already exists: Resources\Output
DataFrame saved as CSV at Resources\Output\collated_data.csv
