In [26]:
# Import dependencies
import pandas as pd
import datetime
import numpy as np

# Clean FEDFUNDS data

In [27]:
# Read the FEDFUNDS workbook into a DataFrame, skipping the first 10 rows of
# the sheet so that the observation_date / FEDFUNDS row becomes the header.
ff = pd.read_excel(
    io="Working_Folder/Data/FEDFUNDS.xls",
    skiprows=10,
)
ff

Unnamed: 0,observation_date,FEDFUNDS
0,1954-07-01,0.80
1,1954-08-01,1.22
2,1954-09-01,1.07
3,1954-10-01,0.85
4,1954-11-01,0.83
...,...,...
810,2022-01-01,0.08
811,2022-02-01,0.08
812,2022-03-01,0.20
813,2022-04-01,0.33


In [28]:
# Check dtypes
# Prints the dtype of every column in ff. The .dt accessor applied to
# observation_date later in the notebook requires a datetime dtype.
print(ff.dtypes)

observation_date    datetime64[ns]
FEDFUNDS                   float64
dtype: object


In [29]:
# Derive integer Year and Month columns from observation_date, then keep only
# Year, Month and FEDFUNDS (in that order). Selecting those three columns also
# discards observation_date, so no separate drop step is needed.
ff = (
    ff
    .assign(
        Month=lambda frame: frame['observation_date'].dt.month,
        Year=lambda frame: frame['observation_date'].dt.year,
    )
    .loc[:, ['Year', 'Month', 'FEDFUNDS']]
)
ff

Unnamed: 0,Year,Month,FEDFUNDS
0,1954,7,0.80
1,1954,8,1.22
2,1954,9,1.07
3,1954,10,0.85
4,1954,11,0.83
...,...,...,...
810,2022,1,0.08
811,2022,2,0.08
812,2022,3,0.20
813,2022,4,0.33


In [30]:
# Sort chronologically by Year, then Month (both ascending).
# DataFrame.sort_values returns a new, sorted DataFrame and leaves the original
# untouched, so the result has to be assigned back to ff; calling it without
# the assignment silently discards the sorted frame.
ff = ff.sort_values(by=['Year', 'Month'], ascending=[True, True])
ff

Unnamed: 0,Year,Month,FEDFUNDS
0,1954,7,0.80
1,1954,8,1.22
2,1954,9,1.07
3,1954,10,0.85
4,1954,11,0.83
...,...,...,...
810,2022,1,0.08
811,2022,2,0.08
812,2022,3,0.20
813,2022,4,0.33


## Remove unnecessary years (only want 2002 to present)

In [31]:
# Check Year values
# Counts the number of rows per Year. value_counts orders the result by count
# (largest first), not by year, so partial years appear at the bottom.
ff['Year'].value_counts()

1988    12
2004    12
1990    12
1991    12
1992    12
        ..
1984    12
1985    12
2020    12
1954     6
2022     5
Name: Year, Length: 69, dtype: int64

In [32]:
# Keep only the rows whose Year is 2002 or later, then recount the rows per
# Year to confirm which years remain.
ff = ff[ff['Year'].ge(2002)]
ff['Year'].value_counts()

2002    12
2013    12
2021    12
2020    12
2019    12
2018    12
2017    12
2016    12
2015    12
2014    12
2012    12
2003    12
2011    12
2010    12
2009    12
2008    12
2007    12
2006    12
2005    12
2004    12
2022     5
Name: Year, dtype: int64

In [33]:
# Check dtypes
# Prints the dtype of every column of ff as it stands at this point in the
# notebook, before the (currently disabled) CSV export.
print(ff.dtypes)

Year          int64
Month         int64
FEDFUNDS    float64
dtype: object


In [35]:
# Export to CSV
# NOTE(review): the export below is commented out, so running this cell writes
# nothing. Uncomment it to save ff (without the index column) to
# Clean_Data/clean_fedfunds.csv.
# ff.to_csv('Clean_Data/clean_fedfunds.csv', index=False)

# Clean GDP data

In [36]:
# Read the GDP workbook into a DataFrame; no rows are skipped, so the first row
# of the sheet is used as the header.
gdp = pd.read_excel(
    io="Working_Folder/Data/GDP.xls",
)
gdp

Unnamed: 0,observation_date,GDP
0,1947-01-01,243.164
1,1947-04-01,245.968
2,1947-07-01,249.585
3,1947-10-01,259.745
4,1948-01-01,265.742
...,...,...
296,2021-01-01,22038.226
297,2021-04-01,22740.959
298,2021-07-01,23202.344
299,2021-10-01,24002.815


In [37]:
# Check dtypes
# Prints the dtype of every column in gdp. The .dt accessor applied to
# observation_date later in the notebook requires a datetime dtype.
print(gdp.dtypes)

observation_date    datetime64[ns]
GDP                        float64
dtype: object


In [38]:
# Derive integer Year and Month columns from observation_date, then keep only
# Year, Month and GDP (in that order). Selecting those three columns also
# discards observation_date, so no separate drop step is needed.
gdp = (
    gdp
    .assign(
        Month=lambda frame: frame['observation_date'].dt.month,
        Year=lambda frame: frame['observation_date'].dt.year,
    )
    .loc[:, ['Year', 'Month', 'GDP']]
)

In [39]:
# Sort chronologically by Year, then Month (both ascending).
# DataFrame.sort_values returns a new, sorted DataFrame and leaves the original
# untouched, so the result has to be assigned back to gdp; calling it without
# the assignment silently discards the sorted frame.
gdp = gdp.sort_values(by=['Year', 'Month'], ascending=[True, True])
gdp

Unnamed: 0,Year,Month,GDP
0,1947,1,243.164
1,1947,4,245.968
2,1947,7,249.585
3,1947,10,259.745
4,1948,1,265.742
...,...,...,...
296,2021,1,22038.226
297,2021,4,22740.959
298,2021,7,23202.344
299,2021,10,24002.815


In [40]:
# Check dtypes
# Prints the dtype of every column of gdp as it stands at this point in the
# notebook, before the (currently disabled) CSV export.
print(gdp.dtypes)

Year       int64
Month      int64
GDP      float64
dtype: object


In [41]:
# Export to CSV
# NOTE(review): the export below is commented out, so running this cell writes
# nothing. Uncomment it to save gdp (without the index column) to
# Clean_Data/clean_gdp.csv.
# gdp.to_csv('Clean_Data/clean_gdp.csv', index=False)