# Problem Set 1

In [1]:
import numpy as np
import pandas as pd
import datetime
import pandas_datareader
import matplotlib.pyplot as plt

## 0. Example Code

Below is some example code to get you started. It should help you get familiar with some of the syntax and functions you will be using in this assignment. 

In [2]:
# The datetime module is Python's standard way to handle dates. We use it to create
# variables for the start and end dates of the data we want to download.
example_date = datetime.datetime(year=2020, month=1, day=1)

# type() reveals the class of the variable, which should be datetime.datetime
print("example_data is an instance of the class ", type(example_date))
# Printing the object itself shows the date in the format YYYY-MM-DD HH:MM:SS
print(example_date)
# The components of the date are available as attributes: year, month and day
print("example_date.year is ", example_date.year)
print("example_date.month is ", example_date.month)
print("example_date.day is ", example_date.day)

example_data is an instance of the class  <class 'datetime.datetime'>
2020-01-01 00:00:00
example_date.year is  2020
example_date.month is  1
example_date.day is  1


In [3]:
# help() prints the built-in documentation of the datetime class,
# i.e. its constructor arguments and the methods it provides.
help(datetime.datetime)

Help on class datetime in module datetime:

class datetime(date)
 |  datetime(year, month, day[, hour[, minute[, second[, microsecond[,tzinfo]]]]])
 |
 |  The year, month and day arguments are required. tzinfo may be None, or an
 |  instance of a tzinfo subclass. The remaining arguments may be ints.
 |
 |  Method resolution order:
 |      datetime
 |      date
 |      builtins.object
 |
 |  Methods defined here:
 |
 |  __add__(self, value, /)
 |      Return self+value.
 |
 |  __eq__(self, value, /)
 |      Return self==value.
 |
 |  __ge__(self, value, /)
 |      Return self>=value.
 |
 |  __getattribute__(self, name, /)
 |      Return getattr(self, name).
 |
 |  __gt__(self, value, /)
 |      Return self>value.
 |
 |  __hash__(self, /)
 |      Return hash(self).
 |
 |  __le__(self, value, /)
 |      Return self<=value.
 |
 |  __lt__(self, value, /)
 |      Return self<value.
 |
 |  __ne__(self, value, /)
 |      Return self!=value.
 |
 |  __radd__(self, value, /)
 |      Return value+

In [4]:
# Sample window: first and last date of the data you want to download.
start = datetime.datetime(year=1955, month=1, day=1)
end = datetime.datetime(year=2024, month=1, day=1)

# Build the quarterly calendar between the two dates and assign it to the variable 'datelist'.
# freq='QS' means quarterly frequency, dated at the start of each quarter.
datelist = pd.date_range(start, end, freq='QS')

# len() returns the number of elements of any list or array in Python, here the number of quarters.
print('total number of quarters:', len(datelist))
# Index 0 selects the first element of the list, i.e. the first quarter.
print('first quarter:', datelist[0])
# Index -1 counts from the end of the list, i.e. the last quarter.
print('last quarter:', datelist[-1])

total number of quarters: 277
first quarter: 1955-01-01 00:00:00
last quarter: 2024-01-01 00:00:00


In [5]:
# FRED codes of the series to download. Several series can be requested in one call
# by putting all their codes into this list.
indicator_list = ['GDP']

# pandas_datareader downloads the data from the Federal Reserve Economic Data (FRED) database.
# DataReader takes the list of series names, the data source ('fred') and the start and end dates.
df = pandas_datareader.data.DataReader(
    name=indicator_list,
    data_source='fred',
    start=start,
    end=end,
)

# Look at the first 5 rows to check that the download worked and to see the structure of the dataframe.
df.head(5)

Unnamed: 0_level_0,GDP
DATE,Unnamed: 1_level_1
1955-01-01,413.073
1955-04-01,421.532
1955-07-01,430.221
1955-10-01,437.092
1956-01-01,439.746


In [6]:
df.info() # Summary of the dataframe: index range, number of non-null values and data type of each column, memory usage.

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 277 entries, 1955-01-01 to 2024-01-01
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   GDP     277 non-null    float64
dtypes: float64(1)
memory usage: 4.3 KB


In [7]:
# Example of creating a new column in a dataframe.
# Arithmetic on a column is applied element by element, so every GDP value is doubled.
df['new_column'] = 2 * df['GDP']

# Check the new column
df.head(5)

Unnamed: 0_level_0,GDP,new_column
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1
1955-01-01,413.073,826.146
1955-04-01,421.532,843.064
1955-07-01,430.221,860.442
1955-10-01,437.092,874.184
1956-01-01,439.746,879.492


In [8]:
# You might want to merge two dataframes with different frequencies, e.g. one with quarterly
# data and one with monthly data. To get everything at quarterly frequency, the monthly data
# has to be downsampled, for instance with the 'asfreq' or 'resample' methods.
# Below, the same result is obtained in a more manual way.

# Industrial production is often used as a proxy for GDP that is available at monthly frequency
df2 = pandas_datareader.data.DataReader(
    name='INDPRO',
    data_source='fred',
    start=start,
    end=end,
)
# Note that the index advances month by month
print(df2.index)

# Manual downsampling: for each index element of df2, check whether it is also in the index of df.
# The result is a boolean array of the same length as df2 (True = date also present in df).
mask = df2.index.isin(df.index)
print(mask)

# Keep only the rows of df2 whose dates correspond to the beginning of a quarter in df.
df2_quarterly = df2.loc[mask]
df2_quarterly.head(5)

DatetimeIndex(['1955-01-01', '1955-02-01', '1955-03-01', '1955-04-01',
               '1955-05-01', '1955-06-01', '1955-07-01', '1955-08-01',
               '1955-09-01', '1955-10-01',
               ...
               '2023-04-01', '2023-05-01', '2023-06-01', '2023-07-01',
               '2023-08-01', '2023-09-01', '2023-10-01', '2023-11-01',
               '2023-12-01', '2024-01-01'],
              dtype='datetime64[ns]', name='DATE', length=829, freq=None)
[ True False False  True False False  True False False  True False False
  True False False  True False False  True False False  True False False
  True False False  True False False  True False False  True False False
  True False False  True False False  True False False  True False False
  True False False  True False False  True False False  True False False
  True False False  True False False  True False False  True False False
  True False False  True False False  True False False  True False False
  True False False  True 

Unnamed: 0_level_0,INDPRO
DATE,Unnamed: 1_level_1
1955-01-01,19.2735
1955-04-01,20.2143
1955-07-01,20.7251
1955-10-01,21.182
1956-01-01,21.4508


In [9]:
# pd.concat is one way to combine dataframes; help() prints its documentation.
help(pd.concat)

Help on function concat in module pandas.core.reshape.concat:

concat(objs: 'Iterable[Series | DataFrame] | Mapping[HashableT, Series | DataFrame]', *, axis: 'Axis' = 0, join: 'str' = 'outer', ignore_index: 'bool' = False, keys: 'Iterable[Hashable] | None' = None, levels=None, names: 'list[HashableT] | None' = None, verify_integrity: 'bool' = False, sort: 'bool' = False, copy: 'bool | None' = None) -> 'DataFrame | Series'
    Concatenate pandas objects along a particular axis.

    Allows optional set logic along the other axes.

    Can also add a layer of hierarchical indexing on the concatenation axis,
    which may be useful if the labels are the same (or overlapping) on
    the passed axis number.

    Parameters
    ----------
    objs : a sequence or mapping of Series or DataFrame objects
        If a mapping is passed, the sorted keys will be used as the `keys`
        argument, unless it is passed, in which case the values will be
        selected (see below). Any None objects

In [10]:
# Have a look at the pandas cheat sheet for some illustrations of how to combine dataframes.
# Concatenating along the columns places the dataframes side by side, aligned on their index,
# so the result contains all the columns from both dataframes.
frames_to_join = [df, df2_quarterly]
df_combined = pd.concat(frames_to_join, axis='columns')

# Check the new dataframe
df_combined.head(5)

Unnamed: 0_level_0,GDP,new_column,INDPRO
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1955-01-01,413.073,826.146,19.2735
1955-04-01,421.532,843.064,20.2143
1955-07-01,430.221,860.442,20.7251
1955-10-01,437.092,874.184,21.182
1956-01-01,439.746,879.492,21.4508


## 1. Macroeconomic Data

### 1.1: 
Download quarterly data on nominal GDP and the GDP deflator using the pandas-datareader library and the FRED codes `GDP` and `GDPDEF`. From this data calculate a series for real GDP and inflation.

In [None]:
# First encode the dates in a Python-friendly format to specify the desired time period.
# Use the datetime module - it is the general way to handle dates in Python.
# TODO: replace the YYYY, MM, DD placeholders with the start date and fill in the end date.
start = datetime.datetime(YYYY,MM,DD)
end = 

datelist = pd.date_range(start=start, end=end, freq='QS') # freq='QS' means quarterly frequency at the start of the quarter
print('total number of quarters:', len(datelist)) # Sanity check: number of quarters in the chosen window

total number of quarters: 277


In [None]:
# List the FRED codes of the series to download.
# TODO: fill in the list of series codes given in the task description.
indicator_list = 

# Several series can be downloaded in one call. Just combine their codes in a list.
df = pandas_datareader.data.DataReader(indicator_list, 'fred', start, end)
df.head(5) # Check that there is one column per requested series

Unnamed: 0_level_0,GDP,GDPDEF
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1
1955-01-01,413.073,13.675
1955-04-01,421.532,13.731
1955-07-01,430.221,13.827
1955-10-01,437.092,13.964
1956-01-01,439.746,14.104


In [None]:
# Calculate real GDP from nominal GDP and the GDP deflator.
# TODO: fill in the formula.
# NOTE: the deflator looks like a price index rather than a ratio - check the scaling of your result.
df['RGDP'] = 
df.head(5) # Check the new column

Unnamed: 0_level_0,GDP,GDPDEF,RGDP
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1955-01-01,413.073,13.675,3020.64351
1955-04-01,421.532,13.731,3069.929357
1955-07-01,430.221,13.827,3111.455847
1955-10-01,437.092,13.964,3130.134632
1956-01-01,439.746,14.104,3117.881452


### 1.2. 
Calculate the quarterly growth rate for real GDP.

In [None]:
# Use a built-in pandas method to compute the quarter-on-quarter changes.
# TODO: fill in the growth rate of real GDP and the inflation rate (growth rate of the GDP deflator).
# NOTE: in the reference output RGDP_qoq is a fraction (e.g. 0.016316) while INFL is in percent
# (e.g. 0.409506), so pay attention to the units of each column.
df['RGDP_qoq'] =
df['INFL'] = 

Unnamed: 0_level_0,GDP,GDPDEF,RGDP,RGDP_qoq,INFL
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1955-01-01,413.073,13.675,3020.64351,,
1955-04-01,421.532,13.731,3069.929357,0.016316,0.409506
1955-07-01,430.221,13.827,3111.455847,0.013527,0.699148
1955-10-01,437.092,13.964,3130.134632,0.006003,0.990815
1956-01-01,439.746,14.104,3117.881452,-0.003915,1.002578


### 1.3: 
Also download quarterly data on unemployment and the federal funds rate from FRED and combine the data for real GDP, inflation, unemployment and the federal funds rate at quarterly frequency in one DataFrame.

In [None]:
# You might want to check the FRED webpage for the codes of the variables you need.
# TODO: replace '???' with the FRED code of the unemployment rate series.
indicator_list = ['???', 'DFF']

# Both series are downloaded into one dataframe. In the reference output the result has a daily
# index, with missing values (NaN) in the unemployment column on most days.
df2 = pandas_datareader.data.DataReader(indicator_list, 'fred', start, end)
df2.head() # Check the first rows of the download

Unnamed: 0_level_0,UNRATE,DFF
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1
1955-01-01,4.9,1.44
1955-01-02,,1.44
1955-01-03,,1.44
1955-01-04,,1.25
1955-01-05,,1.0


In [None]:
# Check whether the frequency of the data matches the quarterly data. If not, you might want to resample the data before merging.
# Compare the number of entries and the non-null count of each column to spot series with a different frequency.
df2.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 25203 entries, 1955-01-01 to 2024-01-01
Freq: D
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   UNRATE  829 non-null    float64
 1   DFF     25203 non-null  float64
dtypes: float64(2)
memory usage: 590.7 KB


### 1.4:
Use the Matplotlib library to create one plot, with 4 subplots in a 2x2 layout, containing the variables downloaded in the previous task.

In [None]:
# This is how you would plot a single figure with the unemployment rate.
# The unemployment rate is downloaded into 'df2'; 'df' only contains an 'UNRATE' column once
# you have merged the data into it. Use whichever dataframe currently holds the column.
unrate_source = df if 'UNRATE' in df.columns else df2

# Drop missing values: matplotlib does not connect points that are separated by NaN, so a
# monthly series stored on a daily index would otherwise produce an empty plot.
unrate = unrate_source['UNRATE'].dropna()

# Create the figure and its axes explicitly; the same 'ax' methods are used for subplots.
fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(unrate.index, unrate, label='Unemployment Rate', color='blue')
ax.set_title('Unemployment Rate')
ax.set_xlabel('Date')
ax.set_ylabel('%')
ax.legend()
plt.show() # Render the figure without echoing the Legend object as cell output

## 2: Effect of Government Investment on Unemployment

### 2.1 
Download data on GDP (`GDPC1`), government spending (`GCEC1`) and unemployment (`UNRATE`) from FRED.

### 2.2
Add new columns transforming GDP and government spending to a year-over-year growth rate in percent.

### 2.3
Use the Matplotlib library to plot the unemployment rate and government spending as well as its year-over-year growth rate.

### 2.4

Carry out the following regressions with unemployment as the dependent variable and the following independent variables:

(a) Government spending.

(b) Growth in government spending.

(c) Growth in government spending and GDP.