# Speciale

Af Cecilie og Emma

Vejledt af Heino Bohn Nielsen

In [1]:
# Import relevant packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller

## Data
### Data description
Data is from Federal Reserve Economic Data (FRED). It is the quarterly database for macroeconomic research (FRED-QD). The database is updated in real time using the FRED database. A full description of data can be found here:

https://s3.amazonaws.com/real.stlouisfed.org/wp/2020/2020-005.pdf

https://files.stlouisfed.org/files/htdocs/publications/review/2021/01/14/fred-qd-a-quarterly-database-for-macroeconomic-research.pdf

The database covers the period of 1959-Q1 to 2023-Q4.

In [2]:
# Import data (.csv file) as a pandas dataframe
macro_data = pd.read_csv("macro_data.csv")
macro_data = macro_data.iloc[2:].reset_index(drop=True)

# macro_data.info()
# macro_data.describe()

In [3]:
# Handeling missing values

# Count the number of missing values in each column
# print(macro_data.isna().sum())

# Find the five percent threshold
threshold = 12
# threshold = len(macro_data) * 0.05

# Create a filter
cols_to_drop = macro_data.columns[macro_data.isna().sum() <= threshold]

# Drop missing values for columns below the threshold
macro_data.dropna(subset=cols_to_drop, inplace=True)

macro_data_na = macro_data.isna().sum()

# print(cols_to_drop)

# with pd.option_context('display.max_rows', None, 'display.max_columns', None):  # more options can be specified also
#    print(macro_data_na)

In [4]:
# Reshape data into long format
macro_data_long = macro_data.melt(id_vars=['sasdate'])
macro_data_long['sasdate'] = pd.to_datetime(macro_data_long['sasdate'])
macro_data_long

Unnamed: 0,sasdate,variable,value
0,1960-03-01,GDPC1,3517.1810
1,1960-06-01,GDPC1,3498.2460
2,1960-09-01,GDPC1,3515.3850
3,1960-12-01,GDPC1,3470.2780
4,1961-03-01,GDPC1,3493.7030
...,...,...,...
62479,2022-06-01,S&P PE ratio,21.6306
62480,2022-09-01,S&P PE ratio,20.0276
62481,2022-12-01,S&P PE ratio,19.4586
62482,2023-03-01,S&P PE ratio,20.8628


## Transformations

In appendix to the database we find descriptions and data transformations for the series. The column "tcode" tells how to transform the series to that they are stationary. 

It consists of the following data transformations:

1. No transformation
2. $Δx_t$
3. $Δx_t-Δx_{t-1}$
4. $\log(x_t)$
5. $\Delta\log(x_t)$
6. $Δ\log(x_t)-Δ\log(x_{t-1})$
7. $\Delta(x_t/x_{t-1}-1)$

In [5]:
# Import appendix
appendix_data = pd.read_csv("FRED-QD_updated_appendix.csv", sep=";", encoding='unicode_escape')
appendix_data_sub = appendix_data[['FRED MNEMONIC','TCODE']]
# From the appendix data, we need the TCODE column

In [6]:
# Join TCODE onto data
joined_data = macro_data_long.merge(appendix_data_sub, left_on='variable', right_on='FRED MNEMONIC')
joined_data.head()

Unnamed: 0,sasdate,variable,value,FRED MNEMONIC,TCODE
0,1960-03-01,GDPC1,3517.181,GDPC1,5
1,1960-06-01,GDPC1,3498.246,GDPC1,5
2,1960-09-01,GDPC1,3515.385,GDPC1,5
3,1960-12-01,GDPC1,3470.278,GDPC1,5
4,1961-03-01,GDPC1,3493.703,GDPC1,5


In [7]:
# Map transformation codes to functions
def transform(series, code):
    if code == 1:
        # No transformation
        return series
    elif code == 2:
        # First difference
        return series.diff()
    elif code == 3:
        # Second difference
        return series.diff().diff()
    elif code == 4:
        # Logarithm
        return np.log(series)
    elif code == 5:
        # First difference of log
        return np.log(series).diff()
    elif code == 6:
        # First difference of log minus lagged first difference of log
        return np.log(series).diff() - np.log(series).diff().shift(1)
    elif code == 7:
        # First difference of the rate of change
        return (series / series.shift(1) - 1).diff()
    else:
        raise ValueError(f"Unknown transformation code: {code}")

In [8]:
# Define function to transform column
def apply_transformations(df, value_column='value', tcode_column='TCODE'):
    # Create an empty DataFrame to store the transformed values
    transformed_df = pd.DataFrame()

    # Group by the 'variable' column and apply the transformation for each group
    for variable, group in df.groupby('variable'):
        # Get the transformation code for this variable (assuming it's constant within the group)
        tcode = group[tcode_column].iloc[0]
        # Apply the transformation based on the tcode
        group['transformed_value'] = transform(group[value_column], tcode)
        # Append the transformed group to the transformed_df DataFrame
        transformed_df = pd.concat([transformed_df, group])

    return transformed_df

In [20]:
# Adding a column with tranformations of each data
transformed_df = apply_transformations(joined_data)
transformed_df.head()

Unnamed: 0,sasdate,variable,value,FRED MNEMONIC,TCODE,transformed_value
2540,1960-03-01,A014RE1Q156NBEA,2.1,A014RE1Q156NBEA,1,2.1
2541,1960-06-01,A014RE1Q156NBEA,0.6,A014RE1Q156NBEA,1,0.6
2542,1960-09-01,A014RE1Q156NBEA,0.8,A014RE1Q156NBEA,1,0.8
2543,1960-12-01,A014RE1Q156NBEA,-1.1,A014RE1Q156NBEA,1,-1.1
2544,1961-03-01,A014RE1Q156NBEA,-0.5,A014RE1Q156NBEA,1,-0.5


In [14]:
def test_for_unit_roots_5pct(df, variable_column, transformed_value_column):
    results = []
    for variable, group in df.groupby(variable_column):
        time_series = group[transformed_value_column].dropna()  # Ensure there are no NaNs
        adf_result = adfuller(time_series)
        test_statistic = adf_result[0]
        critical_value_5pct = adf_result[4]['5%']
        p_value = adf_result[1]
        # Check if the test statistic is less than the critical value at 5%
        reject_null = test_statistic < critical_value_5pct
        results.append({
            'variable': variable,
            'ADF Statistic': test_statistic,
            'p-value': p_value,
            'Reject Null Hypothesis at 5%': reject_null
        })
    # Convert the results to a DataFrame
    adf_df = pd.DataFrame(results)
    return adf_df

In [17]:
# Test for stationarity of the transformed time series
adf_tests = test_for_unit_roots_5pct(transformed_df, 'variable', 'transformed_value')
# adf_tests.to_csv('adf_tests.csv', index=False)


## Investigating dpendent variable: Growth rate of GDP

In [19]:
gdp_df = transformed_df[transformed_df['variable'] == 'GDP']
gdp_df.head()

Unnamed: 0,sasdate,variable,value,FRED MNEMONIC,TCODE,transformed_value
