# Import Packages

In [None]:
import numpy as np
import pandas as pd
from statsmodels.tsa.stattools import adfuller

# Method 1 (X)

In [2]:
def method_1(file_name, df):
    """Expose the untransformed series under the name ``method_1``.

    Args:
        file_name: Not used by this function.
        df: DataFrame containing a ``Value`` column.  Modified in place.

    Returns:
        The same ``df`` object, now carrying a ``method_1`` column that
        holds the contents of ``Value``.
    """
    raw_values = df["Value"]
    df["method_1"] = raw_values
    return df

# Method 2 (simple difference)

In [3]:
def method_2(file_name, df):
    """Add a ``method_2`` column holding the first difference of ``Value``.

    Row ``i`` receives ``Value[i] - Value[i - 1]``.  The first row has no
    predecessor and receives 0.

    The computation is positional, so it gives the same answer whatever the
    DataFrame's index labels are, and an empty frame gets an empty column
    instead of raising.

    Args:
        file_name: Not used by this function.
        df: DataFrame with a numeric ``Value`` column.  Modified in place.

    Returns:
        The same ``df`` object, with the ``method_2`` column added.
    """
    values = df["Value"].to_numpy()
    # zeros_like keeps the dtype of the input and supplies the 0 placeholder
    # for the first row; the remaining rows are filled with the differences.
    differences = np.zeros_like(values)
    differences[1:] = np.diff(values)

    df["method_2"] = differences
    return df

# Method 3 (difference of squares)

In [4]:
def method_3(file_name, df):
    """Add a ``method_3`` column holding the difference of squared values.

    Row ``i`` receives ``Value[i]**2 - Value[i - 1]**2`` (a difference of
    squares, not the square of the difference).  The first row has no
    predecessor and receives 0.

    The computation is positional, so it gives the same answer whatever the
    DataFrame's index labels are, and an empty frame gets an empty column
    instead of raising.

    Args:
        file_name: Not used by this function.
        df: DataFrame with a numeric ``Value`` column.  Modified in place.

    Returns:
        The same ``df`` object, with the ``method_3`` column added.
    """
    squares = df["Value"].to_numpy() ** 2
    # First entry stays 0; the rest are consecutive differences of the squares.
    squared_differences = np.zeros_like(squares)
    squared_differences[1:] = np.diff(squares)

    df["method_3"] = squared_differences
    return df

# Method 4 (log-normal)

In [5]:
def method_4(file_name, df):
    """Add a ``method_4`` column holding the log of consecutive value ratios.

    Row ``i`` receives ``log(Value[i] / Value[i - 1])``.  The first row has
    no predecessor and receives 0.  Where the ratio is negative, zero or
    undefined, NumPy yields NaN or +/-inf (and emits a RuntimeWarning); those
    values are stored as they are.

    The computation is positional, so it gives the same answer whatever the
    DataFrame's index labels are, and an empty frame gets an empty column
    instead of raising.

    Args:
        file_name: Not used by this function.
        df: DataFrame with a numeric ``Value`` column.  Modified in place.

    Returns:
        The same ``df`` object, with the ``method_4`` column added.
    """
    values = df["Value"].to_numpy()
    # Float output regardless of the input dtype, because log() returns floats.
    log_divisions = np.zeros(len(values), dtype=float)
    # Pair every value with its predecessor: values[1:] / values[:-1].
    log_divisions[1:] = np.log(values[1:] / values[:-1])

    df["method_4"] = log_divisions
    return df

# Apply methods on raw data

Daily time-series

In [14]:
# Names of the transformation functions to run, and how many there are
methods = [
    "method_1",
    "method_2",
    "method_3",
    "method_4",
]
n_methods = len(methods)

# CSV files that hold the daily raw time-series
files_daily = [
    "Bond Index.csv",
    "Stock Index.csv",
    "Risk Free Rate.csv",
    "NIKKEI225.csv",
    "Crude Oil.csv",
    "USD to Pound.csv",
    "DGS2.csv",
    "DGS5.csv",
    "DGS10.csv",
    "T10Y3M.csv",
    "TEDRATE.csv",
]
n_files = len(files_daily)

# For every file: load it, drop incomplete rows, run each method, save the result
for file_name in files_daily:
    df = pd.read_csv(f"Daily Raw Data/{file_name}")
    # Turn literal "#N/A" cells into NaN so that dropna() removes those rows
    df = df.replace("#N/A", np.nan)
    clean_df = df.dropna().reset_index(drop=True)
    for method in methods:
        # Look the function up by name in the module namespace, then call it;
        # its return value is not used here
        function = globals()[method]
        function(file_name, clean_df)
    # Write the cleaned frame (after all method calls) under the same file name
    output_file_name = f"Generated series/Daily/{file_name}"
    clean_df.to_csv(output_file_name, index=False)

  squared_diff = (df['Value'][i])**2 - (df['Value'][i - 1])**2
  log_div = np.log(df['Value'][i] / df['Value'][i - 1])
  log_div = np.log(df['Value'][i] / df['Value'][i - 1])
  log_div = np.log(df['Value'][i] / df['Value'][i - 1])
  log_div = np.log(df['Value'][i] / df['Value'][i - 1])
  log_div = np.log(df['Value'][i] / df['Value'][i - 1])
  log_div = np.log(df['Value'][i] / df['Value'][i - 1])
  log_div = np.log(df['Value'][i] / df['Value'][i - 1])
  log_div = np.log(df['Value'][i] / df['Value'][i - 1])


Monthly time-series

In [16]:
# Names of the transformation functions to run, and how many there are
methods = [
    "method_1",
    "method_2",
    "method_3",
    "method_4",
]
n_methods = len(methods)

# CSV files that hold the monthly raw time-series
files_monthly = [
    "CPIAUCNS yoy.csv",
    "CSUSHPISA yoy.csv",
    "IQ yoy.csv",
    "IR yoy.csv",
    "MICH.csv",
    "PPIACO yoy.csv",
    "INDPRO.csv",
    "PSAVERT.csv",
]
n_files = len(files_monthly)

# For every file: load it, drop incomplete rows, run each method, save the result
for file_name in files_monthly:
    df = pd.read_csv(f"Monthly raw data/{file_name}")
    # Turn literal "#N/A" cells into NaN so that dropna() removes those rows
    df = df.replace("#N/A", np.nan)
    clean_df = df.dropna().reset_index(drop=True)
    for method in methods:
        # Look the function up by name in the module namespace, then call it;
        # its return value is not used here
        function = globals()[method]
        function(file_name, clean_df)
    # Write the cleaned frame (after all method calls) under the same file name
    output_file_name = f"Generated series/Monthly/{file_name}"
    clean_df.to_csv(output_file_name, index=False)

  log_div = np.log(df['Value'][i] / df['Value'][i - 1])
  log_div = np.log(df['Value'][i] / df['Value'][i - 1])
  log_div = np.log(df['Value'][i] / df['Value'][i - 1])
  log_div = np.log(df['Value'][i] / df['Value'][i - 1])
  log_div = np.log(df['Value'][i] / df['Value'][i - 1])
  log_div = np.log(df['Value'][i] / df['Value'][i - 1])
  log_div = np.log(df['Value'][i] / df['Value'][i - 1])
  log_div = np.log(df['Value'][i] / df['Value'][i - 1])
  log_div = np.log(df['Value'][i] / df['Value'][i - 1])
  log_div = np.log(df['Value'][i] / df['Value'][i - 1])


# Dickey-Fuller Stationarity Test (daily and monthly)

Stationary test for daily generated time-series

In [6]:
# Run the augmented Dickey-Fuller test on every generated daily series and
# collect the test statistic and p-value in (method x file) matrices.
methods = ["method_1", "method_2", "method_3", "method_4"]
n_methods = len(methods)

files_daily = ["Bond Index.csv", "Stock Index.csv", "Risk Free Rate.csv", "NIKKEI225.csv", "Crude Oil.csv", "USD to Pound.csv", "DGS2.csv", "DGS5.csv", "DGS10.csv", "T10Y3M.csv", "TEDRATE.csv"]
n_files = len(files_daily)

# Start from NaN rather than 0: combinations skipped below are never tested,
# and a stored p-value of 0.0 would read as strong evidence of stationarity.
p_values = np.full((n_methods, n_files), np.nan)
t_statistics = np.full((n_methods, n_files), np.nan)

# Rows index methods, columns index files (matches the matrix shapes above).
for column, file_name in enumerate(files_daily):
    df = pd.read_csv("Generated series/Daily/" + file_name)
    print( "\n", file_name)
    for row, method in enumerate(methods):
        # These (file, method) pairs are deliberately not tested.
        # NOTE(review): presumably because the log-ratio series of these files
        # contains NaN/inf values -- confirm. "method_5" is not in `methods`.
        if file_name in ["Risk Free Rate.csv",  "T10Y3M.csv"] and method in ["method_4", "method_5"]:
            print("Do nothing")
        else:
            series = df[method]
            # Drop the first row, which the differencing methods fill with a
            # placeholder 0 (for method_1 this drops the first raw value).
            series = series[1:]
            result = adfuller(series)
            # adfuller returns the test statistic first and the p-value second.
            t_statistics[row, column] = result[0]
            p_values[row, column] = result[1]
            print(method)
            

Bond Index.csv 

method_1
method_2
method_3
method_4
Stock Index.csv 

method_1
method_2
method_3
method_4
Risk Free Rate.csv 

method_1
method_2
method_3
Do nothing
NIKKEI225.csv 

method_1
method_2
method_3
method_4
Crude Oil.csv 

method_1
method_2
method_3
method_4
USD to Pound.csv 

method_1
method_2
method_3
method_4
DGS2.csv 

method_1
method_2
method_3
method_4
DGS5.csv 

method_1
method_2
method_3
method_4
DGS10.csv 

method_1
method_2
method_3
method_4
T10Y3M.csv 

method_1
method_2
method_3
Do nothing
TEDRATE.csv 

method_1
method_2
method_3
method_4


Extract results

In [7]:
# Label both result matrices (one row per method, one column per file)
# and write each of them to its own CSV file.
df_p_values = pd.DataFrame(data=p_values, index=methods, columns=files_daily)
df_t_statistics = pd.DataFrame(data=t_statistics, index=methods, columns=files_daily)

df_p_values.to_csv("Dickey-Fuller Results/Daily P-value Results.csv")
df_t_statistics.to_csv("Dickey-Fuller Results/Daily T-statistics Results.csv")

Stationary test for monthly generated time-series

In [17]:
# Run the augmented Dickey-Fuller test on every generated monthly series and
# collect the test statistic and p-value in (method x file) matrices.
methods = ["method_1", "method_2", "method_3", "method_4"]
n_methods = len(methods)

files_monthly = ["CPIAUCNS yoy.csv", "CSUSHPISA yoy.csv", "IQ yoy.csv", "IR yoy.csv", "MICH.csv", "PPIACO yoy.csv",  "INDPRO.csv", "PSAVERT.csv"]
n_files = len(files_monthly)

# Start from NaN rather than 0: combinations skipped below are never tested,
# and a stored p-value of 0.0 would read as strong evidence of stationarity.
t_statistics = np.full((n_methods, n_files), np.nan)
p_values = np.full((n_methods, n_files), np.nan)

# Rows index methods, columns index files (matches the matrix shapes above).
for column, file_name in enumerate(files_monthly):
    df = pd.read_csv("Generated series/Monthly/" + file_name)
    print("\n", file_name)
    for row, method in enumerate(methods):
        # These (file, method) pairs are deliberately not tested.
        # NOTE(review): presumably because the log-ratio series of these files
        # contains NaN/inf values -- confirm.
        if file_name in ["CPIAUCNS yoy.csv", "CSUSHPISA yoy.csv", "IQ yoy.csv", "IR yoy.csv", "PPIACO yoy.csv"] and method == "method_4":
            print("Do nothing")
        else:
            series = df[method]
            # Drop the first row, which the differencing methods fill with a
            # placeholder 0 (for method_1 this drops the first raw value).
            series = series[1:]
            result = adfuller(series)
            # adfuller returns the test statistic first and the p-value second.
            t_statistics[row, column] = result[0]
            p_values[row, column] = result[1]
            print(method)


 CPIAUCNS yoy.csv
method_1
method_2
method_3
Do nothing

 CSUSHPISA yoy.csv
method_1
method_2
method_3
Do nothing

 IQ yoy.csv
method_1
method_2
method_3
Do nothing

 IR yoy.csv
method_1
method_2
method_3
Do nothing

 MICH.csv
method_1
method_2
method_3
method_4

 PPIACO yoy.csv
method_1
method_2
method_3
Do nothing

 INDPRO.csv
method_1
method_2
method_3
method_4

 PSAVERT.csv
method_1
method_2
method_3
method_4


Extract results

In [18]:
# Label both result matrices (one row per method, one column per file)
# and write each of them to its own CSV file.
df_p_values = pd.DataFrame(data=p_values, index=methods, columns=files_monthly)
df_t_statistics = pd.DataFrame(data=t_statistics, index=methods, columns=files_monthly)

df_p_values.to_csv("Dickey-Fuller Results/Monthly P-value Results.csv")
df_t_statistics.to_csv("Dickey-Fuller Results/Monthly T-statistics Results.csv")