In [10]:
%run EconIndicators.ipynb

# Importing libraries for regression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn import linear_model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

from sklearn.metrics import mean_squared_error, r2_score

In [2]:
# Lookup from FRED exchange-rate series identifier to the three-letter label
# that replaces it as a column name when the volatility table is renamed.
# NOTE(review): 'CAD' and 'THB' look like currency codes rather than country
# codes — confirm they match the labels produced by EconIndicators.
rename_dict = dict([
    ('FRED/DEXUSAL', 'AUS'),
    ('FRED/DEXBZUS', 'BRA'),
    ('FRED/DEXUSUK', 'GBR'),
    ('FRED/DEXCAUS', 'CAD'),
    ('FRED/DEXCHUS', 'CHN'),
    ('FRED/DEXUSEU', 'EUU'),
    ('FRED/DEXINUS', 'IND'),
    ('FRED/DEXJPUS', 'JPN'),
    ('FRED/DEXMAUS', 'MYS'),
    ('FRED/DEXMXUS', 'MEX'),
    ('FRED/DEXUSNZ', 'NZL'),
    ('FRED/DEXNOUS', 'NOR'),
    ('FRED/DEXSIUS', 'SGP'),
    ('FRED/DEXSFUS', 'ZAF'),
    ('FRED/DEXKOUS', 'KOR'),
    ('FRED/DEXSDUS', 'SWE'),
    ('FRED/DEXSZUS', 'CHE'),
    ('FRED/DEXTHUS', 'THB'),
])

In [3]:
def getFeatures():
    """
    Builds the feature table used for training and testing the ML models.

    The economic indicators come from the EconIndicators.ipynb helpers
    (getAllIndicatorsTraining followed by relativeStrength). Annualized
    volatility is read from "annualized_volatility.csv" and re-keyed as
    <column label><year - 1>, i.e. each volatility value is filed under the
    year before the one it was recorded for.
    NOTE(review): this presumably lines up with an index of country code + year
    on the indicator frame, pairing a year's indicators with the following
    year's volatility — confirm against EconIndicators.

    Returns the indicator columns plus an "Annual Volatility" column, keeping
    only the index labels present in both sources (inner join).
    """
    # Training indicators from EconIndicators.ipynb, then normalised
    indicators = relativeStrength(getAllIndicatorsTraining(20, full_country_basket))

    # Volatility rows from index label 2004 onward; columns relabelled through
    # rename_dict and the series listed below discarded
    dropped_series = ['FRED/DEXDNUS', 'FRED/DEXHKUS', 'FRED/DEXTAUS', 'FRED/DEXVZUS']
    volatility = (
        pd.read_csv("annualized_volatility.csv", index_col = "Date")
        .loc[2004:]
        .rename(columns = rename_dict)
        .drop(columns = dropped_series)
    )

    # Flatten the year-by-column table into one entry per (column, year),
    # with the key shifted back one year
    shifted_volatility = {
        country + str(year - 1): volatility[country][year]
        for country in volatility.columns
        for year in volatility[country].index
    }
    volatility_df = pd.DataFrame.from_dict(
        shifted_volatility, orient = "index", columns = ["Annual Volatility"]
    )

    # Keep only the rows that exist in both frames
    return pd.concat([indicators, volatility_df], axis = "columns", join = "inner")

In [21]:
def trainEconLinearRegresion(features):
    """
    Fits a scikit-learn LinearRegression that predicts "Annual Volatility" from
    every other column of `features` except "Year".

    The data is divided with train_test_split (random_state = 1); the model is
    fitted on the training part and evaluated on the held-out part.

    Returns a tuple of:
        - the fitted model,
        - a comparison df holding the held-out "Annual Volatility" values next
          to a "Predicted Volatility" column,
        - the mean squared error on the held-out part.
    """
    predictors = features.drop(columns = ["Year", "Annual Volatility"])
    target = features["Annual Volatility"]

    # Fixed random_state keeps the train/test partition the same on every run
    train_X, test_X, train_y, test_y = train_test_split(predictors, target, random_state = 1)

    # fit() hands back the estimator itself, so creation and fitting chain
    regressor = linear_model.LinearRegression().fit(train_X, train_y)

    # Score the held-out rows and put actual and predicted values side by side
    predicted = regressor.predict(test_X)
    comparison = test_y.to_frame()
    comparison["Predicted Volatility"] = predicted

    test_mse = mean_squared_error(test_y, predicted)

    return regressor, comparison, test_mse

In [28]:
def trainEconDeepLearning(features):
    """
    Trains a small feed-forward neural network that predicts "Annual Volatility"
    from every other column of `features` except "Year".

    The data is divided with train_test_split (random_state = 1). A
    StandardScaler is fitted on the training part only and applied to both
    parts. The network has two hidden ReLU layers of 10 units and a single
    linear output, and is trained with Adam on mean squared error.

    No random seed is set inside this function, so the fitted weights (and
    therefore the mse) can differ between runs.

    Returns a tuple of:
        - the fitted Keras model,
        - a comparison df holding the held-out "Annual Volatility" values next
          to a "Predicted Volatility" column,
        - the fitted scaler (needed to transform new inputs before predicting),
        - the mean squared error on the held-out part.
    """

    # Use the `features` argument; the previous version read the global
    # `features_df`, silently ignoring whatever the caller passed in.
    X = features.drop(columns = ["Year", "Annual Volatility"])
    y = features["Annual Volatility"]

    # Establishing test/train split for Deep Learning Model
    X_dl_train, X_dl_test, y_dl_train, y_dl_test = train_test_split(X, y, random_state = 1)

    # Scaling features for model (fitted on the training rows only)
    scaler = StandardScaler().fit(X_dl_train)
    X_dl_train_sc = scaler.transform(X_dl_train)
    X_dl_test_sc = scaler.transform(X_dl_test)

    # Create the model
    number_inputs = X.shape[1]
    number_hidden_nodes = 10

    neuron = Sequential()

    neuron.add(Dense(units = number_hidden_nodes, activation = "relu", input_dim = number_inputs))
    neuron.add(Dense(units = number_hidden_nodes, activation = "relu"))
    neuron.add(Dense(1))

    # Compiling model
    neuron.compile(optimizer = "adam", loss = "mean_squared_error")

    # Training the model
    neuron.fit(X_dl_train_sc, y_dl_train, epochs = 1000, batch_size = 10, verbose = 0)

    # The single-output network predicts one value per row in a 2-D array;
    # flatten it so it is stored as an ordinary 1-D column.
    predictions_dl = neuron.predict(X_dl_test_sc).ravel()

    comp_df = y_dl_test.to_frame()
    comp_df["Predicted Volatility"] = predictions_dl

    mse = mean_squared_error(y_dl_test, predictions_dl)

    return neuron, comp_df, scaler, mse