# Deep Learning Results for Eurostat Data

This file performs Deep Learning Analysis for on Eurostat Energy supply data where missing data is 
1) imputed with the mean
2) excluded

## Load Packages

In [34]:
from tensorflow.keras.layers import LSTM, Flatten, Dense
from tensorflow.keras.models import Sequential
import tensorflow.keras.backend as K
import numpy as np
import pandas as pd
import tensorflow as tf
import os
import sys
import pickle
import sklearn
from sklearn.datasets import make_classification
from sklearn.naive_bayes import GaussianNB
from sklearn.calibration import CalibratedClassifierCV
from keras.callbacks import EarlyStopping
import random

In [27]:
# temporarily surpress output
sys.stdout = open(os.devnull, "w")
sys.stderr = open(os.devnull, "w")
# to return it
#sys.stdout = sys.__stdout__
#sys.stderr = sys.__stderr__

## Load Data

In [15]:
# Read data using pandas
data_full = pd.read_csv("/Users/elizabeth/Documents/Master's Project/Data/EU_TotalEnergySupply.csv")

# Create list of column names
data_full.columns.values.tolist()
# Rename columns to make life easier
data_full.columns = [c.replace(' ', '_') for c in data_full.columns] # remove spaces
data_full.columns = [c.replace('(', '') for c in data_full.columns] # remove open parenthesis
data_full.columns = [c.replace(')', '') for c in data_full.columns] # remove close parenthesis
data_full.columns.values.tolist()

# Get rid of ':' and shorten other names
data_full = data_full.replace([':'],'')
data_full = data_full.replace(['European Union - 27 countries (from 2020)'],'EU')
data_full = data_full.replace(['Euro area - 19 countries  (from 2015)'],'Euro area')
data_full = data_full.replace(['Germany (until 1990 former territory of the FRG)'],'Germany')
data_full = data_full.replace(['Kosovo (under United Nations Security Council Resolution 1244/99)'],'Kosovo')

# Change Data type to numeric
data_full[data_full.columns[2:]] = data_full[data_full.columns[2:]].apply(pd.to_numeric, errors ='coerce')

Impute the data

In [17]:
## 1) Impute data with the average
frames = []
for i in list(set(data_full['Country'])):
            df_country = data_full[data_full['Country'] == i] 
            df_country['Total_GWH'].fillna(df_country['Total_GWH'].mean(),inplace = True)
            df_country['Solid_fossil_fuels'].fillna(df_country['Solid_fossil_fuels'].mean(), inplace = True)
            df_country['Peat_and_peat_products'].fillna(df_country['Peat_and_peat_products'].mean(), inplace = True)
            df_country['Solar_Thermal'].fillna(df_country['Solar_Thermal'].mean(), inplace = True)
            df_country['Oil_and_petroleum_products'].fillna(df_country['Oil_and_petroleum_products'].mean(), inplace = True)
            df_country['Natural_gas'].fillna(df_country['Natural_gas'].mean(), inplace = True)
            df_country['Renewables_and_biofuels'].fillna(df_country['Renewables_and_biofuels'].mean(), inplace = True)
            df_country['Nuclear_heat'].fillna(df_country['Nuclear_heat'].mean(),inplace = True)
            df_country['Hydro'].fillna(df_country['Hydro'].mean(),inplace = True)
            df_country['Geothermal'].fillna(df_country['Geothermal'].mean(),inplace = True)
            df_country['Ambient_Heat'].fillna(df_country['Ambient_Heat'].mean(),inplace = True)
            df_country['Tide_wave_and_ocean'].fillna(df_country['Tide_wave_and_ocean'].mean(),inplace = True)
            df_country['Wind'].fillna(df_country['Wind'].mean(),inplace = True)
            df_country['Biofuels_solid'].fillna(df_country['Biofuels_solid'].mean(),inplace = True)
            df_country['Biofuels_other'].fillna(df_country['Biofuels_other'].mean(),inplace = True)
            df_country['Biofuels'].fillna(df_country['Biofuels'].mean(),inplace = True)
            frames.append(df_country)
            final_df = pd.concat(frames)
data_impute = final_df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(


## Results for Imputed Data

Full Data

In [11]:
cdat = data_impute.loc[:,['Solid_fossil_fuels','Peat_and_peat_products','Oil_and_petroleum_products', 
                          'Natural_gas', 'Nuclear_heat', 'Hydro', 'Solar_Thermal', 'Geothermal', 
                          'Ambient_Heat', 'Tide_wave_and_ocean', 'Biofuels_solid', 'Biofuels_other']]

Unnamed: 0,Country,Year,Total_GWH,Solid_fossil_fuels,Peat_and_peat_products,Oil_and_petroleum_products,Natural_gas,Renewables_and_biofuels,Nuclear_heat,Solar_Thermal,Wind,Hydro,Geothermal,Ambient_Heat,Tide_wave_and_ocean,Biofuels_solid,Biofuels_other,Biofuels
22,Poland,2020,1192325.452,475838.038,0.0,337028.005,202831.135,150618.088,0.000,932.074,15800.049,2118.337,298.189,3467.032,0.0,108513.529,22.718,108536.247
64,Poland,2019,1221101.488,509869.725,0.0,351509.998,188730.587,147723.892,0.000,835.760,15106.759,1958.416,291.777,2966.955,0.0,109250.039,23.361,109273.400
106,Poland,2018,1267323.825,572778.420,0.0,346439.617,187523.320,142875.170,0.000,662.043,12798.792,1969.997,275.296,2488.450,0.0,109250.464,22.527,109272.991
148,Poland,2017,1213879.094,577683.390,0.0,338653.065,179624.048,105218.913,0.000,633.464,14909.041,2559.581,262.649,2134.233,0.0,73161.750,21.956,73183.706
190,Poland,2016,1161827.825,576113.505,0.0,301033.707,170186.500,103619.724,0.000,608.056,12587.590,2139.446,258.333,1824.858,0.0,76992.500,20.856,77013.356
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
214,Czechia,2015,485637.458,190669.319,0.0,100142.154,75394.500,50995.877,77692.497,183.836,572.610,1794.769,0.000,1256.091,0.0,33421.944,0.000,33421.944
256,Czechia,2014,485202.472,185755.248,0.0,102476.155,71898.000,49645.373,88745.807,171.855,476.544,1908.749,0.000,1098.968,0.0,32134.167,0.000,32134.167
298,Czechia,2013,501981.172,200923.005,0.0,96327.105,80786.750,48042.219,90231.594,159.075,480.519,2734.507,0.000,957.408,0.0,30819.167,0.000,30819.167
340,Czechia,2012,499492.160,201002.605,0.0,100292.171,79736.500,43695.767,89248.451,146.117,415.817,2129.143,0.000,815.528,0.0,29505.278,0.000,29505.278


In [12]:
n_assets = 12
data = cdat
    
class Model:
    def __init__(self):
        self.data = None
        self.model = None
        
        # self.callback = EarlyStopping(monitor='loss', min_delta=.1, patience = 10)
    
    def __build_model(self, input_shape, outputs):
        model = Sequential([
            LSTM(64, input_shape=input_shape, activation='relu'),
            Flatten(),
            Dense(outputs, activation='softmax')
        ])

        def sharpe_loss(_, y_pred):
            coeffs = tf.tile(y_pred, (self.data.shape[0], 1))
            
            portfolio_values = tf.reduce_sum(tf.multiply(coeffs, self.data), axis=1)
            
            portfolio_returns = (portfolio_values[1:] - portfolio_values[:-1]) / portfolio_values[:-1]  # % change formula

            sharpe = K.mean(portfolio_returns) / K.std(portfolio_returns)
            
            # exp keeps relative ordering between positives and negatives
            #   since we want to maximize sharp, while gradient descent minimizes the loss
            #   we negate the Sharpe value
            return K.exp(-sharpe)
        
        model.compile(loss=sharpe_loss, optimizer='adam')
        return model
    
    def get_allocations(self, data):
        
        
        # data with returns
        data_w_ret = np.concatenate([ data.values[1:], data.pct_change().values[1:] ], axis=1)
        
        data = data.iloc[1:]
        self.data = tf.cast(tf.constant(data), float)
        
        if self.model is None:
            self.model = self.__build_model(data_w_ret.shape, len(data.columns))
        
        fit_predict_data = data_w_ret[np.newaxis,:]        
        self.model.fit(fit_predict_data, np.zeros((1, len(data.columns))), epochs=10, shuffle=False)
        return self.model.predict(fit_predict_data)[0]
    
model = Model() 
weights = model.get_allocations(pd.DataFrame(np.random.randn(n_assets,n_assets)))
weight_fulldata = weights
#return_fulldata = np.zeros(weight_fulldata.shape)
#risks_fulldata = np.zeros(weight_fulldata.shape)
#for j in range(n_assets):
#    return_fulldata[j] = np.sum(weight_fulldata[j] * data[i][j])
#    risks_fulldata[j] = weight_fulldata[i,j] * np.diagonal(np.cov(data[i].T))[j] * weight_fulldata[i,j]
    

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Drop one country at a time

In [30]:
# full data with country and year
cdat = data_impute.loc[:,['Country','Year','Solid_fossil_fuels','Peat_and_peat_products','Oil_and_petroleum_products', 
                          'Natural_gas', 'Nuclear_heat', 'Hydro', 'Solar_Thermal', 'Geothermal', 
                          'Ambient_Heat', 'Tide_wave_and_ocean', 'Biofuels_solid', 'Biofuels_other']]

In [28]:
weight_country = np.zeros((len(data_impute.groupby('Country')),len(cdat.T)-2))
return_country = np.zeros((len(data_impute.groupby('Country')),len(cdat.T)-2))
risks_country = np.zeros((len(data_impute.groupby('Country')),len(cdat.T)-2))

for i in range(len(data_impute.groupby('Country'))):
    country_dat = cdat.loc[(cdat.Country != cdat.Country.unique()[i])]
    country_dat = country_dat.loc[:,['Solid_fossil_fuels','Peat_and_peat_products','Oil_and_petroleum_products', 
                          'Natural_gas', 'Nuclear_heat', 'Hydro', 'Solar_Thermal', 'Geothermal', 
                          'Ambient_Heat', 'Tide_wave_and_ocean', 'Biofuels_solid', 'Biofuels_other']]
    data = country_dat
    
    class Model:
        def __init__(self):
            self.data = None
            self.model = None
        
            # self.callback = EarlyStopping(monitor='loss', min_delta=.1, patience = 10)
    
        def __build_model(self, input_shape, outputs):
            model = Sequential([
                LSTM(64, input_shape=input_shape, activation='relu'),
                Flatten(),
                Dense(outputs, activation='softmax')
            ])

            def sharpe_loss(_, y_pred):
                coeffs = tf.tile(y_pred, (self.data.shape[0], 1))
            
                portfolio_values = tf.reduce_sum(tf.multiply(coeffs, self.data), axis=1)
            
                portfolio_returns = (portfolio_values[1:] - portfolio_values[:-1]) / portfolio_values[:-1]  # % change formula

                sharpe = K.mean(portfolio_returns) / K.std(portfolio_returns)
            
                # exp keeps relative ordering between positives and negatives
                #   since we want to maximize sharp, while gradient descent minimizes the loss
                #   we negate the Sharpe value
                return K.exp(-sharpe)
        
            model.compile(loss=sharpe_loss, optimizer='adam')
            return model
    
        def get_allocations(self, data):
        
        
            # data with returns
            data_w_ret = np.concatenate([ data.values[1:], data.pct_change().values[1:] ], axis=1)
        
            data = data.iloc[1:]
            self.data = tf.cast(tf.constant(data), float)
        
            if self.model is None:
                self.model = self.__build_model(data_w_ret.shape, len(data.columns))
        
            fit_predict_data = data_w_ret[np.newaxis,:]        
            self.model.fit(fit_predict_data, np.zeros((1, len(data.columns))), epochs=10, shuffle=False)
            return self.model.predict(fit_predict_data)[0]
    
    model = Model() 
    weights = model.get_allocations(pd.DataFrame(np.random.randn(n_assets,n_assets)))
    weight_country[i,:] = weights
    #for j in range(n_assets):
    #    return_n8_corr[i,j] = np.sum(weight_n8_corr[i,j] * data_n8_corr[i][j])
    #    risks_n8_corr[i,j] = weight_n8_corr[i,j] * np.diagonal(np.cov(data_n8_corr[i].T))[j] * weight_n8_corr[i,j]

Drop one year at a time

In [32]:
weight_year = np.zeros((len(data_impute.groupby('Year')),len(cdat.T)-2))
return_year = np.zeros((len(data_impute.groupby('Year')),len(cdat.T)-2))
risks_year = np.zeros((len(data_impute.groupby('Year')),len(cdat.T)-2))

for i in range(len(data_impute.groupby('Year'))):
    year_dat = cdat.loc[(cdat.Year != cdat.Year.unique()[i])]
    year_dat = country_dat.loc[:,['Solid_fossil_fuels','Peat_and_peat_products','Oil_and_petroleum_products', 
                          'Natural_gas', 'Nuclear_heat', 'Hydro', 'Solar_Thermal', 'Geothermal', 
                          'Ambient_Heat', 'Tide_wave_and_ocean', 'Biofuels_solid', 'Biofuels_other']]
    data = country_dat
    
    class Model:
        def __init__(self):
            self.data = None
            self.model = None
        
            # self.callback = EarlyStopping(monitor='loss', min_delta=.1, patience = 10)
    
        def __build_model(self, input_shape, outputs):
            model = Sequential([
                LSTM(64, input_shape=input_shape, activation='relu'),
                Flatten(),
                Dense(outputs, activation='softmax')
            ])

            def sharpe_loss(_, y_pred):
                coeffs = tf.tile(y_pred, (self.data.shape[0], 1))
            
                portfolio_values = tf.reduce_sum(tf.multiply(coeffs, self.data), axis=1)
            
                portfolio_returns = (portfolio_values[1:] - portfolio_values[:-1]) / portfolio_values[:-1]  # % change formula

                sharpe = K.mean(portfolio_returns) / K.std(portfolio_returns)
            
                # exp keeps relative ordering between positives and negatives
                #   since we want to maximize sharp, while gradient descent minimizes the loss
                #   we negate the Sharpe value
                return K.exp(-sharpe)
        
            model.compile(loss=sharpe_loss, optimizer='adam')
            return model
    
        def get_allocations(self, data):
        
        
            # data with returns
            data_w_ret = np.concatenate([ data.values[1:], data.pct_change().values[1:] ], axis=1)
        
            data = data.iloc[1:]
            self.data = tf.cast(tf.constant(data), float)
        
            if self.model is None:
                self.model = self.__build_model(data_w_ret.shape, len(data.columns))
        
            fit_predict_data = data_w_ret[np.newaxis,:]        
            self.model.fit(fit_predict_data, np.zeros((1, len(data.columns))), epochs=10, shuffle=False)
            return self.model.predict(fit_predict_data)[0]
    
    model = Model() 
    weights = model.get_allocations(pd.DataFrame(np.random.randn(n_assets,n_assets)))
    weight_year[i,:] = weights
    #for j in range(n_assets):
    #    return_n8_corr[i,j] = np.sum(weight_n8_corr[i,j] * data_n8_corr[i][j])
    #    risks_n8_corr[i,j] = weight_n8_corr[i,j] * np.diagonal(np.cov(data_n8_corr[i].T))[j] * weight_n8_corr[i,j]

Drop random 20% of data

In [36]:
weight_rand = np.zeros((100,len(cdat.T)-2))
return_rand = np.zeros((100,len(cdat.T)-2))
risks_rand = np.zeros((100,len(cdat.T)-2))

for i in range(100):
    
    # Randomly take out 20% of the data
    _80_perct = int(cdat.shape[0]*4/5)
    cdat = cdat.iloc[random.sample(list(range(cdat.shape[0])), _80_perct)]
    data = cdat.loc[:,['Solid_fossil_fuels','Peat_and_peat_products','Oil_and_petroleum_products', 
                          'Natural_gas', 'Nuclear_heat', 'Hydro', 'Solar_Thermal', 'Geothermal', 
                          'Ambient_Heat', 'Tide_wave_and_ocean', 'Biofuels_solid', 'Biofuels_other']]
    class Model:
        def __init__(self):
            self.data = None
            self.model = None
        
            # self.callback = EarlyStopping(monitor='loss', min_delta=.1, patience = 10)
    
        def __build_model(self, input_shape, outputs):
            model = Sequential([
                LSTM(64, input_shape=input_shape, activation='relu'),
                Flatten(),
                Dense(outputs, activation='softmax')
            ])

            def sharpe_loss(_, y_pred):
                coeffs = tf.tile(y_pred, (self.data.shape[0], 1))
            
                portfolio_values = tf.reduce_sum(tf.multiply(coeffs, self.data), axis=1)
            
                portfolio_returns = (portfolio_values[1:] - portfolio_values[:-1]) / portfolio_values[:-1]  # % change formula

                sharpe = K.mean(portfolio_returns) / K.std(portfolio_returns)
            
                # exp keeps relative ordering between positives and negatives
                #   since we want to maximize sharp, while gradient descent minimizes the loss
                #   we negate the Sharpe value
                return K.exp(-sharpe)
        
            model.compile(loss=sharpe_loss, optimizer='adam')
            return model
    
        def get_allocations(self, data):
        
        
            # data with returns
            data_w_ret = np.concatenate([ data.values[1:], data.pct_change().values[1:] ], axis=1)
        
            data = data.iloc[1:]
            self.data = tf.cast(tf.constant(data), float)
        
            if self.model is None:
                self.model = self.__build_model(data_w_ret.shape, len(data.columns))
        
            fit_predict_data = data_w_ret[np.newaxis,:]        
            self.model.fit(fit_predict_data, np.zeros((1, len(data.columns))), epochs=10, shuffle=False)
            return self.model.predict(fit_predict_data)[0]
    
    model = Model() 
    weights = model.get_allocations(pd.DataFrame(np.random.randn(n_assets,n_assets)))
    weight_rand[i,:] = weights

## Repeat this while dropping missing data

In [37]:
data_drop = data_full.dropna()

Full Data

In [38]:
# full data with country and year
cdat = data_drop.loc[:,['Country','Year','Solid_fossil_fuels','Peat_and_peat_products','Oil_and_petroleum_products', 
                          'Natural_gas', 'Nuclear_heat', 'Hydro', 'Solar_Thermal', 'Geothermal', 
                          'Ambient_Heat', 'Tide_wave_and_ocean', 'Biofuels_solid', 'Biofuels_other']]

In [39]:
n_assets = 12
data = cdat
    
class Model:
    def __init__(self):
        self.data = None
        self.model = None
        
        # self.callback = EarlyStopping(monitor='loss', min_delta=.1, patience = 10)
    
    def __build_model(self, input_shape, outputs):
        model = Sequential([
            LSTM(64, input_shape=input_shape, activation='relu'),
            Flatten(),
            Dense(outputs, activation='softmax')
        ])

        def sharpe_loss(_, y_pred):
            coeffs = tf.tile(y_pred, (self.data.shape[0], 1))
            
            portfolio_values = tf.reduce_sum(tf.multiply(coeffs, self.data), axis=1)
            
            portfolio_returns = (portfolio_values[1:] - portfolio_values[:-1]) / portfolio_values[:-1]  # % change formula

            sharpe = K.mean(portfolio_returns) / K.std(portfolio_returns)
            
            # exp keeps relative ordering between positives and negatives
            #   since we want to maximize sharp, while gradient descent minimizes the loss
            #   we negate the Sharpe value
            return K.exp(-sharpe)
        
        model.compile(loss=sharpe_loss, optimizer='adam')
        return model
    
    def get_allocations(self, data):
        
        
        # data with returns
        data_w_ret = np.concatenate([ data.values[1:], data.pct_change().values[1:] ], axis=1)
        
        data = data.iloc[1:]
        self.data = tf.cast(tf.constant(data), float)
        
        if self.model is None:
            self.model = self.__build_model(data_w_ret.shape, len(data.columns))
        
        fit_predict_data = data_w_ret[np.newaxis,:]        
        self.model.fit(fit_predict_data, np.zeros((1, len(data.columns))), epochs=10, shuffle=False)
        return self.model.predict(fit_predict_data)[0]
    
model = Model() 
weights = model.get_allocations(pd.DataFrame(np.random.randn(n_assets,n_assets)))
weight_fulldata_drop = weights

Drop one country at a time

In [41]:
# full data with country and year
cdat = data_drop.loc[:,['Country','Year','Solid_fossil_fuels','Peat_and_peat_products','Oil_and_petroleum_products', 
                          'Natural_gas', 'Nuclear_heat', 'Hydro', 'Solar_Thermal', 'Geothermal', 
                          'Ambient_Heat', 'Tide_wave_and_ocean', 'Biofuels_solid', 'Biofuels_other']]

In [42]:
weight_country_drop = np.zeros((len(data_impute.groupby('Country')),len(cdat.T)-2))
return_country = np.zeros((len(data_impute.groupby('Country')),len(cdat.T)-2))
risks_country = np.zeros((len(data_impute.groupby('Country')),len(cdat.T)-2))

for i in range(len(data_impute.groupby('Country'))):
    country_dat = cdat.loc[(cdat.Country != cdat.Country.unique()[i])]
    country_dat = country_dat.loc[:,['Solid_fossil_fuels','Peat_and_peat_products','Oil_and_petroleum_products', 
                          'Natural_gas', 'Nuclear_heat', 'Hydro', 'Solar_Thermal', 'Geothermal', 
                          'Ambient_Heat', 'Tide_wave_and_ocean', 'Biofuels_solid', 'Biofuels_other']]
    data = country_dat
    
    class Model:
        def __init__(self):
            self.data = None
            self.model = None
        
            # self.callback = EarlyStopping(monitor='loss', min_delta=.1, patience = 10)
    
        def __build_model(self, input_shape, outputs):
            model = Sequential([
                LSTM(64, input_shape=input_shape, activation='relu'),
                Flatten(),
                Dense(outputs, activation='softmax')
            ])

            def sharpe_loss(_, y_pred):
                coeffs = tf.tile(y_pred, (self.data.shape[0], 1))
            
                portfolio_values = tf.reduce_sum(tf.multiply(coeffs, self.data), axis=1)
            
                portfolio_returns = (portfolio_values[1:] - portfolio_values[:-1]) / portfolio_values[:-1]  # % change formula

                sharpe = K.mean(portfolio_returns) / K.std(portfolio_returns)
            
                # exp keeps relative ordering between positives and negatives
                #   since we want to maximize sharp, while gradient descent minimizes the loss
                #   we negate the Sharpe value
                return K.exp(-sharpe)
        
            model.compile(loss=sharpe_loss, optimizer='adam')
            return model
    
        def get_allocations(self, data):
        
        
            # data with returns
            data_w_ret = np.concatenate([ data.values[1:], data.pct_change().values[1:] ], axis=1)
        
            data = data.iloc[1:]
            self.data = tf.cast(tf.constant(data), float)
        
            if self.model is None:
                self.model = self.__build_model(data_w_ret.shape, len(data.columns))
        
            fit_predict_data = data_w_ret[np.newaxis,:]        
            self.model.fit(fit_predict_data, np.zeros((1, len(data.columns))), epochs=10, shuffle=False)
            return self.model.predict(fit_predict_data)[0]
    
    model = Model() 
    weights = model.get_allocations(pd.DataFrame(np.random.randn(n_assets,n_assets)))
    weight_country_drop[i,:] = weights

Drop one year at a time

In [43]:
weight_year_drop = np.zeros((len(data_impute.groupby('Year')),len(cdat.T)-2))
return_year = np.zeros((len(data_impute.groupby('Year')),len(cdat.T)-2))
risks_year = np.zeros((len(data_impute.groupby('Year')),len(cdat.T)-2))

for i in range(len(data_impute.groupby('Year'))):
    year_dat = cdat.loc[(cdat.Year != cdat.Year.unique()[i])]
    year_dat = country_dat.loc[:,['Solid_fossil_fuels','Peat_and_peat_products','Oil_and_petroleum_products', 
                          'Natural_gas', 'Nuclear_heat', 'Hydro', 'Solar_Thermal', 'Geothermal', 
                          'Ambient_Heat', 'Tide_wave_and_ocean', 'Biofuels_solid', 'Biofuels_other']]
    data = country_dat
    
    class Model:
        def __init__(self):
            self.data = None
            self.model = None
        
            # self.callback = EarlyStopping(monitor='loss', min_delta=.1, patience = 10)
    
        def __build_model(self, input_shape, outputs):
            model = Sequential([
                LSTM(64, input_shape=input_shape, activation='relu'),
                Flatten(),
                Dense(outputs, activation='softmax')
            ])

            def sharpe_loss(_, y_pred):
                coeffs = tf.tile(y_pred, (self.data.shape[0], 1))
            
                portfolio_values = tf.reduce_sum(tf.multiply(coeffs, self.data), axis=1)
            
                portfolio_returns = (portfolio_values[1:] - portfolio_values[:-1]) / portfolio_values[:-1]  # % change formula

                sharpe = K.mean(portfolio_returns) / K.std(portfolio_returns)
            
                # exp keeps relative ordering between positives and negatives
                #   since we want to maximize sharp, while gradient descent minimizes the loss
                #   we negate the Sharpe value
                return K.exp(-sharpe)
        
            model.compile(loss=sharpe_loss, optimizer='adam')
            return model
    
        def get_allocations(self, data):
        
        
            # data with returns
            data_w_ret = np.concatenate([ data.values[1:], data.pct_change().values[1:] ], axis=1)
        
            data = data.iloc[1:]
            self.data = tf.cast(tf.constant(data), float)
        
            if self.model is None:
                self.model = self.__build_model(data_w_ret.shape, len(data.columns))
        
            fit_predict_data = data_w_ret[np.newaxis,:]        
            self.model.fit(fit_predict_data, np.zeros((1, len(data.columns))), epochs=10, shuffle=False)
            return self.model.predict(fit_predict_data)[0]
    
    model = Model() 
    weights = model.get_allocations(pd.DataFrame(np.random.randn(n_assets,n_assets)))
    weight_year_drop[i,:] = weights

Drop random 20% of data

In [44]:
weight_rand_drop = np.zeros((100,len(cdat.T)-2))
return_rand = np.zeros((100,len(cdat.T)-2))
risks_rand = np.zeros((100,len(cdat.T)-2))

for i in range(100):
    
    # Randomly take out 20% of the data
    _80_perct = int(cdat.shape[0]*4/5)
    cdat = cdat.iloc[random.sample(list(range(cdat.shape[0])), _80_perct)]
    data = cdat.loc[:,['Solid_fossil_fuels','Peat_and_peat_products','Oil_and_petroleum_products', 
                          'Natural_gas', 'Nuclear_heat', 'Hydro', 'Solar_Thermal', 'Geothermal', 
                          'Ambient_Heat', 'Tide_wave_and_ocean', 'Biofuels_solid', 'Biofuels_other']]
    class Model:
        def __init__(self):
            self.data = None
            self.model = None
        
            # self.callback = EarlyStopping(monitor='loss', min_delta=.1, patience = 10)
    
        def __build_model(self, input_shape, outputs):
            model = Sequential([
                LSTM(64, input_shape=input_shape, activation='relu'),
                Flatten(),
                Dense(outputs, activation='softmax')
            ])

            def sharpe_loss(_, y_pred):
                coeffs = tf.tile(y_pred, (self.data.shape[0], 1))
            
                portfolio_values = tf.reduce_sum(tf.multiply(coeffs, self.data), axis=1)
            
                portfolio_returns = (portfolio_values[1:] - portfolio_values[:-1]) / portfolio_values[:-1]  # % change formula

                sharpe = K.mean(portfolio_returns) / K.std(portfolio_returns)
            
                # exp keeps relative ordering between positives and negatives
                #   since we want to maximize sharp, while gradient descent minimizes the loss
                #   we negate the Sharpe value
                return K.exp(-sharpe)
        
            model.compile(loss=sharpe_loss, optimizer='adam')
            return model
    
        def get_allocations(self, data):
        
        
            # data with returns
            data_w_ret = np.concatenate([ data.values[1:], data.pct_change().values[1:] ], axis=1)
        
            data = data.iloc[1:]
            self.data = tf.cast(tf.constant(data), float)
        
            if self.model is None:
                self.model = self.__build_model(data_w_ret.shape, len(data.columns))
        
            fit_predict_data = data_w_ret[np.newaxis,:]        
            self.model.fit(fit_predict_data, np.zeros((1, len(data.columns))), epochs=10, shuffle=False)
            return self.model.predict(fit_predict_data)[0]
    
    model = Model() 
    weights = model.get_allocations(pd.DataFrame(np.random.randn(n_assets,n_assets)))
    weight_rand_drop[i,:] = weights

## Save Results

In [48]:
DeepLearningResults_Weights = [weight_fulldata, weight_country, weight_year, weight_rand, 
                              weight_fulldata_drop, weight_country_drop, weight_year_drop, weight_rand_drop]

file_name = "EUDeepLearningResultsWeights.pkl"
open_file = open(file_name, "wb")
pickle.dump(DeepLearningResults_Weights, open_file)
open_file.close()