<a href="https://colab.research.google.com/github/heamabc/Machine-Learning-on-Futures/blob/master/Smart_Beta_with_Regularization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np 
import pandas as pd 
import re
from scipy.optimize import minimize
import datetime as datetime
import plotly.graph_objects as go

In [0]:
# Path of the input CSV (a Google Drive location as mounted under /content).
input_directory = r"/content/drive/My Drive/FINA 4380/data.csv"

# First and last index labels of the sample window; they are used as the two
# ends of a label slice on the CSV's first column.
start_date, end_date = '1/3/2005', '12/31/2010'

# Transaction cost setting; nothing in the visible cells reads it yet.
transaction_cost = 0.001

class data_generation:
    """Load the raw CSV and derive adjusted price, volume and return frames.

    The CSV is expected to carry, for every ticker, the columns
    ``<ticker>_open``, ``<ticker>_close``, ``<ticker>_volume``,
    ``<ticker>_divCash`` and ``<ticker>_splitFactor``; its first column is
    used as the row index.
    """

    def __init__(self, input_directory, start_date, end_date):
        """Store the CSV path and the first/last index labels to keep."""
        self.input_directory = input_directory
        self.start_date = start_date
        self.end_date = end_date

    @staticmethod
    def dropna_cols(df):
        """Drop the first row, then every column that still contains a NaN."""
        return df.iloc[1:].dropna(axis=1)

    @staticmethod
    def drop_SPY(df):
        """Return ``df`` without its 'SPY' column."""
        return df.drop(columns=['SPY'])

    @staticmethod
    def transform_ln_price(open_df):
        """Return log(price / first valid price), computed column by column.

        A column with no valid observation at all yields an all-NaN column.
        """

        # First non-NaN value of the column, or NaN when the column has none.
        def first_valid_value(series):
            first_index = series.first_valid_index()
            if first_index is not None:
                return series.loc[first_index]
            # NaN (not None) keeps the per-column base series numeric.
            return np.nan

        ln_open_df = np.log(open_df / open_df.apply(first_valid_value))

        return ln_open_df

    def output_data(self, dropna=False, dropSPY=False):
        """Read the CSV and build the adjusted frames.

        Parameters
        ----------
        dropna : bool
            When true, ``dropna_cols`` is applied to every returned frame
            (first row removed, columns containing NaN removed).
        dropSPY : bool
            When true, the 'SPY' column is removed from every returned frame.

        Returns
        -------
        tuple
            ``(ln_open_np, open_df, close_df, volume_df, open_return_df,
            close_return_df, Tickers)`` where ``ln_open_np`` is the ndarray of
            log-normalised open prices and ``Tickers`` is the list of its
            column names.
        """
        # Read data and slice to the requested label window
        data = pd.read_csv(self.input_directory, index_col=0)
        data = data.loc[self.start_date:self.end_date]

        # Tickers: strip everything from the first underscore onwards.
        # regex=True is required explicitly; recent pandas defaults to a
        # literal replacement and rejects a compiled pattern in that mode.
        regex_pat = re.compile(r'_.*')
        Tickers = data.columns.str.replace(regex_pat, '', regex=True).unique()

        # Select the '<ticker><suffix>' columns and relabel them by ticker.
        def field_frame(suffix):
            frame = data[[ele + suffix for ele in Tickers]].copy()
            frame.columns = Tickers
            return frame

        # Running product of split factors and running sum of cash dividends
        splitFactor_df = field_frame('_splitFactor').cumprod()
        divCash_df = field_frame('_divCash').cumsum()

        # Slice open, close, volume frames
        open_df = field_frame('_open')
        close_df = field_frame('_close')
        volume_df = field_frame('_volume')

        # Apply the split and dividend adjustments
        open_df = open_df * splitFactor_df + divCash_df * splitFactor_df
        close_df = close_df * splitFactor_df + divCash_df * splitFactor_df
        volume_df = volume_df * splitFactor_df

        # Simple period-over-period returns
        open_return_df = open_df.pct_change()
        close_return_df = close_df.pct_change()

        ln_open_df = self.transform_ln_price(open_df)

        frames = [ln_open_df, open_df, close_df, volume_df, open_return_df, close_return_df]

        if dropna:
            frames = [self.dropna_cols(frame) for frame in frames]

        if dropSPY:
            frames = [self.drop_SPY(frame) for frame in frames]

        ln_open_df, open_df, close_df, volume_df, open_return_df, close_return_df = frames

        # Some columns might be dropped
        Tickers = list(ln_open_df.columns)

        ln_open_np = ln_open_df.values

        return ln_open_np, open_df, close_df, volume_df, open_return_df, close_return_df, Tickers


class performance_generation:
    """Summary statistics for a portfolio's daily and cumulative returns."""

    @staticmethod
    def sharpe(port_daily_return):
        """Return mean/std of the daily returns scaled by sqrt(252).

        No risk-free rate is subtracted.
        """
        return np.sqrt(252) * (port_daily_return.mean() / port_daily_return.std())

    @staticmethod
    def drawdown(port_culmulative_return):
        """Return the drawdown series and its worst (most negative) value.

        The drawdown at each point is the cumulative value relative to the
        highest value reached up to and including that point, minus one.
        """
        running_peak = port_culmulative_return.expanding().max()

        # Compare each value with the peak that preceded it. Using the running
        # minimum instead would count a trough that happened *before* the peak
        # as a loss.
        drawdown = port_culmulative_return / running_peak - 1
        max_drawdown = drawdown.min()

        return drawdown, max_drawdown

    @staticmethod
    def annualized_return(port_culmulative_return):
        """Return the annualized growth rate implied by the final cumulative value.

        The first and last index labels must be strings formatted as
        '%m/%d/%Y'; the exponent is 365 divided by the calendar days between
        them, so a series spanning zero days raises ZeroDivisionError.
        """
        last_date = datetime.datetime.strptime(port_culmulative_return.index[-1], '%m/%d/%Y')
        first_date = datetime.datetime.strptime(port_culmulative_return.index[0], '%m/%d/%Y')
        days = (last_date - first_date).days

        return port_culmulative_return.iloc[-1]**(365/days) - 1

    @staticmethod
    def annualized_volatility(port_daily_return):
        """Return the standard deviation of daily returns scaled by sqrt(252)."""
        return port_daily_return.std() * np.sqrt(252)

    def main(self, port_daily_return, port_culmulative_return):
        """Compute every statistic at once.

        Returns ``(sharpe, drawdown, max_drawdown, annualized_return,
        annualized_volatility)``.
        """
        sharpe = self.sharpe(port_daily_return)
        drawdown, max_drawdown = self.drawdown(port_culmulative_return)
        annualized_return = self.annualized_return(port_culmulative_return)
        annualized_volatility = self.annualized_volatility(port_daily_return)

        return sharpe, drawdown, max_drawdown, annualized_return, annualized_volatility

In [0]:
# ==================================== Super class model ==============================================
class super_model:
    """Base class for portfolio optimisers that share one SLSQP fitting routine.

    Subclasses supply ``loss_function(w, cov)``, the objective to minimise.
    """

    def __init__(self, return_df, weight=None, lambda_=1, regularization=False):
        """Store the return data and fitting options.

        Parameters
        ----------
        return_df : DataFrame
            One column of returns per asset.
        weight : array-like, optional
            Pre-computed weights; ``fit`` overwrites them.
        lambda_ : float
            Multiplier of the L2 penalty when ``regularization`` is true.
        regularization : bool
            Whether ``fit`` minimises the penalised objective.
        """
        self.lambda_ = lambda_
        self.regularization = regularization

        self.return_df = return_df
        # Keep caller-supplied weights so calc_return() can run without fit().
        self.weight = weight

    @staticmethod
    def L2_regularization(w):
        """Return the sum of squared weights."""
        return np.sum(w**2)

    def loss_function(self, w, cov):
        """Objective to minimise; must be provided by the subclass."""
        raise NotImplementedError("subclasses must implement loss_function(w, cov)")

    def loss_function_with_regularization(self, w, cov):
        """Return the subclass objective plus ``lambda_`` times the L2 penalty."""
        return self.loss_function(w, cov) + self.lambda_ * self.L2_regularization(w)

    def fit(self, long=True):
        """Solve for the weights with SLSQP and store them in ``self.weight``.

        The weights start equal and are constrained to sum to one. With
        ``long=True`` each weight is additionally bounded to [0, 0.1], which
        is only feasible with at least ten assets; with ``long=False`` the
        weights are unbounded. A RuntimeWarning is issued when the optimiser
        does not report success; the returned point is stored either way.
        """
        import warnings

        cov = self.return_df.cov()
        n = cov.shape[0]
        weights = np.ones(n)/n
        cons = {'type': 'eq', 'fun': lambda x: 1 - np.sum(x)}
        bnds = [(0, 0.1)] * n if long else None

        if self.regularization:
            objective = self.loss_function_with_regularization
        else:
            objective = self.loss_function

        res = minimize(objective, weights, args=(cov,), method='SLSQP', constraints=cons, bounds=bnds)

        if not res.success:
            warnings.warn(
                "SLSQP did not converge: {}".format(res.message),
                RuntimeWarning,
            )

        self.weight = res.x
        return

    def calc_return(self, ):
        """Return ``(daily_return, cumulative_return)`` for the stored weights.

        Raises AttributeError when no weights are available yet (the same
        exception type an unfitted model raised before weights were stored
        by the constructor).
        """
        if self.weight is None:
            raise AttributeError("weight is not set; call fit() or pass weight= to the constructor")

        daily_return = (self.weight * self.return_df).sum(axis=1)
        culmulative_return = (daily_return + 1).cumprod()

        return daily_return, culmulative_return

# ==================================== MDR model subclass ==============================================
class MDR_model(super_model):
    """Optimiser whose objective is the negated diversification ratio."""

    @staticmethod
    def loss_function(w, cov):
        """Return minus (weighted sum of asset volatilities / portfolio volatility)."""
        # Per-asset volatilities are the square roots of the covariance diagonal.
        asset_vols = np.sqrt(np.diag(cov))
        weighted_vol = asset_vols.dot(w.T)
        portfolio_vol = np.sqrt(w.dot(cov).dot(w))
        ratio = weighted_vol/portfolio_vol
        # Negated so that minimising the loss maximises the ratio.
        return -ratio

# ==================================== MSR model subclass ==============================================
class MSR_model(super_model):
    """Optimiser whose objective is the negated mean-to-volatility ratio."""

    def loss_function(self, w, cov):
        """Return minus (weighted mean return / portfolio volatility)."""
        # Column means of the stored returns, combined with the weights.
        expected_return = w.dot(self.return_df.mean())
        portfolio_variance = w.dot(cov).dot(w)
        portfolio_vol = np.sqrt(portfolio_variance)
        # Negated so that minimising the loss maximises the ratio.
        return -expected_return/portfolio_vol

class GMV_model(super_model):
    """Optimiser whose objective is the portfolio variance."""

    @staticmethod
    def loss_function(w, cov):
        """Return the quadratic form w' * cov * w."""
        portfolio_variance = w.dot(cov).dot(w)
        return portfolio_variance

In [0]:
# Data cleaning: build the loader and unpack every frame it produces.
generate_data = data_generation(input_directory, start_date, end_date)
(
    ln_open_np,
    open_df,
    close_df,
    volume_df,
    open_return_df,
    close_return_df,
    Tickers,
) = generate_data.output_data(dropna=True)

# NOTE(review): output_data(dropna=True) has already removed the first row and
# the NaN columns of every frame, so this step removes one further row from
# open_return_df only -- confirm that is intended.
open_return_df = open_return_df.iloc[1:]
open_return_df = open_return_df.dropna(axis=1)

In [0]:
# Model


def _fit_and_evaluate(model_cls, regularization):
    """Build one model on open_return_df, fit it and compute its returns.

    Returns (model, weight, daily_return, cumulative_return).
    """
    model = model_cls(open_return_df, regularization=regularization)
    model.fit()
    fitted_weight = model.weight
    daily, cumulative = model.calc_return()
    return model, fitted_weight, daily, cumulative


# ==================================== MDR model ==============================================
(MDR_normal_model, MDR_weight,
 MDR_daily_return, MDR_cumulative_return) = _fit_and_evaluate(MDR_model, False)

(MDR_regularized_model, MDR_weight_regularized,
 MDR_daily_return_regularized, MDR_cumulative_return_regularized) = _fit_and_evaluate(MDR_model, True)

# ==================================== MSR model  ==============================================
(MSR_normal_model, MSR_weight,
 MSR_daily_return, MSR_cumulative_return) = _fit_and_evaluate(MSR_model, False)

(MSR_regularized_model, MSR_weight_regularized,
 MSR_daily_return_regularized, MSR_cumulative_return_regularized) = _fit_and_evaluate(MSR_model, True)

# ==================================== GMV model  ==============================================
(GMV_normal_model, GMV_weight,
 GMV_daily_return, GMV_cumulative_return) = _fit_and_evaluate(GMV_model, False)

(GMV_regularized_model, GMV_weight_regularized,
 GMV_daily_return_regularized, GMV_cumulative_return_regularized) = _fit_and_evaluate(GMV_model, True)

In [219]:
# Plot the SPY series against the plain and regularized MDR cumulative returns.
# NOTE(review): SPY_culmulative_return is not assigned in any visible cell;
# presumably it comes from a cell that is no longer in the notebook -- confirm.
fig = go.Figure()

fig.add_trace(go.Scatter(
      x=SPY_culmulative_return.index,
      y=SPY_culmulative_return,
      name="SPY"
  ))

# The legend entries below are labelled MDR to match the MDR series they plot.
fig.add_trace(go.Scatter(
      x=SPY_culmulative_return.index,
      y=MDR_cumulative_return,
      name="MDR"
  ))

fig.add_trace(go.Scatter(
      x=SPY_culmulative_return.index,
      y=MDR_cumulative_return_regularized,
      name="MDR_regularized"
  ))

fig.show()

In [220]:
fig = go.Figure()

# Overlay the SPY series with the plain and regularized MSR cumulative
# returns; every trace uses the SPY series' index as its x-axis.
for trace_label, curve in [
    ("SPY", SPY_culmulative_return),
    ("MSR", MSR_cumulative_return),
    ("MSR_regularized", MSR_cumulative_return_regularized),
]:
    fig.add_trace(go.Scatter(x=SPY_culmulative_return.index, y=curve, name=trace_label))

fig.show()

In [230]:
fig = go.Figure()

# Overlay the SPY series with the plain and regularized GMV cumulative
# returns; every trace uses the SPY series' index as its x-axis.
for trace_label, curve in [
    ("SPY", SPY_culmulative_return),
    ("GMV", GMV_cumulative_return),
    ("GMV_regularized", GMV_cumulative_return_regularized),
]:
    fig.add_trace(go.Scatter(x=SPY_culmulative_return.index, y=curve, name=trace_label))

fig.show()

In [231]:
# Scratch cell: prints a constant, apparently as a quick check that the kernel responds.
print(123)

123
