In [119]:
# Initial Imports
import pandas as pd
import numpy as np
from joblib import dump, load
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
%matplotlib inline
import matplotlib.pyplot as plt
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

In [120]:
#Define function to read in factors from csv and return cleaned dataframe
def get_factors(factors):
    """Read a factor CSV and return it as a date-indexed DataFrame.

    Parameters
    ----------
    factors : str
        Path of the CSV file without its ``.csv`` extension, e.g.
        ``"french_fama"`` reads ``french_fama.csv``. The date column must
        have a blank header (pandas reads it as ``"Unnamed: 0"``) and hold
        dates written as ``YYYYMMDD``.

    Returns
    -------
    pandas.DataFrame
        The remaining columns of the file, indexed by a ``DatetimeIndex``
        named ``"Date"``.

    Raises
    ------
    KeyError
        If the file has no ``"Unnamed: 0"`` column to use as the date.
    ValueError
        If a date value does not match the ``%Y%m%d`` format.
    """
    factor_df = pd.read_csv(factors + ".csv")

    # The date column has no header in the file, so give it a usable name.
    factor_df = factor_df.rename(columns={'Unnamed: 0': 'Date'})

    # Parse the whole column in one vectorized call instead of converting
    # row by row through ``apply``.
    factor_df['Date'] = pd.to_datetime(factor_df['Date'].astype(str), format='%Y%m%d')

    return factor_df.set_index('Date')

# Load french_fama.csv through get_factors and display the resulting frame.
factors = get_factors("french_fama")
factors

Unnamed: 0_level_0,Mkt-RF,SMB,HML,RF
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1926-07-01,0.10,-0.24,-0.28,0.009
1926-07-02,0.45,-0.32,-0.08,0.009
1926-07-06,0.17,0.27,-0.35,0.009
1926-07-07,0.09,-0.59,0.03,0.009
1926-07-08,0.21,-0.36,0.15,0.009
...,...,...,...,...
2020-11-23,0.83,0.85,1.55,0.000
2020-11-24,1.56,-0.15,2.74,0.000
2020-11-25,0.03,0.01,-1.27,0.000
2020-11-27,0.43,0.45,-1.66,0.000


In [121]:
# Sanity-check the factor DataFrame by showing its first three rows:
factors.head(3)

Unnamed: 0_level_0,Mkt-RF,SMB,HML,RF
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1926-07-01,0.1,-0.24,-0.28,0.009
1926-07-02,0.45,-0.32,-0.08,0.009
1926-07-06,0.17,0.27,-0.35,0.009


In [123]:
def choose_stock(ticker):
    """Load a ticker's price CSV and add daily percentage returns.

    Parameters
    ----------
    ticker : str
        Path of the CSV file without its ``.csv`` extension, e.g. ``"T"``
        reads ``T.csv``. The file must contain a ``Date`` column that
        pandas can parse as dates, and a ``Close`` column.

    Returns
    -------
    pandas.DataFrame
        The file's columns plus ``"Returns"``: the percentage change (in
        percent, i.e. multiplied by 100) between consecutive non-null
        ``Close`` values. The index is named ``"Date"`` and holds
        ``datetime.date`` objects.
    """
    # ``infer_datetime_format`` is deprecated in pandas 2.x and no longer has
    # any effect, so it is not passed.
    stock = pd.read_csv(ticker + ".csv", index_col='Date', parse_dates=True)

    # Returns are computed over the non-null closes only; assignment aligns
    # on the index, so rows with a missing close get a NaN return.
    stock["Returns"] = stock["Close"].dropna().pct_change() * 100

    # Reduce timestamps to plain calendar dates while keeping the index name.
    stock.index = pd.Index(stock.index.date, dtype=object, name=stock.index.name)

    return stock




In [124]:
# Pick the ticker to analyse; choose_stock reads "<ticker>.csv" and adds a
# "Returns" column. Show the first rows as a quick check.
ticker="T"
stock=choose_stock(ticker)
stock.head()

Unnamed: 0_level_0,Close,Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2000-01-03,47.0,
2000-01-04,44.25,-5.851064
2000-01-05,44.94,1.559322
2000-01-06,43.75,-2.647975
2000-01-07,44.13,0.868571


In [125]:
# Join the factor data with the stock data on the dates they share, discard
# rows with missing values, and remove the risk-free rate column.
combined_df = (
    pd.concat([factors, stock], axis='columns', join='inner')
    .dropna()
    .drop(columns='RF')
)

# Preview: the three factor columns (Mkt-RF, SMB, HML) are the features;
# Close and Returns come from the stock file.
combined_df.head()

Unnamed: 0_level_0,Mkt-RF,SMB,HML,Close,Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2000-01-04,-4.06,-0.01,2.14,44.25,-5.851064
2000-01-05,-0.09,0.15,0.29,44.94,1.559322
2000-01-06,-0.73,-0.41,1.27,43.75,-2.647975
2000-01-07,3.21,-0.5,-1.43,44.13,0.868571
2000-01-10,1.76,0.97,-1.52,44.75,1.40494


In [126]:
# Target: the stock's daily percentage return.
y = combined_df['Returns']
# Features: every remaining column except the target and the closing price.
X = combined_df.drop(columns=['Returns', 'Close'])

In [128]:
# Split into training/testing data in row order: the first 80% of rows are
# used for training and the remaining 20% for testing (no shuffling).
split = int(0.8 * len(X))
X_train, X_test = X.iloc[:split], X.iloc[split:]
y_train, y_test = y.iloc[:split], y.iloc[split:]

# Closing prices covering the same rows as the test set.
close_test = combined_df["Close"].iloc[split:]
close_test

Date
2016-09-26    41.14
2016-09-27    41.46
2016-09-28    40.85
2016-09-29    40.73
2016-09-30    40.61
              ...  
2020-11-20    28.32
2020-11-23    28.62
2020-11-24    29.23
2020-11-25    28.99
2020-11-27    29.03
Name: Close, Length: 1052, dtype: float64

In [129]:
# Linear regression from scikit-learn:
from sklearn.linear_model import LinearRegression

# Fit on the training rows (fit returns the fitted estimator itself), then
# predict returns for the test rows.
lin_reg_model = LinearRegression().fit(X_train, y_train)
predictions = lin_reg_model.predict(X_test)

In [130]:
# Build the test-set DataFrame from the original target series rather than
# from y_test itself, so this cell can be re-run safely: once y_test is a
# DataFrame it no longer has a to_frame method.
y_test = y.iloc[split:].to_frame()

In [131]:
# Add the model predictions and the matching closing prices to the test frame.
y_test['Predictions'] = predictions
y_test["Close"] = close_test

# Buy signal: 1.0 on days where the predicted return exceeds the actual
# return, otherwise 0.0.
# NOTE(review): the comparison uses the same day's realised return - confirm
# that this value is available at the time the trade is placed.
y_test['Buy Signal'] = np.where(y_test['Predictions'] > y_test['Returns'], 1.0, 0.0)

# Drop nulls:
y_test = y_test.dropna()
y_test.head()



Unnamed: 0_level_0,Returns,Predictions,Close,Buy Signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2016-09-26,-0.339147,-0.610985,41.14,0.0
2016-09-27,0.777832,0.49073,41.46,0.0
2016-09-28,-1.471298,0.404347,40.85,1.0
2016-09-29,-0.293758,-0.541782,40.73,0.0
2016-09-30,-0.294623,0.536362,40.61,1.0


In [132]:
#functions to generate signals dataframe for algorithm
def generate_signals(input_df, start_capital=100000, share_count=2000):
    """Turn buy signals into a position and portfolio-value time series.

    Parameters
    ----------
    input_df : pandas.DataFrame
        Must contain a ``"Close"`` price column and a ``"Buy Signal"``
        column of 1.0 (hold the position) / 0.0 (stay flat). The input is
        not modified.
    start_capital : number, default 100000
        Cash available before the first row.
    share_count : number, default 2000
        Number of shares held whenever the buy signal is 1.

    Returns
    -------
    pandas.DataFrame
        A copy of ``input_df`` with these columns appended, in order:
        ``Position``, ``Entry/Exit``, ``Entry/Exit Position``,
        ``Portfolio Holdings``, ``Portfolio Cash``, ``Portfolio Total``,
        ``Portfolio Daily Returns``, ``Portfolio Cumulative Returns``.
        Rows containing any NaN are dropped; this always removes the
        leading rows, whose differences and returns are undefined.
    """
    initial_capital = float(start_capital)
    signals_df = input_df.copy()

    # Shares held on each row: ``share_count`` while the signal is 1, else 0.
    signals_df['Position'] = share_count * signals_df['Buy Signal']

    # +1 marks an entry, -1 an exit, 0 no change (NaN on the first row).
    signals_df['Entry/Exit'] = signals_df['Buy Signal'].diff()

    # Shares bought (+) or sold (-) on each row (NaN on the first row).
    signals_df['Entry/Exit Position'] = signals_df['Position'].diff()

    # Market value of the shares currently held. Valuing the position itself,
    # rather than the running sum of the position changes, keeps the value
    # correct when the series opens with a buy signal: the running sum never
    # sees that opening purchase and would report a short position instead.
    signals_df['Portfolio Holdings'] = signals_df['Close'] * signals_df['Position']

    # Cash spent on a position that is already open on the first row. It is
    # zero when the series starts flat, which leaves every later row exactly
    # as it would be without this adjustment.
    opening_cost = 0.0
    if len(signals_df):
        opening_shares = signals_df['Position'].iloc[0]
        if pd.notna(opening_shares) and opening_shares != 0:
            opening_cost = signals_df['Close'].iloc[0] * opening_shares

    # Liquid cash: starting capital minus the net amount spent on shares.
    traded_value = (signals_df['Close'] * signals_df['Entry/Exit Position']).cumsum()
    signals_df['Portfolio Cash'] = initial_capital - opening_cost - traded_value

    # Total portfolio value is cash plus the value of the shares held.
    signals_df['Portfolio Total'] = signals_df['Portfolio Cash'] + signals_df['Portfolio Holdings']

    # Row-over-row and compounded returns of the total portfolio value.
    signals_df['Portfolio Daily Returns'] = signals_df['Portfolio Total'].pct_change()
    signals_df['Portfolio Cumulative Returns'] = (1 + signals_df['Portfolio Daily Returns']).cumprod() - 1

    return signals_df.dropna()


In [133]:
# Build the signals DataFrame from the test-set frame with generate_signals
# (default starting capital and share count) and view its first ten rows.
signals_df=generate_signals(y_test)
signals_df.head(10)

Unnamed: 0_level_0,Returns,Predictions,Close,Buy Signal,Position,Entry/Exit,Entry/Exit Position,Portfolio Holdings,Portfolio Cash,Portfolio Total,Portfolio Daily Returns,Portfolio Cumulative Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2016-09-28,-1.471298,0.404347,40.85,1.0,2000.0,1.0,2000.0,81700.0,18300.0,100000.0,0.0,0.0
2016-09-29,-0.293758,-0.541782,40.73,0.0,0.0,-1.0,-2000.0,0.0,99760.0,99760.0,-0.0024,-0.0024
2016-09-30,-0.294623,0.536362,40.61,1.0,2000.0,1.0,2000.0,81220.0,18540.0,99760.0,0.0,-0.0024
2016-10-03,0.393992,-0.199927,40.77,0.0,0.0,-1.0,-2000.0,0.0,100080.0,100080.0,0.003208,0.0008
2016-10-04,-2.133922,-0.362826,39.9,1.0,2000.0,1.0,2000.0,79800.0,20280.0,100080.0,0.0,0.0008
2016-10-05,-1.854637,0.37787,39.16,1.0,2000.0,0.0,0.0,78320.0,20280.0,98600.0,-0.014788,-0.014
2016-10-06,-0.127681,0.086687,39.11,1.0,2000.0,0.0,0.0,78220.0,20280.0,98500.0,-0.001014,-0.015
2016-10-07,-0.613654,-0.059479,38.87,1.0,2000.0,0.0,0.0,77740.0,20280.0,98020.0,-0.004873,-0.0198
2016-10-10,0.360175,0.147721,39.01,0.0,0.0,-1.0,-2000.0,0.0,98300.0,98300.0,0.002857,-0.017
2016-10-11,0.23071,-0.709558,39.1,0.0,0.0,0.0,0.0,0.0,98300.0,98300.0,0.0,-0.017


In [134]:
def algo_evaluation(signals_df):
    """Compute summary performance metrics for a return series.

    Parameters
    ----------
    signals_df : pandas.DataFrame
        Must contain ``"Portfolio Daily Returns"`` and
        ``"Portfolio Cumulative Returns"`` columns.

    Returns
    -------
    pandas.DataFrame
        A single ``"Backtest"`` column indexed by ``Annual Return``,
        ``Cumulative Returns``, ``Annual Volatility``, ``Sharpe Ratio`` and
        ``Sortino Ratio``. Annualisation uses 252 periods per year; no
        risk-free rate is subtracted, and the Sortino target return is 0.

    Raises
    ------
    IndexError
        If ``signals_df`` has no rows.
    """
    periods_per_year = 252
    metrics = [
        'Annual Return',
        'Cumulative Returns',
        'Annual Volatility',
        'Sharpe Ratio',
        'Sortino Ratio']

    daily_returns = signals_df['Portfolio Daily Returns']
    annual_return = daily_returns.mean() * periods_per_year
    annual_volatility = daily_returns.std() * np.sqrt(periods_per_year)

    # Downside deviation: squared returns below the target, zero elsewhere,
    # averaged over all rows and then annualised.
    target = 0
    downside_squared = (daily_returns ** 2).where(daily_returns < target, 0.0)
    down_stdev = np.sqrt(downside_squared.mean()) * np.sqrt(periods_per_year)

    # Index is the metric name and the column is `Backtest` (like PyFolio).
    portfolio_evaluation_df = pd.DataFrame(index=metrics, columns=['Backtest'])

    # Take the last row by position: ``series[-1]`` is a label lookup on an
    # integer index and a deprecated positional fallback on other indexes.
    portfolio_evaluation_df.loc['Cumulative Returns'] = signals_df['Portfolio Cumulative Returns'].iloc[-1]
    portfolio_evaluation_df.loc['Annual Return'] = annual_return
    portfolio_evaluation_df.loc['Annual Volatility'] = annual_volatility
    portfolio_evaluation_df.loc['Sharpe Ratio'] = annual_return / annual_volatility
    portfolio_evaluation_df.loc['Sortino Ratio'] = annual_return / down_stdev

    return portfolio_evaluation_df



In [135]:
# Show the performance metrics of the back-tested strategy.
algo_evaluation(signals_df)

Unnamed: 0,Backtest
Annual Return,0.030028
Cumulative Returns,0.0978
Annual Volatility,0.123528
Sharpe Ratio,0.243087
Sortino Ratio,0.340549


In [136]:
# Define function to evaluate the underlying asset
def underlying_evaluation(signals_df):
    """Evaluate the underlying asset's closing prices with algo_evaluation.

    Parameters
    ----------
    signals_df : pandas.DataFrame
        Must contain a ``"Close"`` column.

    Returns
    -------
    pandas.DataFrame
        The metrics frame returned by ``algo_evaluation``, computed from the
        row-over-row percentage change of ``"Close"``.
    """
    underlying = pd.DataFrame()
    underlying["Close"] = signals_df["Close"]

    # The first row has no previous close, so its return is set to 0. The
    # filled series is assigned back explicitly: calling
    # ``fillna(inplace=True)`` on a selected column does not update the
    # DataFrame when pandas Copy-on-Write is active.
    underlying["Portfolio Daily Returns"] = underlying["Close"].pct_change().fillna(0)
    underlying['Portfolio Cumulative Returns'] = (1 + underlying['Portfolio Daily Returns']).cumprod() - 1

    return algo_evaluation(underlying)

#underlying_evaluation(signals_df)


    

In [137]:
#Define Function to return algo evaluation relative to underlying asset combines the two evaluations into a single dataframe
def algo_vs_underlying(signals_df):
    """Place the strategy's metrics next to those of the underlying asset.

    Parameters
    ----------
    signals_df : pandas.DataFrame
        Passed unchanged to ``algo_evaluation`` and ``underlying_evaluation``.

    Returns
    -------
    pandas.DataFrame
        Indexed by metric name, with an ``"Algo"`` column and an
        ``"Underlying"`` column, each taken from the ``"Backtest"`` column of
        the corresponding evaluation.
    """
    row_labels = [
        'Annual Return',
        'Cumulative Returns',
        'Annual Volatility',
        'Sharpe Ratio',
        'Sortino Ratio']
    evaluators = (
        ('Algo', algo_evaluation),
        ('Underlying', underlying_evaluation),
    )

    side_by_side = pd.DataFrame(index=row_labels, columns=[label for label, _ in evaluators])
    for label, evaluate in evaluators:
        # Column assignment aligns each evaluation on the metric names.
        side_by_side[label] = evaluate(signals_df)['Backtest']

    return side_by_side

# Compare the strategy's metrics with those of the underlying asset.
algo_vs_underlying(signals_df)
  


Unnamed: 0,Algo,Underlying
Annual Return,0.030028,-0.0516554
Cumulative Returns,0.0978,-0.289351
Annual Volatility,0.123528,0.24564
Sharpe Ratio,0.243087,-0.210289
Sortino Ratio,0.340549,-0.282077


In [92]:
#define function which accepts daily signals dataframe and returns evaluations of individual trades
def trade_evaluation(signals_df):
    """List every completed entry/exit round trip in a signals DataFrame.

    Parameters
    ----------
    signals_df : pandas.DataFrame
        Must contain ``"Entry/Exit"`` (1 on entry, -1 on exit), ``"Close"``,
        ``"Entry/Exit Position"`` and ``"Portfolio Holdings"`` columns. The
        index values are used as the trade dates.

    Returns
    -------
    pandas.DataFrame
        One row per completed trade with the columns ``Entry Date``,
        ``Exit Date``, ``Shares``, ``Entry Share Price``,
        ``Exit Share Price``, ``Entry Portfolio Holding``,
        ``Exit Portfolio Holding`` and ``Profit/Loss``. Empty (with the same
        columns) when no trade was completed. A position still open on the
        last row is not listed, and an exit with no preceding entry in
        ``signals_df`` is skipped.
    """
    columns = [
        'Entry Date',
        'Exit Date',
        'Shares',
        'Entry Share Price',
        'Exit Share Price',
        'Entry Portfolio Holding',
        'Exit Portfolio Holding',
        'Profit/Loss']

    # Records are collected in a list and converted once at the end;
    # DataFrame.append was removed in pandas 2.0 and copied the frame on
    # every call.
    trades = []
    open_trade = None  # entry details of the position currently held, if any

    rows = zip(
        signals_df.index,
        signals_df['Entry/Exit'],
        signals_df['Close'],
        signals_df['Entry/Exit Position'],
        signals_df['Portfolio Holdings'],
    )
    for date, signal, close, shares_traded, holdings in rows:
        if signal == 1:
            open_trade = {
                'Entry Date': date,
                'Shares': shares_traded,
                'Entry Share Price': close,
                'Entry Portfolio Holding': holdings,
            }
        elif signal == -1 and open_trade is not None:
            # Holdings are already zero on the exit row, so the exit value is
            # rebuilt from the close and the number of shares sold.
            exit_value = abs(close * shares_traded)
            trades.append({
                **open_trade,
                'Exit Date': date,
                'Exit Share Price': close,
                'Exit Portfolio Holding': exit_value,
                'Profit/Loss': exit_value - open_trade['Entry Portfolio Holding'],
            })
            open_trade = None

    return pd.DataFrame(trades, columns=columns)



In [138]:
# Evaluate the individual trades found in the signals DataFrame and display them.
trade_evaluation_df=trade_evaluation(signals_df)
trade_evaluation_df

Unnamed: 0,Entry Date,Exit Date,Shares,Entry Share Price,Exit Share Price,Entry Portfolio Holding,Exit Portfolio Holding,Profit/Loss
0,2016-09-28,2016-09-29,2000.0,40.85,40.73,81700.0,81460.0,-240.0
1,2016-09-30,2016-10-03,2000.0,40.61,40.77,81220.0,81540.0,320.0
2,2016-10-04,2016-10-10,2000.0,39.90,39.01,79800.0,78020.0,-1780.0
3,2016-10-14,2016-10-17,2000.0,39.22,39.27,78440.0,78540.0,100.0
4,2016-10-18,2016-10-27,2000.0,39.36,36.52,78720.0,73040.0,-5680.0
...,...,...,...,...,...,...,...,...
270,2020-11-06,2020-11-09,2000.0,27.44,28.30,54880.0,56600.0,1720.0
271,2020-11-11,2020-11-13,2000.0,28.73,28.91,57460.0,57820.0,360.0
272,2020-11-16,2020-11-17,2000.0,28.97,29.03,57940.0,58060.0,120.0
273,2020-11-18,2020-11-20,2000.0,28.62,28.32,57240.0,56640.0,-600.0


In [None]:
# TODO: Define a function that summarizes the trade evaluation DataFrame.
# Fields: beginning evaluation date, ending date, number of trades, number of profitable trades, number of losing trades, average PnL, max PnL, min PnL



In [None]:
# TODO: Define function(s) that plot the trade evaluation DataFrame, e.g. bar charts of profit/loss per trade.