In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
class LinearRegression:
    def train(self, X, y):
        #Add bias term to X data
        X = np.c_[np.ones(X.shape[0]), X]
        #Calculate weights using closed form solution
        self.weights = np.linalg.pinv(X.T.dot(X)).dot(X.T).dot(y)
        return self.weights[0], self.weights[1]
    def predict(self, X):
        #Account for X being a single value, convert it to an array
        X = np.array([X])       
        #Add bias term to X data
        X = np.c_[np.ones(X.shape[0]), X]
        return float(X.dot(self.weights))

In [3]:
class LinearRegression:
    #Linear Regression using closed form solution
    def train(self, X, y):
        #normalise data
        self.x_min = np.amin(X)
        self.x_scale = np.amax(X) - self.x_min
        if (self.x_scale <= 0): self.x_scale = 1
        X = (X - self.x_min) / self.x_scale

        self.y_min = np.amin(y)
        self.y_scale = np.amax(y) - self.y_min
        if (self.y_scale <= 0): self.y_scale = 1
        y = (y - self.y_min) / self.y_scale

        #Add bias term to X data
        X = np.c_[np.ones(X.shape[0]), X]
        #Calculate weights using closed form solution
        self.weights = np.linalg.pinv(X.T.dot(X)).dot(X.T).dot(y)
        return self.weights[0], self.weights[1], self.x_min, self.x_scale, self.y_min, self.y_scale

    def predict(self, X):
        #Account for X being a single value, convert it to an array
        X = np.array([X])       
        #Scale X data
        X = (X - self.x_min) / self.x_scale
        #Add bias term to X data
        X = np.c_[np.ones(X.shape[0]), X]
        #Predict y values
        y = X.dot(self.weights)
        #Scale y values
        y = y * self.y_scale + self.y_min
        return float(y)

In [4]:
class WeightedLinearRegression:
    def train(self, X, y):
        #Add bias term to X data
        X = np.c_[np.ones(X.shape[0]), X]
        #Calculate weights using closed form solution
        self.importance = np.diag([1/(6-i)**2 for i in range(6)])
        self.weights = np.linalg.pinv(X.T.dot(self.importance).dot(X)).dot(X.T).dot(self.importance).dot(y)
        return self.weights[0], self.weights[1]
    def predict(self, X):
        #Account for X being a single value, convert it to an array
        X = np.array([X])    
        #Add bias term to X data
        X = np.c_[np.ones(X.shape[0]), X]
        return float(X.dot(self.weights))

In [5]:
class WeightedLinearRegression:
    #Linear regression using weighted least squares and closed form solution
    def train(self, X, y):
        #normalise data
        self.x_min = np.amin(X)
        self.x_scale = np.amax(X) - self.x_min
        if (self.x_scale <= 0): self.x_scale = 1
        X = (X - self.x_min) / self.x_scale

        self.y_min = np.amin(y)
        self.y_scale = np.amax(y) - self.y_min
        if (self.y_scale <= 0): self.y_scale = 1
        y = (y - self.y_min) / self.y_scale

        #Add bias term to X data
        X = np.c_[np.ones(X.shape[0]), X]
        #Calculate weights using closed form solution
        self.importance = np.diag([1/(6-i)**2 for i in range(6)])
        self.weights = np.linalg.pinv(X.T.dot(self.importance).dot(X)).dot(X.T).dot(self.importance).dot(y)
        return self.weights[0], self.weights[1], self.x_min, self.x_scale, self.y_min, self.y_scale
    
    def predict(self, X):
        #Account for X being a single value, convert it to an array
        X = np.array([X])
        #Scale X data
        X = (X - self.x_min) / self.x_scale       
        #Add bias term to X data
        X = np.c_[np.ones(X.shape[0]), X]
        #Predict y values
        y = X.dot(self.weights)
        #Scale y values
        y = y * self.y_scale + self.y_min
        return float(y)

In [6]:
#Load Historical Stock Data
aapl = pd.read_csv('Data/AAPL.csv')
amd = pd.read_csv('Data/AMD.csv')
tsla = pd.read_csv('Data/TSLA.csv')

#drop open, high, low, adj close, volume
aapl = aapl.drop(['Open', 'High', 'Low', 'Adj Close', 'Volume'], axis=1)
amd = amd.drop(['Open', 'High', 'Low', 'Adj Close', 'Volume'], axis=1)
tsla = tsla.drop(['Open', 'High', 'Low', 'Adj Close', 'Volume'], axis=1)

#Convert date to int
aapl['Date'] = pd.to_datetime(aapl['Date']).astype(np.int64)
amd['Date'] = pd.to_datetime(amd['Date']).astype(np.int64)
tsla['Date'] = pd.to_datetime(tsla['Date']).astype(np.int64)

In [7]:
#Run training to determine optimal lookback period
stocks = {'AAPL': aapl, 'AMD': amd, 'TSLA':tsla}
for stockName, stock in stocks.items():
    #Train test split leaving the past 2 years as test data
    X_train = stock['Date'].iloc[:len(stock)-365*2] 
    y_train = stock['Close'].iloc[:len(stock)-365*2]

    #Store lb, mae
    fileSave = {'lookback':[], 'MAE':[]}
    for lookbackCount in range(2,14,1): #Iterate lookback from 2 days to 2 weeks
        errors = []
        for predictIndex in range(lookbackCount, len(X_train)):
            startIndex = predictIndex - lookbackCount
            LR = LinearRegression()
            LR.train(X_train.iloc[startIndex:predictIndex], y_train.iloc[startIndex:predictIndex])
            err = abs(y_train.iloc[predictIndex] - LR.predict(X_train.iloc[predictIndex]))
            errors.append(err)
        #store results
        fileSave['lookback'].append(lookbackCount)
        fileSave['MAE'].append(np.mean(errors))
    pd.DataFrame(fileSave).to_csv('Results/'+stockName+'_results.csv')

#Read Results CSV, find the best lookback period by finding lowest MAE
aapl_results = pd.read_csv('Results/aapl_results.csv', index_col=0)
amd_results = pd.read_csv('Results/AMD_results.csv', index_col=0)
tsla_results = pd.read_csv('Results/TSLA_results.csv', index_col=0)

#Get the best lookback period for each stock
lookbackDictionary = {}
lookbackDictionary['AAPL'] = aapl_results['lookback'][aapl_results['MAE'].idxmin()]
lookbackDictionary['AMD'] = amd_results['lookback'][amd_results['MAE'].idxmin()]
lookbackDictionary['TSLA'] = tsla_results['lookback'][tsla_results['MAE'].idxmin()]
print(lookbackDictionary)

{'AAPL': 6, 'AMD': 6, 'TSLA': 6}


In [12]:
#show APPL, AMD and TSLA results for the top 5 lookback periods with the lowest MAE
print("AAPL")
print(aapl_results.sort_values(by=['MAE']).head(5))
print("AMD")
print(amd_results.sort_values(by=['MAE']).head(5))
print("TSLA")
print(tsla_results.sort_values(by=['MAE']).head(6)) #just to show the 6th place

AAPL
   lookback       MAE
4         6  0.119736
3         5  0.122650
5         7  0.122728
2         4  0.126048
6         8  0.126512
AMD
   lookback       MAE
4         6  0.381567
5         7  0.391733
3         5  0.397186
2         4  0.406129
6         8  0.407270
TSLA
   lookback       MAE
4         6  0.339822
3         5  0.345133
2         4  0.348928
5         7  0.349248
1         3  0.358240
6         8  0.360301


In [8]:
print(np.diag([1/(6-i)**2 for i in range(6)]))

[[0.02777778 0.         0.         0.         0.         0.        ]
 [0.         0.04       0.         0.         0.         0.        ]
 [0.         0.         0.0625     0.         0.         0.        ]
 [0.         0.         0.         0.11111111 0.         0.        ]
 [0.         0.         0.         0.         0.25       0.        ]
 [0.         0.         0.         0.         0.         1.        ]]


In [21]:
for model in [LinearRegression(), WeightedLinearRegression()]:
    print(f'\nModel: {model}')
    stocks = {'AAPL': aapl, 'AMD': amd, 'TSLA':tsla}
    for stockName, stock in stocks.items():
        X_test = stock['Date'].iloc[len(stock)-365*2:]
        y_test = stock['Close'].iloc[len(stock)-365*2:]
        lookbackCount = 6
        errors = []
        for predictIndex in range(lookbackCount, len(X_test)):
            startIndex = predictIndex - lookbackCount
            model.train(X_test.iloc[startIndex:predictIndex], y_test.iloc[startIndex:predictIndex])
            err = abs(y_test.iloc[predictIndex] - model.predict(X_test.iloc[predictIndex]))
            errors.append(err)
        print(f'Stock: {stockName}, Lookback: {lookbackCount}, MAE: {np.mean(errors)}')


Model: <__main__.LinearRegression object at 0x000002B4635E66E0>
Stock: AAPL, Lookback: 6, MAE: 2.4640901147646472
Stock: AMD, Lookback: 6, MAE: 2.721878864495098
Stock: TSLA, Lookback: 6, MAE: 7.661226340929522

Model: <__main__.WeightedLinearRegression object at 0x000002B4635E6680>
Stock: AAPL, Lookback: 6, MAE: 2.2310091485460743
Stock: AMD, Lookback: 6, MAE: 2.4982656249339126
Stock: TSLA, Lookback: 6, MAE: 6.749398980365186


### Profits

In [20]:
for model in["Control", LinearRegression(), WeightedLinearRegression()]:
    for timePeriod in [1,7,30]:
        print(f'\nTime Period (days): {timePeriod}, Model: {model}')
        stocks = {'AAPL': aapl, 'AMD': amd, 'TSLA':tsla}
        for stockName, stock in stocks.items():
            lookbackCount = 6
            X_test = stock['Date'].iloc[len(stock)-365*2-lookbackCount:]
            y_test = stock['Close'].iloc[len(stock)-365*2-lookbackCount:]
            profit = 0, 
            wrongTrade = 0
            for day in range(lookbackCount,len(X_test)-1,timePeriod):
                stockCount = ( 10 * timePeriod) / y_test.iloc[day]
                if model == "Control":
                    maxPrice = max(y_test.iloc[day:day+timePeriod+1])
                    maxPriceIndex = y_test.iloc[day:day+timePeriod+1].tolist().index(maxPrice) -1
                else:
                    model.train(X_test.iloc[day-(lookbackCount-1):day+1], y_test.iloc[day-(lookbackCount-1):day+1])
                    predictions = []           
                    for i in range(timePeriod):
                        try:
                            predictions.append(model.predict(X_test.iloc[day+1+i]))
                        except:
                            predictions.append(0)
                    maxPrice = max(predictions)
                    maxPriceIndex = predictions.index(maxPrice)
                profit += stockCount * y_test.iloc[day+1+maxPriceIndex] - (10 * timePeriod)
                if y_test.iloc[day+1+maxPriceIndex] < y_test.iloc[day]:
                    wrongTrade += 1
            print(f'Stock: {stockName}, Profit: {profit}, Wrong Trades: {wrongTrade}/{(len(X_test)-lookbackCount) // timePeriod}')


Time Period (days): 1, Model: Control
Stock: AAPL, Profit: [63.29733668], Wrong Trades: 0/730
Stock: AMD, Profit: [97.06007142], Wrong Trades: 0/730
Stock: TSLA, Profit: [131.19135589], Wrong Trades: 0/730

Time Period (days): 7, Model: Control
Stock: AAPL, Profit: [246.63285512], Wrong Trades: 0/104
Stock: AMD, Profit: [414.76173808], Wrong Trades: 0/104
Stock: TSLA, Profit: [575.0444542], Wrong Trades: 0/104

Time Period (days): 30, Model: Control
Stock: AAPL, Profit: [779.01040871], Wrong Trades: 0/24
Stock: AMD, Profit: [1179.60247279], Wrong Trades: 0/24
Stock: TSLA, Profit: [2236.15390396], Wrong Trades: 0/24

Time Period (days): 1, Model: <__main__.LinearRegression object at 0x000002B4635E5A20>
Stock: AAPL, Profit: [9.42969692], Wrong Trades: 345/730
Stock: AMD, Profit: [9.84779814], Wrong Trades: 364/730
Stock: TSLA, Profit: [31.99761288], Wrong Trades: 327/730

Time Period (days): 7, Model: <__main__.LinearRegression object at 0x000002B4635E5A20>
Stock: AAPL, Profit: [46.0043