In [1]:
import pandas as pd
import numpy as np
import sympy as sp
import math
from sklearn import linear_model
from sklearn.model_selection import train_test_split

ModuleNotFoundError: No module named 'pandas'

In [31]:
from IPython.core.interactiveshell import InteractiveShell

InteractiveShell.ast_node_interactivity = "all"

#### Test 1: Solve overdetermined system of equations with least squares solution

In [32]:
# Adjust data set
df_pre = pd.read_csv("BABA_2014_2018.csv")
df_pre.columns = df_pre.columns.str.strip().str.replace(' ', '_').str.replace('(', '').str.replace(')', '')
df_pre['Ones'] = 1
df_pre['RSI_yesterday'] = df_pre.RSI.shift(+1)
df_pre['MACD_yesterday'] = df_pre.MACD.shift(+1)
df_pre['Stochastic_RSI_yesterday'] = df_pre.Stochastic.shift(+1)
df_pre['Stochastic_SMA_yesterday'] = df_pre.Stochastic_SMA.shift(+1)
df_pre = df_pre.loc[830:] # Looking only at 2018; include 'Dates' column to check
df = df_pre[['Date', 'RSI_yesterday', 'MACD_yesterday', 'Stochastic_RSI_yesterday', 'Stochastic_SMA_yesterday', 'Ones', 'pctChange']]

In [33]:
# Split data between training set and validation set
train, test = train_test_split(df, test_size=0.2)

In [34]:
# Create X and y matrices from features and outputs
X_df = train[['RSI_yesterday', 'MACD_yesterday', 'Stochastic_RSI_yesterday', 'Stochastic_SMA_yesterday', 'Ones']].copy()
y_df = train[['pctChange']].copy()
X_test = test[['RSI_yesterday', 'MACD_yesterday', 'Stochastic_RSI_yesterday', 'Stochastic_SMA_yesterday', 'Ones']].copy()
y_test = test[['pctChange']].copy()

# create numpy arrays from pandas dataframes to solve
X = X_df.values
y = y_df.values
X_test = X_test.values
y_test = y_test.values

In [35]:
# solve analytically

X = np.matrix(X)
y = np.matrix(y)

XTX_inv = np.linalg.inv(np.matmul(np.transpose(X),X))
XTX_inv_XT = np.matmul(XTX_inv,np.transpose(X))
w_0 = np.matmul(XTX_inv_XT, y)
w_0

matrix([[-0.00020992],
        [-0.0004456 ],
        [-0.00029365],
        [ 0.00038744],
        [ 0.00441074]])

In [36]:
# solve with least squares method
sol = np.linalg.lstsq(X,y, rcond=None)[0].tolist()

# obtain weights as list
w_1 = [0,0,0,0,0]
for i in range(len(sol)):
    w_1[i] = sol[i][0]

w_1

[-0.00020991611902451842,
 -0.00044560319896272605,
 -0.0002936465223482605,
 0.0003874364323465634,
 0.004410736419002698]

#### Test 2: Solve with gradient descent

In [37]:
X = train.values

In [39]:
w_2 = [0,0,0,0,0] # weights
y_col_num = int(np.size(X,1) - 1)
N = len(X)
a = 0.0001
iterations = 1000

for h in range(iterations):
    for i in range(N):
        y = X[i, y_col_num]
        for j in range(len(w_2)):
            w_j_grad = -(2)*X[i,j]*(y - (w_2[0]*X[i,0] + w_2[1]*X[i,1] + w_2[2]*X[i,2] + w_2[3]*X[i,3] + w_2[4]*X[i,4]))
            w_2[j] = w_2[j] - (a*w_j_grad)

w_2

TypeError: can only concatenate str (not "float") to str

### Testing accuracy of both methods

In [26]:
# def convert(percentage):
#     if percentage > 0.025:
#         return "Very Much Up"
#     if percentage <= 0.025 and percentage > 0.0010:
#         return "Up"
#     if percentage >= -0.025 and percentage < -0.0010:
#         return "Down"
#     if percentage < -0.025:
#         return "Very Much Down"
#     else:
#         return "Neutral"

def convert(percentage):
    if percentage > 0.0010:
        return "Up"
    if percentage < -0.0010:
        return "Down"
    else:
        return "Neutral"

print ("Testing least squares solution...")

correct_1 = 0
correct_2 = 0

for i in range(len(X_test)):
    predict = w_1[0]*X_test[i,0] + w_1[1]*X_test[i,1] + w_1[2]*X_test[i,2] + w_1[3]*X_test[i,3] + w_1[4]*X_test[i,4]
    magnitude = convert(predict)
    actual = y_test[i][0]
    actual_magnitude = convert(actual)
    if magnitude == actual_magnitude:
        correct_1 += 1
    print('Predicted percent change: ', predict, magnitude)
    print('Actual percent change: ', actual, actual_magnitude)
    print('\n')
    
print ('Accuracy: ', correct_1, '/', len(y_test))
print ('###########################################################')
print ("\nTesting gradient descent...")

for i in range(len(X_test)):
    predict = w_2[0]*X_test[i,0] + w_2[1]*X_test[i,1] + w_2[2]*X_test[i,2] + w_2[3]*X_test[i,3] + w_2[4]*X_test[i,4]
    magnitude = convert(predict)
    actual = y_test[i][0]
    actual_magnitude = convert(actual)
    if magnitude == actual_magnitude:
        correct_2 += 1
    print('Predicted percent change: ', predict, magnitude)
    print('Actual percent change: ', actual, actual_magnitude)
    print('\n')
    
print ('Accuracy: ', correct_2, '/', len(y_test))
print ('###########################################################')

Testing least squares solution...


NameError: name 'w_1' is not defined