# Stock-Bot

Decision to buy/sell is made when a certain SMA is crossed.

- get gradiant of all SMAs at that point
- guess price when the stock cross that SMA again

### How?

- when stock cross -> ask experts

In [62]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostRegressor, AdaBoostClassifier
from sklearn.metrics import *

pd.options.mode.chained_assignment = None

## Pre-processing

In [82]:
companies_df = pd.read_csv('data/sp500_companies.csv').sort_values(by='Weight', ascending=False)
index_df = pd.read_csv('data/sp500_index.csv')
stocks_df = pd.read_csv('data/sp500_stocks.csv')

### Calculate values for a single company

In [84]:
drop_cols = ['Symbol', 'Adj Close', 'High', 'Low', 'Open', 'Volume']

def create_company_df(symbol: str):
    ma_sizes = [5, 10, 30, 50]
    company_df = stocks_df[stocks_df.Symbol == symbol]
    company_df = company_df.sort_values(by='Date').drop(columns=drop_cols)
    # company_df = company_df.tail(365) # NOTE: gets last year of data
    for size in ma_sizes:
        company_df[f'SMA{size}'] = company_df['Close'].rolling(size).mean()
    company_df.dropna(inplace=True)

    # Calculate SMA gradients
    for size in ma_sizes:
        company_df[f'SMA{size}_diff'] = company_df[f'SMA{size}'] - company_df[f'SMA{size}'].shift(1)

    # Calculate Close value in 14 days time
    company_df['future_close'] = company_df['Close'].shift(-14) - company_df['Close']
    company_df.dropna(inplace=True)
    return company_df

    # Plot
    # company_df[['Close', 'SMA5', 'SMA10', 'SMA30', 'SMA50']].plot(label='AAPL', figsize=(12, 4))

### Data prep

In [85]:
df = create_company_df('AAPL')
symbols = list(companies_df.Symbol.unique())
for symbol in symbols[1:400]:
    df = pd.concat((df, create_company_df(symbol)), axis=0, ignore_index=True)

df = df[['Date', 'SMA5_diff', 'SMA10_diff', 'SMA30_diff', 'SMA50_diff', 'future_close']].sort_values(by='Date').drop(columns=['Date'])

df['outcome'] = df['future_close'] > 0
df.shape

### Train models

#### Regression

In [None]:
X_train, X_test, y_train, y_test = train_test_split(df.drop(columns=['future_close', 'outcome']), df['future_close'], test_size = 0.2)

In [None]:
def print_statistics(regression_model, name):
    print(f'---------- {name} ----------')
    print(f'Max Error: {max_error(y_test, regression_model.predict(X_test))}')
    print(f'MSE:       {mean_squared_error(y_test, regression_model.predict(X_test))}')
    print(f'MAE:       {mean_absolute_error(y_test, regression_model.predict(X_test))}')
    print(f'R2 Score:  {regression_model.score(X_test, y_test)}')

In [None]:
adaboost = AdaBoostRegressor(n_estimators=50)
adaboost.fit(X_train, y_train)
print_statistics(adaboost, 'AdaBoost')

---------- AdaBoost ----------
Max Error: 649.1071535548535
MSE:       186.5157276919058
MAE:       5.398024358901793
R2 Score:  0.016324679749587512


#### Classification

In [None]:
X_train, X_test, y_train, y_test = train_test_split(df.drop(columns=['future_close', 'outcome']), df['outcome'], test_size = 0.2)

In [None]:
ac = AdaBoostClassifier(n_estimators=50)
ac.fit(X_train, y_train)
ac.score(X_test, y_test)

0.5747523722656938