In [130]:
import pyodbc
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
import numpy as np
from sklearn.metrics import precision_score
import pickle
from datetime import datetime
import warnings

def getData(symbol):
    conn = pyodbc.connect(
        "DRIVER={SQL Server};SERVER=lenovo-desktop;DATABASE=Qihuo;UID=samtsql;PWD=F(W}q:TsyK,7^+>`P28e79s#Uc5n")
    query = f"""select m1.date as 'Date',DATEPART(hour, m1.Date) AS 'Hour', m1.[Close] as 'Market', m2.[close] as 'Stock'  from MinuteQuote m1
    inner join MinuteQuote m2 on m1.Date = m2.date
    where m1.Date > '2002-04-08 01:20:00' and m1.Contract = 'ym' and m2.Contract = '{symbol}'
    and DATEPART(minute, m1.Date) % 30 = 0
    order by m1.date"""

    with warnings.catch_warnings():
        warnings.simplefilter('ignore', UserWarning)
        df = pd.read_sql(query, conn)
        df.set_index('Date', inplace=True)
        return df

 


In [131]:
def transformData(df):
    df["StockReturn"] = df.rolling(2).apply(lambda x: x.iloc[1] / x.iloc[0] - 1)["Stock"]
    df["MarketReturn"] = df.rolling(2).apply(lambda x: x.iloc[1] / x.iloc[0] - 1)["Market"]
    df["OutPerform"] = df["StockReturn"] - df["MarketReturn"]
    df["Target"] = (df.apply(lambda x: x > 0)["OutPerform"]).astype(int)



In [132]:
def getPredictors(df):
    predictors = ['Hour']
    for i in range(12):
        df[f'OutPerform{pow(2,i)*5}'] = df['OutPerform'].rolling(pow(2,i)).sum()
        predictors.append(f'OutPerform{pow(2,i)*5}')
        df[f'StockReturn{pow(2,i)*5}'] = df['StockReturn'].rolling(pow(2,i)).sum()
        predictors.append(f'StockReturn{pow(2,i)*5}')

    return predictors    


In [133]:
def getMLdata(df, predictors):
    prev = df.copy()
    prev = prev.shift(1)
    data = df[["OutPerform","Target"]]
    data = data.join(prev[predictors])
    data = data.copy().dropna()

    return data


In [134]:
def predict(train, test, predictors, model):
    model.fit(train[predictors], train["Target"])
    preds = model.predict(test[predictors])#[:,1]
    # preds[preds > .6] = 1
    # preds[preds<=.6] = 0
    # print(preds)
    preds = pd.Series(preds, index=test.index, name="Predictions")
    combined = pd.concat([test["Target"], preds], axis=1)
    return combined

In [135]:
def backtest(data, model, predictors, start=10000, step=1000):
    all_predictions = []
    loop = 0
    for i in range(start, data.shape[0], step):
        train = data.iloc[loop * step:i].copy()
        test = data.iloc[i:(i+step)].copy()
        predictions = predict(train, test, predictors, model)
        all_predictions.append(predictions)
        loop = 0 # loop + 1

    return pd.concat(all_predictions)


In [136]:
def processResult(data, predictions):
    tempData = data[['OutPerform']].copy()
    result1 = pd.merge(tempData, predictions, left_index=True, right_index=True)
    result1['year'] = result1.index.strftime('%Y')
    groupbyyear = result1.groupby('year')
    for group_name, df_group in groupbyyear:
        count = 0
        newtrade = True
        for ind in df_group.index:
            if df_group['Predictions'][ind] == 1:
                if newtrade == True:
                    count = count + 1
                    newtrade = False
            else:
                newtrade = True 
        gsum = df_group[df_group['Predictions'] == 1]['OutPerform'].sum()        
        print(s, group_name, gsum/count)

In [137]:
model = RandomForestClassifier(n_estimators=100, min_samples_split=1200, random_state=1)
symbols = ['CVX',
'HON',
'CRM',
'UNH',
'CSCO',
'WMT',
'AXP',
'JPM',
'MCD',
'HD',
'AMGN',
'V',
'INTC',
'WBA',
'GS',
'JNJ',
'PG',
'AAPL',
'DIS',
'MMM',
'MRK',
'MSFT',
'TRV',
'VZ',
'IBM',
'CAT',
'NKE']
for s in symbols:
    df = getData(s)
    transformData(df)
    predictors = getPredictors(df)
    data = getMLdata(df, predictors)
    
    predictions = backtest(data, model, predictors)
    pickle.dump(model, open(f"{s}", 'wb'))
    processResult(data, predictions)
    precision_score(predictions["Target"], predictions["Predictions"])
    

CVX 2005 -0.00011668314881273993
CVX 2006 0.0007807463008526695
CVX 2007 0.0010347012032612373
CVX 2008 0.001542645415188097
CVX 2009 -0.00014809548891228096
CVX 2010 0.0003110461217696961
CVX 2011 0.0002956006156281807
CVX 2012 3.514164571143245e-06
CVX 2013 -3.279513046907667e-05
CVX 2014 -0.0001801052841125313
CVX 2015 -0.000411001218248323
CVX 2016 0.00013084138347405365
CVX 2017 0.0001522632776302893
CVX 2018 0.00012470607175228031
CVX 2019 0.00016508382484161055
CVX 2020 -3.3043975181140865e-05
CVX 2021 0.00026046434856723564
CVX 2022 0.0011818175557087524


KeyboardInterrupt: 