In [382]:
import pandas as pd

In [383]:
tech_prices = pd.read_csv("Downloads/big_tech_stock_prices.csv")

In [384]:
tech_prices["date"]=pd.to_datetime(tech_prices["date"])

In [385]:
tech_prices["month"] = tech_prices["date"].dt.month

In [386]:
tech_prices["election_season"] = (
    ((tech_prices["date"].dt.year % 4) == 0) & 
    (tech_prices["date"].dt.month > 7) & 
    (tech_prices["date"].dt.month < 12)
).astype(int)


In [387]:
tech_prices["target"] = tech_prices["close"].shift(-1) > tech_prices["close"]

In [388]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score
from sklearn.metrics import accuracy_score
rf = RandomForestClassifier(n_estimators=150, min_samples_split=10, random_state=1)

In [389]:
group_stocks = tech_prices.groupby("stock_symbol")
group = group_stocks.get_group("AAPL")

In [390]:
def rolling_averages(group, cols, new_cols):
    group = group.sort_values("date")
    rolling_stats = group[cols].rolling(3, closed='left').mean()
    group[new_cols] = rolling_stats
    group = group.dropna(subset=new_cols)
    return group



In [391]:
cols = ["open", "high", "low","adj_close", "close", "volume", "month"]
new_cols = [f"{c}_rolling" for c in cols]

In [392]:
rolling_averages(group, cols, new_cols)

Unnamed: 0,stock_symbol,date,open,high,low,close,adj_close,volume,month,election_season,target,open_rolling,high_rolling,low_rolling,adj_close_rolling,close_rolling,volume_rolling,month_rolling
3,AAPL,2010-01-07,7.562500,7.571429,7.466071,7.520714,6.410790,477131200,1,0,True,7.647738,7.682381,7.575952,6.488118,7.611429,5.492648e+08,1.0
4,AAPL,2010-01-08,7.510714,7.571429,7.466429,7.570714,6.453412,447610800,1,0,False,7.627738,7.652619,7.536309,6.453310,7.570595,5.437320e+08,1.0
5,AAPL,2010-01-11,7.600000,7.607143,7.444643,7.503929,6.396483,462229600,1,0,False,7.576548,7.609881,7.486429,6.428955,7.542024,4.923007e+08,1.0
6,AAPL,2010-01-12,7.471071,7.491786,7.372143,7.418571,6.323721,594459600,1,0,True,7.557738,7.583334,7.459048,6.420228,7.531786,4.623239e+08,1.0
7,AAPL,2010-01-13,7.423929,7.533214,7.289286,7.523214,6.412922,605892000,1,0,False,7.527262,7.556786,7.427738,6.391205,7.497738,5.014333e+08,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3266,AAPL,2022-12-22,134.350006,134.559998,130.300003,132.229996,132.229996,77852100,12,0,False,133.159999,135.086665,131.320002,133.373332,133.373332,8.098447e+07,12.0
3267,AAPL,2022-12-23,130.919998,132.419998,129.639999,131.860001,131.860001,63814900,12,0,False,132.906667,134.873332,130.980001,133.326665,133.326665,8.040430e+07,12.0
3268,AAPL,2022-12-27,131.380005,131.410004,128.720001,130.029999,130.029999,69007800,12,0,False,132.750000,134.596665,130.896667,133.179998,133.179998,7.586500e+07,12.0
3269,AAPL,2022-12-28,129.669998,131.029999,125.870003,126.040001,126.040001,85438400,12,0,True,132.216670,132.796667,129.553334,131.373332,131.373332,7.022493e+07,12.0


In [393]:
prices_rolling = tech_prices.groupby("stock_symbol").apply(lambda x: rolling_averages(x, cols, new_cols))
prices_rolling.index = range(prices_rolling.shape[0])

  prices_rolling = tech_prices.groupby("stock_symbol").apply(lambda x: rolling_averages(x, cols, new_cols))


In [395]:
def make_predictions(data, predictors):
    train = data[data["date"] < '2022-01-01']
    test = data[data["date"] > '2022-01-01']
    rf.fit(train[predictors], train["target"])
    preds = rf.predict(test[predictors])
    combined = pd.DataFrame(dict(actual=test["target"], predicted=preds),index=test.index)
    precision = precision_score(test["target"],preds)
    return combined, precision
 


In [396]:
train = tech_prices[tech_prices["date"] < '2022-01-01']
test = tech_prices[tech_prices["date"] > '2022-01-01']
rf.fit(train[cols], train["target"])
preds = rf.predict(test[cols])
acc = accuracy_score(test["target"],preds)

In [397]:
acc

0.5096700796359499

In [398]:
combined = pd.DataFrame(dict(actual = test["target"], prediction = preds))
pd.crosstab(index = combined["actual"], columns=combined["prediction"])

prediction,False,True
actual,Unnamed: 1_level_1,Unnamed: 2_level_1
False,716,1167
True,557,1076


In [399]:
combined, precision = make_predictions(prices_rolling, new_cols)

In [400]:
precision

np.float64(0.4727272727272727)

In [401]:
combined

Unnamed: 0,actual,predicted
3018,False,False
3019,False,True
3020,False,True
3021,True,True
3022,True,True
...,...,...
45041,False,True
45042,False,True
45043,True,True
45044,True,True


In [402]:
combined = combined.merge(prices_rolling[["date","stock_symbol","open","close"]], left_index=True, right_index=True)

In [403]:
combined

Unnamed: 0,actual,predicted,date,stock_symbol,open,close
3018,False,False,2022-01-03,AAPL,177.830002,182.009995
3019,False,True,2022-01-04,AAPL,182.630005,179.699997
3020,False,True,2022-01-05,AAPL,179.610001,174.919998
3021,True,True,2022-01-06,AAPL,172.699997,172.000000
3022,True,True,2022-01-07,AAPL,172.889999,172.169998
...,...,...,...,...,...,...
45041,False,True,2022-12-22,TSLA,136.000000,125.349998
45042,False,True,2022-12-23,TSLA,126.370003,123.150002
45043,True,True,2022-12-27,TSLA,117.500000,109.099998
45044,True,True,2022-12-28,TSLA,110.349998,112.709999
