In [47]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, log_loss, precision_score

In [44]:
gme_df = pd.read_csv('GME_df.csv')
amc_df = pd.read_csv('AMC_df.csv')
pltr_df = pd.read_csv('PLTR_df.csv')
nok_df = pd.read_csv('NOK_df.csv')
tsla_df = pd.read_csv('TSLA_df.csv')

In [45]:
gme_df

Unnamed: 0.1,Unnamed: 0,Date,Open,Adj Close,Volume,pct_change_c2c,pct_change_intraday,10d_realised_vol,count,pos,neg,net,abs,pct_change_c2c_next,pct_change_intraday_next,Volume_next,down_next,Short Volume,Short Exempt Volume
0,0,2021-01-28,265.0,193.600006,58815800.0,-44.289373,-26.943394,641.061041,454.0,0.020153,0.027926,-0.007773,0.048079,67.871896,-14.408362,50259200.0,0,9606123,455032
1,1,2021-01-29,379.709991,325.0,50259200.0,67.871896,-14.408362,706.293015,4983.0,0.021446,0.028455,-0.007009,0.049902,-30.769231,-28.923426,37382200.0,1,8814229,527920
2,2,2021-02-01,316.559998,225.0,37382200.0,-30.769231,-28.923426,737.844005,370.0,0.020451,0.02524,-0.004788,0.045691,-60.0,-36.061379,78183100.0,1,6982444,364890
3,3,2021-02-02,140.759995,90.0,78183100.0,-60.0,-36.061379,816.446819,472.0,0.021595,0.026473,-0.004879,0.048068,2.677782,-17.498436,42698500.0,0,16358136,1073011
4,4,2021-02-03,112.010002,92.410004,42698500.0,2.677782,-17.498436,819.755654,782.0,0.022034,0.024754,-0.00272,0.046788,-42.105835,-41.331288,62427300.0,1,9638240,532939
5,5,2021-02-04,91.190002,53.5,62427300.0,-42.105835,-41.331288,871.13317,601.0,0.021787,0.027398,-0.005611,0.049184,19.196263,18.00518,80886300.0,0,14272780,869183
6,6,2021-02-05,54.040001,63.77,80886300.0,19.196263,18.00518,820.118149,1028.0,0.023072,0.029187,-0.006116,0.052259,-5.911871,-17.138521,25687300.0,1,19063724,1106467
7,7,2021-02-08,72.410004,60.0,25687300.0,-5.911871,-17.138521,800.584685,104.0,0.019375,0.023233,-0.003858,0.042608,-16.149998,-11.128774,26843100.0,1,6404809,82425
8,8,2021-02-09,56.610001,50.310001,26843100.0,-16.149998,-11.128774,678.092879,63.0,0.022518,0.024938,-0.00242,0.047456,1.769031,0.846957,36455000.0,0,6516142,277522
9,9,2021-02-10,50.77,51.200001,36455000.0,1.769031,0.846957,465.186045,90.0,0.018686,0.021541,-0.002855,0.040227,-0.195317,2.179564,12997400.0,1,6587323,241069


# Linear Regression

In [46]:
from sklearn.linear_model import LinearRegression

In [5]:
def linear_reg_pctchangenext(df):
    
    features = ['Open','Adj Close','Volume','pct_change_c2c','pct_change_intraday','10d_realised_vol','count','pos','neg','net','abs', 'Short Volume','Short Exempt Volume']
    df.drop(df.tail(1).index,inplace=True)
    
    X=df[features]
    y=df['pct_change_c2c_next']
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    reg = LinearRegression().fit(X_train, y_train)
    
    coefs = pd.DataFrame(reg.coef_.reshape(1,13), columns=features)
    coefs.insert(loc=0, column="Intercept", value=reg.intercept_)
    coefs = coefs.T
    coefs=coefs.rename(columns={0:'Coefficient'})
    coefs=coefs.round(4)
    
    print("Regression Summary")
    display(coefs)
    
    y_pred = reg.predict(X_test)
            
    y_test_df=y_test.to_frame()
    y_test_df.insert(loc=1,column="y_pred", value = y_pred)
    display(y_test_df)

    print("Coefficient R-squared: {}".format(reg.score(X_test, y_test)))

In [6]:
def linear_reg_volumenext(df):
    
    features = ['Open','Adj Close','Volume','pct_change_c2c','pct_change_intraday','10d_realised_vol','count','pos','neg','net','abs', 'Short Volume','Short Exempt Volume']
    df.drop(df.tail(1).index,inplace=True)
    
    X=df[features]
    y=df['Volume_next']
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    reg = LinearRegression().fit(X_train, y_train)
    
    coefs = pd.DataFrame(reg.coef_.reshape(1,13), columns=features)
    coefs.insert(loc=0, column="Intercept", value=reg.intercept_)
    coefs = coefs.T
    coefs=coefs.rename(columns={0:'Coefficient'})
#     coefs=coefs.round(1)
    
    print("Regression Summary")
    display(coefs)
    
    y_pred = reg.predict(X_test)
            
    y_test_df=y_test.to_frame()
    y_test_df.insert(loc=1,column="y_pred", value = y_pred)
    display(y_test_df)

    print("Coefficient R-squared: {}".format(reg.score(X_test, y_test)))

In [7]:
linear_reg_pctchangenext(gme_df)

Regression Summary


Unnamed: 0,Coefficient
Intercept,22.5032
Open,-0.3261
Adj Close,0.2361
Volume,0.0
pct_change_c2c,-1.6904
pct_change_intraday,1.209
10d_realised_vol,-0.0598
count,0.0316
pos,339.2174
neg,-299.5702


Unnamed: 0,pct_change_c2c_next,y_pred
37,-33.788173,19.355091
24,4.072534,19.247297
25,41.208068,24.360285
36,-6.550468,3.836602
34,-0.733579,1.46353
39,-1.496599,10.45216
4,-42.105835,-34.3395
12,-7.210664,14.862086
8,1.769031,-0.783428


Coefficient R-squared: 0.07962085083062242


In [8]:
linear_reg_pctchangenext(amc_df)

Regression Summary


Unnamed: 0,Coefficient
Intercept,3.92
Open,5.6493
Adj Close,-7.1138
Volume,0.0
pct_change_c2c,-0.0192
pct_change_intraday,0.6143
10d_realised_vol,-0.0388
count,-0.0029
pos,182.4322
neg,104.9281


Unnamed: 0,pct_change_c2c_next,y_pred
37,-15.38461,5.774082
24,0.249072,10.416805
25,15.403723,12.601165
36,-14.651721,11.679635
34,-0.499998,-7.033783
39,-6.398536,7.611139
4,-20.958752,11.061681
12,-1.76991,-0.104407
8,5.454549,-3.286933


Coefficient R-squared: -1.5034338221160217


In [9]:
linear_reg_pctchangenext(pltr_df)

Regression Summary


Unnamed: 0,Coefficient
Intercept,1.7244
Open,-1.6088
Adj Close,1.5956
Volume,0.0
pct_change_c2c,-0.0533
pct_change_intraday,-0.228
10d_realised_vol,-0.0139
count,-0.0644
pos,-23.394
neg,-8.6015


Unnamed: 0,pct_change_c2c_next,y_pred
25,-5.970774,-0.210739
13,-7.053175,-0.199616
8,-6.471044,-1.040278
26,7.726464,-0.062489
4,0.913095,-3.413394
39,0.0,1.359757
19,-0.250415,-0.198399
29,0.710814,0.668322
30,-0.482909,0.125511


Coefficient R-squared: -0.09002294299352953


In [10]:
linear_reg_pctchangenext(nok_df)

Regression Summary


Unnamed: 0,Coefficient
Intercept,33.2025
Open,-638.7425
Adj Close,629.3206
Volume,0.0
pct_change_c2c,0.2695
pct_change_intraday,-25.9518
10d_realised_vol,0.0095
count,0.0459
pos,15.63
neg,8.5252


Unnamed: 0,pct_change_c2c_next,y_pred
17,0.24753,-0.924202
13,-1.213585,-1.941356
4,-7.021275,18.327876
29,0.943407,-5.801263
35,-0.985221,-0.247355
25,3.53535,0.542369
6,0.947878,-0.40321
26,2.926827,-0.050245


Coefficient R-squared: -8.518517316602972


In [11]:
linear_reg_pctchangenext(tsla_df)

Regression Summary


Unnamed: 0,Coefficient
Intercept,-26.4704
Open,-0.7621
Adj Close,0.7726
Volume,0.0
pct_change_c2c,-0.0739
pct_change_intraday,-5.5577
10d_realised_vol,0.0113
count,0.0962
pos,202.0589
neg,-47.0704


Unnamed: 0,pct_change_c2c_next,y_pred
37,-4.81605,-1.228922
24,-3.779929,17.117586
25,-5.844972,22.163674
36,-1.170153,4.033507
34,0.261808,1.796976
39,-3.385436,-6.097406
4,-0.549908,-4.636269
12,0.242402,-1.14298
8,-5.255105,-2.56684


Coefficient R-squared: -26.632399899357456


In [12]:
linear_reg_volumenext(gme_df)

Regression Summary


Unnamed: 0,Coefficient
Intercept,21264.11
Open,607485.2
Adj Close,-642849.0
Volume,1.29935
pct_change_c2c,500327.4
pct_change_intraday,289721.1
10d_realised_vol,33867.08
count,-14313.66
pos,860056300.0
neg,-615723700.0


Unnamed: 0,Volume_next,y_pred
25,63424800.0,27320100.0
13,23990600.0,6347275.0
8,36455000.0,30308950.0
26,38725800.0,30111650.0
4,62427300.0,47815310.0
39,37371900.0,40213240.0
19,91963000.0,76358350.0
29,25760700.0,6529932.0
30,24100400.0,34097330.0


Coefficient R-squared: 0.3763016364387448


In [13]:
linear_reg_volumenext(amc_df)

Regression Summary


Unnamed: 0,Coefficient
Intercept,9953958.0
Open,202789500.0
Adj Close,-207197200.0
Volume,0.7122509
pct_change_c2c,-956153.2
pct_change_intraday,25089320.0
10d_realised_vol,60736.27
count,30646.74
pos,472928100.0
neg,720454700.0


Unnamed: 0,Volume_next,y_pred
25,113872800.0,84516480.0
13,130540800.0,56286280.0
8,152810800.0,122050200.0
26,148428200.0,142517200.0
4,162985800.0,234650200.0
39,83778100.0,247284300.0
19,135675300.0,153706700.0
29,110443600.0,57626630.0
30,277713300.0,128058600.0


Coefficient R-squared: -1.6748831367418866


In [14]:
linear_reg_volumenext(pltr_df)

Regression Summary


Unnamed: 0,Coefficient
Intercept,-51321270.0
Open,-60358280.0
Adj Close,63323900.0
Volume,1.329016
pct_change_c2c,-2476359.0
pct_change_intraday,-17640380.0
10d_realised_vol,153036.2
count,-64793.43
pos,-49016120.0
neg,131732700.0


Unnamed: 0,Volume_next,y_pred
24,172875900.0,95407260.0
13,313175100.0,110107200.0
8,45177200.0,104087200.0
25,93973900.0,102147500.0
4,31565000.0,52279770.0
40,52039700.0,46155010.0
19,119588600.0,165562200.0
39,57689500.0,28375430.0
29,58489200.0,63143570.0


Coefficient R-squared: 0.15860011857048706


In [15]:
linear_reg_volumenext(nok_df)

Regression Summary


Unnamed: 0,Coefficient
Intercept,-74766670.0
Open,-1515020000.0
Adj Close,1541589000.0
Volume,0.8205089
pct_change_c2c,-60388.47
pct_change_intraday,-55563990.0
10d_realised_vol,38473.52
count,115309.5
pos,-22821790.0
neg,-260095400.0


Unnamed: 0,Volume_next,y_pred
35,26233600.0,21998810.0
13,28413700.0,56113430.0
26,42858300.0,55245190.0
30,83759200.0,67319010.0
16,77998700.0,81059300.0
31,31188900.0,68359270.0
21,41232500.0,47803800.0
12,43669200.0,67269130.0


Coefficient R-squared: 0.05888797498174547


In [16]:
linear_reg_volumenext(tsla_df)

Regression Summary


Unnamed: 0,Coefficient
Intercept,44520030.0
Open,-497474.2
Adj Close,450495.7
Volume,0.7207692
pct_change_c2c,208622.1
pct_change_intraday,-4125491.0
10d_realised_vol,-21860.72
count,46028.13
pos,-69476830.0
neg,175120600.0


Unnamed: 0,Volume_next,y_pred
25,51787000.0,89350940.0
13,17957100.0,28175240.0
8,36216100.0,20134680.0
26,67523300.0,63477940.0
4,15812700.0,18375620.0
39,33778400.0,38807070.0
19,41089200.0,30103300.0
29,33583800.0,42835150.0
30,29335600.0,31852420.0


Coefficient R-squared: 0.010307841019571717


# Ridge Regression

In [17]:
from sklearn.linear_model import Ridge
from sklearn.linear_model import RidgeClassifier

In [18]:
def ridge_reg_pctchangenext(df):
    
    features = ['Open','Adj Close','Volume','pct_change_c2c','pct_change_intraday','10d_realised_vol','count','pos','neg','net','abs','Short Volume','Short Exempt Volume']
    df.drop(df.tail(1).index,inplace=True)
    
    X=df[features]
    y=df['pct_change_c2c_next']
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    rid = Ridge(alpha=1.0).fit(X_train, y_train)
    
    coefs = pd.DataFrame(rid.coef_.reshape(1,13), columns=features)
    coefs.insert(loc=0, column="Intercept", value=rid.intercept_)
    coefs = coefs.T
    coefs=coefs.rename(columns={0:'Coefficient'})
    coefs=coefs.round(4)
    
    print("Regression Summary")
    display(coefs)
    
    y_pred = rid.predict(X_test)
            
    y_test_df=y_test.to_frame()
    y_test_df.insert(loc=1,column="y_pred", value = y_pred)
    display(y_test_df)

    print("Coefficient R-squared: {}".format(rid.score(X_test, y_test)))

In [19]:
def ridge_reg_volumenext(df):
    
    features = ['Open','Adj Close','Volume','pct_change_c2c','pct_change_intraday','10d_realised_vol','count','pos','neg','net','abs','Short Volume','Short Exempt Volume']
    df.drop(df.tail(1).index,inplace=True)
    
    X=df[features]
    y=df['Volume_next']
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    rid = Ridge(alpha=1.0).fit(X_train, y_train)
    
    coefs = pd.DataFrame(rid.coef_.reshape(1,13), columns=features)
    coefs.insert(loc=0, column="Intercept", value=rid.intercept_)
    coefs = coefs.T
    coefs=coefs.rename(columns={0:'Coefficient'})
#     coefs=coefs.round(4)
    
    print("Regression Summary")
    display(coefs)
    
    y_pred = rid.predict(X_test)
            
    y_test_df=y_test.to_frame()
    y_test_df.insert(loc=1,column="y_pred", value = y_pred)
    display(y_test_df)

    print("Coefficient R-squared: {}".format(rid.score(X_test, y_test)))

In [20]:
ridge_reg_pctchangenext(gme_df)

Regression Summary


  return linalg.solve(A, Xy, sym_pos=True,


Unnamed: 0,Coefficient
Intercept,22.7164
Open,-0.3478
Adj Close,0.2503
Volume,0.0
pct_change_c2c,-1.5858
pct_change_intraday,1.0636
10d_realised_vol,-0.0682
count,0.0297
pos,-0.1613
neg,0.1387


Unnamed: 0,pct_change_c2c_next,y_pred
24,4.072534,15.822719
13,-11.42795,17.791998
8,1.769031,-3.411443
25,41.208068,19.555062
4,-42.105835,-35.771551
40,0.165748,1.965975
19,-6.428773,31.17209
39,-1.496599,7.115825
29,1.730769,13.458543


Coefficient R-squared: 0.13533664981655513


In [21]:
ridge_reg_pctchangenext(amc_df)

Regression Summary


  return linalg.solve(A, Xy, sym_pos=True,


Unnamed: 0,Coefficient
Intercept,21.5472
Open,-3.8246
Adj Close,1.7331
Volume,0.0
pct_change_c2c,0.0632
pct_change_intraday,-0.4859
10d_realised_vol,-0.0367
count,-0.0044
pos,0.56
neg,0.2517


Unnamed: 0,pct_change_c2c_next,y_pred
24,0.249072,6.841159
13,-0.72072,5.736259
8,5.454549,-4.709211
25,15.403723,8.145896
4,-20.958752,3.17736
40,1.074225,-2.448402
19,-3.37756,16.13216
39,-6.398536,2.433279
29,8.560313,-5.797802


Coefficient R-squared: -0.8004644390618938


In [22]:
ridge_reg_pctchangenext(pltr_df)

Regression Summary


  return linalg.solve(A, Xy, sym_pos=True,


Unnamed: 0,Coefficient
Intercept,-2.1066
Open,0.329
Adj Close,-0.5573
Volume,0.0
pct_change_c2c,0.3149
pct_change_intraday,-0.1355
10d_realised_vol,0.0604
count,-0.0729
pos,-0.317
neg,-0.0412


Unnamed: 0,pct_change_c2c_next,y_pred
19,-0.250415,3.111403
16,-4.464286,8.293366
15,-3.448276,9.916779
26,7.726464,-2.642194
4,0.913095,-1.981346
12,-2.729886,0.889695
37,-5.932937,-3.289491
27,2.019785,2.410438


Coefficient R-squared: -2.637174835154208


In [23]:
ridge_reg_pctchangenext(nok_df)

Regression Summary


  return linalg.solve(A, Xy, sym_pos=True,


Unnamed: 0,Coefficient
Intercept,16.3004
Open,-2.0047
Adj Close,-2.1133
Volume,0.0
pct_change_c2c,-0.1095
pct_change_intraday,-0.296
10d_realised_vol,0.0055
count,0.0107
pos,0.1927
neg,-0.0406


Unnamed: 0,pct_change_c2c_next,y_pred
26,2.926827,-1.32298
13,-1.213585,-1.390853
24,0.0,-0.668831
21,-1.485147,-0.971596
15,-0.252525,0.291646
29,0.943407,-1.678275
19,3.061222,-1.090152


Coefficient R-squared: -1.127723314300629


In [24]:
ridge_reg_pctchangenext(tsla_df)

Regression Summary


  return linalg.solve(A, Xy, sym_pos=True,


Unnamed: 0,Coefficient
Intercept,-35.415
Open,-0.7771
Adj Close,0.8077
Volume,0.0
pct_change_c2c,-0.1462
pct_change_intraday,-5.8017
10d_realised_vol,0.0299
count,0.0855
pos,0.0923
neg,-0.0781


Unnamed: 0,pct_change_c2c_next,y_pred
24,-3.779929,15.011326
13,-1.349373,0.696596
8,-5.255105,-4.358588
25,-5.844972,23.157949
4,-0.549908,-5.048496
40,-1.199277,0.169092
19,-0.985015,-1.38036
39,-3.385436,-6.742561
29,-0.83905,1.230904


Coefficient R-squared: -36.46302234300281


In [25]:
ridge_reg_volumenext(gme_df)

Regression Summary


  return linalg.solve(A, Xy, sym_pos=True,


Unnamed: 0,Coefficient
Intercept,40001190.0
Open,899007.6
Adj Close,-1009328.0
Volume,0.3283416
pct_change_c2c,-48893.08
pct_change_intraday,1214127.0
10d_realised_vol,-8904.362
count,-1183.098
pos,171805.5
neg,-265964.8


Unnamed: 0,Volume_next,y_pred
19,91963000.0,65778100.0
16,7565200.0,35385480.0
15,19476000.0,37666590.0
26,38725800.0,21106540.0
4,62427300.0,30170650.0
12,9186800.0,29466120.0
37,24177900.0,26965060.0
27,71361900.0,6391354.0


Coefficient R-squared: -0.13461525296738874


In [26]:
ridge_reg_volumenext(amc_df)

Regression Summary


  return linalg.solve(A, Xy, sym_pos=True,


Unnamed: 0,Coefficient
Intercept,55043360.0
Open,60410550.0
Adj Close,-62477840.0
Volume,1.178486
pct_change_c2c,-4145114.0
pct_change_intraday,11709600.0
10d_realised_vol,53001.74
count,156606.5
pos,-253668.3
neg,-1648898.0


Unnamed: 0,Volume_next,y_pred
19,135675300.0,212314900.0
16,264876400.0,219187000.0
15,173409000.0,95061710.0
26,148428200.0,113832800.0
4,162985800.0,68407960.0
12,38849000.0,50748390.0
37,81850700.0,100425100.0
27,256641600.0,169422800.0


Coefficient R-squared: 0.23012874874583933


In [27]:
ridge_reg_volumenext(pltr_df)

Regression Summary


  return linalg.solve(A, Xy, sym_pos=True,


Unnamed: 0,Coefficient
Intercept,29207800.0
Open,-8146653.0
Adj Close,8343328.0
Volume,1.215334
pct_change_c2c,-2207022.0
pct_change_intraday,-1937659.0
10d_realised_vol,-42955.32
count,919920.2
pos,-388743.3
neg,-722542.9


Unnamed: 0,Volume_next,y_pred
33,57141400.0,83480210.0
36,50169700.0,62364330.0
4,31565000.0,34701350.0
13,313175100.0,131535700.0
30,56101500.0,74744740.0
26,99106800.0,79414030.0
6,72376000.0,52949980.0
27,102847800.0,90583150.0


Coefficient R-squared: 0.38522548504651644


In [28]:
ridge_reg_volumenext(nok_df)

Regression Summary


  return linalg.solve(A, Xy, sym_pos=True,


Unnamed: 0,Coefficient
Intercept,15394220.0
Open,360759.2
Adj Close,1224775.0
Volume,0.7916648
pct_change_c2c,2012529.0
pct_change_intraday,7547314.0
10d_realised_vol,125748.9
count,22938.08
pos,84008.68
neg,-1429444.0


Unnamed: 0,Volume_next,y_pred
15,68365800.0,45309230.0
19,39737000.0,36124120.0
27,59527500.0,85051360.0
26,42858300.0,67854000.0
8,79240200.0,79994650.0
24,48009600.0,68132360.0
21,41232500.0,42318230.0


Coefficient R-squared: -0.5907595967534245


In [29]:
ridge_reg_volumenext(tsla_df)

Regression Summary


  return linalg.solve(A, Xy, sym_pos=True,


Unnamed: 0,Coefficient
Intercept,69033650.0
Open,-1126505.0
Adj Close,1060648.0
Volume,0.213624
pct_change_c2c,-1004778.0
pct_change_intraday,-7845999.0
10d_realised_vol,81562.58
count,14668.09
pos,51030.91
neg,-53737.85


Unnamed: 0,Volume_next,y_pred
19,41089200.0,39991330.0
16,66606900.0,36111940.0
15,37269700.0,22289620.0
26,67523300.0,61806700.0
4,15812700.0,18705300.0
12,25996500.0,21675260.0
37,33795200.0,37682810.0
27,60605700.0,16870830.0


Coefficient R-squared: -0.20179238840206692


# Lasso Regression

In [30]:
from sklearn.linear_model import Lasso

In [31]:
def lasso_reg_pctchangenext(df):
    
    features = ['Open','Adj Close','Volume','pct_change_c2c','pct_change_intraday','10d_realised_vol','count','pos','neg','net','abs','Short Volume','Short Exempt Volume']
    df.drop(df.tail(1).index,inplace=True)
    
    X=df[features]
    y=df['pct_change_c2c_next']
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    reg = Lasso().fit(X_train, y_train)
    
    coefs = pd.DataFrame(reg.coef_.reshape(1,13), columns=features)
    coefs.insert(loc=0, column="Intercept", value=reg.intercept_)
    coefs = coefs.T
    coefs=coefs.rename(columns={0:'Coefficient'})
    coefs=coefs.round(4)
    
    print("Regression Summary")
    display(coefs)
    
    y_pred = reg.predict(X_test)
            
    y_test_df=y_test.to_frame()
    y_test_df.insert(loc=1,column="y_pred", value = y_pred)
    display(y_test_df)

    print("Coefficient R-squared: {}".format(reg.score(X_test, y_test)))

In [32]:
def lasso_reg_volumenext(df):
    
    features = ['Open','Adj Close','Volume','pct_change_c2c','pct_change_intraday','10d_realised_vol','count','pos','neg','net','abs','Short Volume','Short Exempt Volume']
    df.drop(df.tail(1).index,inplace=True)
    
    X=df[features]
    y=df['Volume_next']
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    reg = Lasso().fit(X_train, y_train)
    
    coefs = pd.DataFrame(reg.coef_.reshape(1,13), columns=features)
    coefs.insert(loc=0, column="Intercept", value=reg.intercept_)
    coefs = coefs.T
    coefs=coefs.rename(columns={0:'Coefficient'})
    coefs=coefs.round(4)
    
    print("Regression Summary")
    display(coefs)
    
    y_pred = reg.predict(X_test)
            
    y_test_df=y_test.to_frame()
    y_test_df.insert(loc=1,column="y_pred", value = y_pred)
    display(y_test_df)

    print("Coefficient R-squared: {}".format(reg.score(X_test, y_test)))

In [33]:
lasso_reg_pctchangenext(gme_df)

Regression Summary


Unnamed: 0,Coefficient
Intercept,36.0153
Open,-0.0
Adj Close,-0.188
Volume,0.0
pct_change_c2c,-1.983
pct_change_intraday,1.8223
10d_realised_vol,-0.081
count,0.0404
pos,-0.0
neg,0.0


Unnamed: 0,pct_change_c2c_next,y_pred
33,-3.84157,-17.525242
36,-6.550468,-5.633793
4,-42.105835,-41.766617
13,-11.42795,22.005914
30,-16.771267,-30.598937
26,26.940871,-18.815971
6,-5.911871,13.274054
27,7.330906,-43.074161


Coefficient R-squared: -1.3943908574387072


In [34]:
lasso_reg_pctchangenext(amc_df)

Regression Summary


Unnamed: 0,Coefficient
Intercept,19.2896
Open,-0.0255
Adj Close,-1.5939
Volume,0.0
pct_change_c2c,0.0
pct_change_intraday,0.0
10d_realised_vol,-0.0309
count,-0.0022
pos,0.0
neg,0.0


Unnamed: 0,pct_change_c2c_next,y_pred
33,3.244835,-1.691769
36,-14.651721,7.881121
4,-20.958752,8.259172
13,-0.72072,5.466598
30,25.806453,-0.452295
26,13.024758,4.931041
6,-9.516839,-11.938597
27,-6.190473,2.17031


Coefficient R-squared: -0.394394500068181


In [35]:
lasso_reg_pctchangenext(pltr_df)

Regression Summary


Unnamed: 0,Coefficient
Intercept,-0.3325
Open,-0.0
Adj Close,-0.1581
Volume,0.0
pct_change_c2c,-0.0
pct_change_intraday,-0.0
10d_realised_vol,0.027
count,-0.0379
pos,-0.0
neg,-0.0


Unnamed: 0,pct_change_c2c_next,y_pred
33,-4.879015,-1.668277
36,-3.963663,-2.13824
4,0.913095,-3.396123
13,-7.053175,-1.876819
30,-0.482909,-1.980095
26,7.726464,-2.272354
6,5.873715,-3.659632
27,2.019785,-0.424852


Coefficient R-squared: -0.36838345546663054


In [36]:
lasso_reg_pctchangenext(nok_df)

Regression Summary


  model = cd_fast.enet_coordinate_descent(


Unnamed: 0,Coefficient
Intercept,-0.1016
Open,-0.0
Adj Close,-0.0
Volume,0.0
pct_change_c2c,-0.2158
pct_change_intraday,-0.038
10d_realised_vol,0.0
count,0.0136
pos,0.0
neg,-0.0


Unnamed: 0,pct_change_c2c_next,y_pred
31,0.995024,0.766345
15,-0.252525,0.332359
26,2.926827,-1.233517
17,0.24753,-0.836851
8,-0.477327,-0.872912
9,-1.678661,-1.194326
19,3.061222,-2.426871


Coefficient R-squared: -1.6303658526782367


In [37]:
lasso_reg_pctchangenext(tsla_df)

Regression Summary


Unnamed: 0,Coefficient
Intercept,-11.9642
Open,0.0097
Adj Close,0.0
Volume,0.0
pct_change_c2c,-0.1053
pct_change_intraday,0.0
10d_realised_vol,0.0227
count,0.0428
pos,0.0
neg,-0.0


Unnamed: 0,pct_change_c2c_next,y_pred
33,-6.932079,-2.793995
36,-1.170153,-3.217118
4,-0.549908,-1.442013
13,-1.349373,-2.091876
30,2.048351,-1.621186
26,19.641211,-1.897868
6,1.313026,2.017757
27,-0.819505,-3.020783


Coefficient R-squared: -0.1881551499488323


In [38]:
lasso_reg_volumenext(gme_df)

Regression Summary


  model = cd_fast.enet_coordinate_descent(


Unnamed: 0,Coefficient
Intercept,-3690694.0
Open,837200.6
Adj Close,-923699.4
Volume,0.2625
pct_change_c2c,-74078.77
pct_change_intraday,1284656.0
10d_realised_vol,21451.32
count,-819.7537
pos,1681682000.0
neg,-636730100.0


Unnamed: 0,Volume_next,y_pred
33,11764900.0,11712710.0
36,14429100.0,9753525.0
4,62427300.0,40478690.0
13,23990600.0,7017508.0
30,24100400.0,18169800.0
26,38725800.0,37514730.0
6,25687300.0,79203810.0
27,71361900.0,18407860.0


Coefficient R-squared: -0.9271449642949945


In [39]:
lasso_reg_volumenext(amc_df)

Regression Summary


  model = cd_fast.enet_coordinate_descent(


Unnamed: 0,Coefficient
Intercept,43505820.0
Open,190065200.0
Adj Close,-192428200.0
Volume,0.6534
pct_change_c2c,-3329231.0
pct_change_intraday,26082400.0
10d_realised_vol,95561.24
count,107977.2
pos,-1681066000.0
neg,3640821000.0


Unnamed: 0,Volume_next,y_pred
33,121418000.0,67884510.0
36,87923200.0,101232300.0
4,162985800.0,190633900.0
13,130540800.0,94744660.0
30,277713300.0,121547400.0
26,148428200.0,109965600.0
6,128171500.0,210554100.0
27,256641600.0,171804000.0


Coefficient R-squared: -0.41497737999240747


In [40]:
lasso_reg_volumenext(pltr_df)

Regression Summary


  model = cd_fast.enet_coordinate_descent(


Unnamed: 0,Coefficient
Intercept,28486950.0
Open,-12722690.0
Adj Close,13310400.0
Volume,1.3399
pct_change_c2c,-3047501.0
pct_change_intraday,-2120780.0
10d_realised_vol,121463.1
count,1120235.0
pos,-664536000.0
neg,-225516800.0


Unnamed: 0,Volume_next,y_pred
17,90095700.0,150593800.0
13,313175100.0,132044600.0
4,31565000.0,36125320.0
29,58489200.0,70451780.0
35,46208100.0,78117310.0
25,93973900.0,60612770.0
6,72376000.0,51421090.0
26,99106800.0,71704490.0


Coefficient R-squared: 0.28191644945592154


In [41]:
lasso_reg_volumenext(nok_df)

Regression Summary


  model = cd_fast.enet_coordinate_descent(


Unnamed: 0,Coefficient
Intercept,20566620.0
Open,-134818600.0
Adj Close,136660500.0
Volume,0.6013
pct_change_c2c,2337680.0
pct_change_intraday,-1703810.0
10d_realised_vol,191408.7
count,-3041.04
pos,299760200.0
neg,-874496300.0


Unnamed: 0,Volume_next,y_pred
29,24511600.0,28010010.0
15,68365800.0,45492790.0
24,48009600.0,64886680.0
17,121260400.0,81035710.0
8,79240200.0,90599470.0
9,62959000.0,81194590.0
30,83759200.0,53306280.0


Coefficient R-squared: 0.30500418837343535


In [42]:
lasso_reg_volumenext(tsla_df)

Regression Summary


  model = cd_fast.enet_coordinate_descent(


Unnamed: 0,Coefficient
Intercept,91671730.0
Open,-663787.9
Adj Close,569904.8
Volume,0.1685
pct_change_c2c,-798143.6
pct_change_intraday,-5171229.0
10d_realised_vol,-64043.58
count,-20073.97
pos,438047100.0
neg,22453410.0


Unnamed: 0,Volume_next,y_pred
33,33224800.0,26222880.0
36,30491900.0,41879760.0
4,15812700.0,21419650.0
13,17957100.0,28384530.0
30,29335600.0,34497020.0
26,67523300.0,66267630.0
6,20161700.0,20456080.0
27,60605700.0,14441710.0


Coefficient R-squared: 0.06294755029598476
