In [None]:
import pandas as pd
import numpy as np
import datetime as dt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

In [10]:
df = pd.read_csv('https://docs.google.com/spreadsheets/d/1ozmJBOaE2Xh6gjDGqy4TI27XIhyAL6h60QRy1MUsDso/pub?output=csv')
df.head(5)

Unnamed: 0,Date,Adj Close AAPL,Adj Close ABC,Adj Close AMZN,Adj Close BAC,Adj Close BP,Adj Close BRK-B,Adj Close CVS,Adj Close DDAIF,Adj Close F,Adj Close GOOG,Adj Close JPM,Adj Close MCK,Adj Close MSFT,Adj Close RDS-B,Adj Close T,Adj Close TM,Adj Close TOT,Adj Close UNH,Adj Close WMT,Adj Close XOM,Volume AAPL,Volume ABC,Volume AMZN,Volume BAC,Volume BP,Volume BRK-B,Volume CVS,Volume DDAIF,Volume F,Volume GOOG,Volume JPM,Volume MCK,Volume MSFT,Volume RDS-B,Volume T,Volume TM,Volume TOT,Volume UNH,Volume WMT,Volume XOM
0,2005-02-21,1.368799,11.901972,34.990002,32.976032,28.452971,60.32,19.241392,31.138027,8.256411,92.587959,24.088234,33.270592,18.141571,31.159843,10.165306,59.931969,24.774008,38.530373,36.302223,37.98233,4999013600,22080000,31627600,37366400,18590600,2125000,12465000,1408800,29261000,130201203,45331800,5752300,327812600,1838189,36831500,915300,8115000,21417000,46536700,108879400
1,2005-02-28,1.316963,12.427639,35.849998,32.968975,29.017036,59.580002,20.286993,30.857567,7.875347,92.602905,24.440075,33.685661,18.084099,32.294121,10.282875,60.843304,25.259737,39.331413,37.437328,38.168442,3746232000,20682800,29684900,48255000,20998700,3420000,34757800,1908600,55751500,77886448,52037600,9939700,322103600,2484741,33702100,1086900,11717200,20941800,61478000,128760400
2,2005-03-07,1.238824,11.963464,34.75,32.568451,28.190603,60.02,20.391163,30.957735,7.868996,88.568024,23.527895,33.156551,18.026617,31.413136,10.077134,60.897354,25.029329,38.062389,36.379776,36.655407,4204488400,20746800,29673700,41283300,20307100,4090000,20628200,1993600,41372100,94478205,60048400,8092000,316021800,2355602,30300300,1033500,12568800,23441200,47133000,170203800
3,2005-03-14,1.321578,12.072557,34.16,31.977856,28.584154,58.369999,20.391163,30.049568,7.233888,89.683846,23.462727,32.680378,17.466204,31.099272,9.804211,59.136475,25.292948,38.47134,36.273994,37.616077,3553877600,18499200,26026800,55968800,15497500,5540000,17063800,3556300,79130600,88381237,62250200,5431700,408338500,2058276,49319000,1251700,8275800,25304200,152631200,112404900
4,2005-03-21,1.307426,12.122155,32.880001,31.13109,26.983763,57.900002,20.622667,29.655588,7.170377,89.290321,22.759045,33.553383,17.444654,29.518976,9.783216,57.777191,24.109774,39.145905,35.822327,35.424568,2055071200,8516800,23236100,50533000,15531900,2980000,14253200,2137700,58470600,43493615,55754800,5021800,331673700,1687673,33866900,1247600,7194000,19556000,70161800,88667200


In [None]:
volume_columns = [col for col in df.columns if 'Volume' in col]
volume_columns

['Volume AAPL',
 'Volume ABC',
 'Volume AMZN',
 'Volume BAC',
 'Volume BP',
 'Volume BRK-B',
 'Volume CVS',
 'Volume DDAIF',
 'Volume F',
 'Volume GOOG',
 'Volume JPM',
 'Volume MCK',
 'Volume MSFT',
 'Volume RDS-B',
 'Volume T',
 'Volume TM',
 'Volume TOT',
 'Volume UNH',
 'Volume WMT',
 'Volume XOM']

In [None]:
df[volume_columns] = df[volume_columns].shift(1)

In [None]:
# convert `date` column to numerical values
# more precisely number of days since 1st january 1AD
df['Date'] = pd.to_datetime(df['Date'])
df['Date'] = df['Date'].map(dt.datetime.toordinal)

In [None]:
df = df.dropna()

In [None]:
X = pd.concat([df[volume_columns], df['Date']], axis=1)
y = df.filter(regex='Adj Close')

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [None]:
lm = LinearRegression()
model = lm.fit(X_train, y_train)

In [None]:
yhat = model.predict(X_test)

print('Coefficients:', model.coef_)
print("MSE: %.2f" % mean_squared_error(y_test, yhat))
print('Coefficient of determination: %.2f'
      % r2_score(y_test, yhat))

Coefficients: [[-6.96153573e-10 -2.93169294e-07  2.04519277e-08 -2.54991619e-10
   6.31001995e-09 -3.90075769e-08 -1.69709567e-08  4.97429793e-08
   1.67358843e-08  1.95641220e-07 -5.50100836e-08 -5.48018504e-07
  -1.32969930e-08 -1.18833603e-07  5.84240046e-10 -7.29705140e-07
  -1.15793975e-07 -3.07259175e-08 -4.58884139e-08  2.51381022e-07
   1.51591720e-02]
 [-1.70785068e-09  2.06391540e-07  1.63927450e-08 -2.82106238e-09
   1.22221128e-08 -1.17324260e-08 -9.62251754e-08  7.01862801e-08
  -1.27038609e-08 -7.59808975e-09 -3.71739139e-08 -2.04173339e-07
   2.44367039e-09  1.26991728e-07  9.02246669e-09  6.41224610e-08
  -1.92342691e-07 -5.27925147e-08  6.24435214e-08  3.60244281e-08
   1.55263308e-02]
 [-3.97765075e-08 -1.05359290e-05  2.69261950e-06 -1.05218448e-07
  -3.78893722e-08 -6.78876524e-07  1.54876454e-06  2.24633227e-06
   5.40071174e-07  7.88671100e-06 -1.21341395e-06 -1.20947738e-05
  -3.40593283e-07 -1.35800135e-06  1.09177929e-07 -2.62944656e-05
  -6.28079056e-06 -4.937