In [None]:
import pandas as pd
import tensorflow as tf

from catboost import CatBoostRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from sklearn.linear_model import LinearRegression, HuberRegressor, Lasso, Ridge, ElasticNet
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.tree import DecisionTreeRegressor
from tensorflow.keras.layers import Dense, Input, Dropout
from tensorflow.keras.models import Sequential

In [None]:
# Load the cleaned Lotto Max CSV; the path is relative to the working directory.
csv_path = "data/clean_lotto_max_data.csv"
df = pd.read_csv(csv_path)

In [None]:
# Show every column and every row when a DataFrame is displayed.
# The fully-qualified option names are used on purpose: short patterns such as
# "max.columns" are interpreted as regular expressions, and in newer pandas
# releases they match more than one registered option (for example
# "styler.render.max_columns"), which makes set_option raise OptionError.
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

In [None]:
# Remove the "Unnamed: 0" column (presumably an index saved into the CSV — confirm).
# errors="ignore" keeps the cell idempotent: re-running it after the column is
# already gone no longer raises KeyError.
df = df.drop(columns=["Unnamed: 0"], errors="ignore")

In [None]:
# Lag column names, ordered from the smallest shift to the largest.
# The spellings are kept exactly as they are because they are column keys.
lag_columns = [
    "last_draw", "two", "three", "four", "five", "six", "seven",
    "eight", "nine", "ten", "eleven", "twelve", "thirteen", "fourteen",
    "fifteen", "sixteen", "seventeen", "eightteen", "nineteen", "twenty", "twentyone",
    "twentytwo", "twentythree", "twentyfour", "twentyfive", "twentysix", "twentyseven", "twentyeight",
    "twentynine", "thirty", "thirtyone", "thirtytwo", "thirtythree", "thirtyfour", "thirtyfive",
    "thirtysix", "thirtyseven", "thirtyeight", "thirtynine", "fourty", "fourtyone", "fourtytwo",
]

# The k-th lag column is `past_numbers` shifted down by 7 * k rows
# (7, 14, ..., 294); the leading rows of each column become NaN.
for lag, column in enumerate(lag_columns, start=1):
    df[column] = df["past_numbers"].shift(7 * lag)

In [None]:
# Keep only rows without any missing value (the shifted lag columns leave NaN
# in the leading rows of the frame).
df = df.dropna(axis=0, how="any")

In [7]:
# Separate the regression target from the feature matrix: y is the target
# column, X is every remaining column of df.
target = "past_numbers"
y = df[target]
X = df.drop(columns=[target])

In [8]:
# Hold out a test partition using scikit-learn's default split size; the fixed
# random_state makes the split repeatable between runs.
split_parts = train_test_split(X, y, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [9]:
# Ordinary least-squares baseline; score() reports R^2 on each partition.
model = LinearRegression().fit(X_train, y_train)
print("LinearRegression train score is " + str(model.score(X_train, y_train)))
print("LinearRegression test score is " + str(model.score(X_test, y_test)))

# The value printed is the square root of the MSE, i.e. the RMSE, so it is
# labelled as such (the previous label called it "Mean squared error").
print("Root mean squared error is " + str(mean_squared_error(y_test, model.predict(X_test)) ** 0.5))

LinearRegression train score is 0.7748384940491285
LinearRegression test score is 0.7793965621659263
Mean squared error is 6.737861782187537


In [None]:
# L1-regularised linear model with default settings; score() reports R^2.
model = Lasso().fit(X_train, y_train)
print("Lasso train score is " + str(model.score(X_train, y_train)))
print("Lasso test score is " + str(model.score(X_test, y_test)))

# The value printed is the square root of the MSE, i.e. the RMSE, so it is
# labelled as such (the previous label called it "Mean squared error").
print("Root mean squared error is " + str(mean_squared_error(y_test, model.predict(X_test)) ** 0.5))

In [None]:
# Huber regressor with default settings; score() reports R^2.
model = HuberRegressor().fit(X_train, y_train)
print("HuberRegressor train score is " + str(model.score(X_train, y_train)))
print("HuberRegressor test score is " + str(model.score(X_test, y_test)))

# The value printed is the square root of the MSE, i.e. the RMSE, so it is
# labelled as such (the previous label called it "Mean squared error").
print("Root mean squared error is " + str(mean_squared_error(y_test, model.predict(X_test)) ** 0.5))

In [None]:
# L2-regularised linear model with default settings; score() reports R^2.
model = Ridge().fit(X_train, y_train)
print("Ridge train score is " + str(model.score(X_train, y_train)))
print("Ridge test score is " + str(model.score(X_test, y_test)))

# The value printed is the square root of the MSE, i.e. the RMSE, so it is
# labelled as such (the previous label called it "Mean squared error").
print("Root mean squared error is " + str(mean_squared_error(y_test, model.predict(X_test)) ** 0.5))

In [None]:
# Combined L1/L2-regularised linear model with default settings; score() reports R^2.
model = ElasticNet().fit(X_train, y_train)
print("ElasticNet train score is " + str(model.score(X_train, y_train)))
print("ElasticNet test score is " + str(model.score(X_test, y_test)))

# The value printed is the square root of the MSE, i.e. the RMSE, so it is
# labelled as such (the previous label called it "Mean squared error").
print("Root mean squared error is " + str(mean_squared_error(y_test, model.predict(X_test)) ** 0.5))

In [None]:
# Random forest with default hyper-parameters. The forest is randomised, so a
# fixed random_state (same seed as the train/test split) is passed to make the
# reported scores repeatable between runs.
model = RandomForestRegressor(random_state=42).fit(X_train, y_train)
print("RandomForestRegressor train score is " + str(model.score(X_train, y_train)))
print("RandomForestRegressor test score is " + str(model.score(X_test, y_test)))

# The value printed is the square root of the MSE, i.e. the RMSE, so it is
# labelled as such (the previous label called it "Mean squared error").
print("Root mean squared error is " + str(mean_squared_error(y_test, model.predict(X_test)) ** 0.5))

In [None]:
# Gradient-boosted trees (CatBoost) trained for 1500 iterations.
model = CatBoostRegressor(iterations=1500).fit(X_train, y_train)
print("CatBoostRegressor train score is " + str(model.score(X_train, y_train)))
print("CatBoostRegressor test score is " + str(model.score(X_test, y_test)))

# The value printed is the square root of the MSE, i.e. the RMSE, so it is
# labelled as such (the previous label called it "Mean squared error").
print("Root mean squared error is " + str(mean_squared_error(y_test, model.predict(X_test)) ** 0.5))

In [None]:
# Small fully connected network: the input width follows the number of feature
# columns, followed by Dense layers of 8, 4 and 1 units.
n_features = X_train.shape[1]
layer_stack = [
    Input(shape=(n_features,)),
    Dense(8, activation='relu'),
    Dense(4, activation='elu'),
    Dense(1, activation='elu'),
]
model = Sequential(layer_stack)

# Train against mean absolute error with the Adam optimiser.
model.compile(optimizer='adam', loss='mae')

In [None]:
# Callback that writes the model weights to disk whenever val_loss improves
# (save_best_only=True keeps only the best epoch seen so far).
# Keras 3 requires weights-only checkpoint paths to end in ".weights.h5";
# older tf.keras versions accept the same suffix and write HDF5 weights.
checkpoint_filepath = '/tmp/checkpoint.weights.h5'
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='val_loss',
    mode='auto',
    save_best_only=True)

In [None]:
# Train the network for 100 epochs in mini-batches of 4 rows, checkpointing
# through the callback defined earlier.
# NOTE(review): the test partition is passed as validation data, so the
# checkpoint's val_loss monitoring is driven by the test set — confirm intended.
history = model.fit(X_train, y_train,
                    validation_data=(X_test, y_test),
                    epochs=100, batch_size=4,
                    verbose=2, callbacks=[model_checkpoint_callback])

# NOTE(review): nothing in this cell loads the checkpointed weights back, so the
# metric below is computed with the weights of the final epoch — confirm intended.
# The value printed is the square root of the MSE, i.e. the RMSE, so it is
# labelled as such (the previous label called it "Mean squared error").
print("Root mean squared error is " + str(mean_squared_error(y_test, model.predict(X_test)) ** 0.5))

In [None]:
# Print the layer-by-layer summary of the current `model`.
model.summary()

In [None]:
# Wrap the current `model` in a single-step scikit-learn pipeline.
# (`make_pipeline` is imported with the other dependencies at the top of the
# notebook rather than in the middle of the analysis.)
# NOTE(review): fitting the pipeline presumably calls model.fit(X_train, y_train)
# again with that model's default fit arguments, i.e. it continues training the
# already-fitted network rather than reusing it as-is — confirm this is intended.
pipe = make_pipeline(model)
pipe.fit(X_train, y_train)

In [None]:
# Display the column labels currently present in df.
df.columns

In [None]:
# Hand-entered lag values for a single prediction row, keyed by lag column name.
# NOTE(review): only the first 15 lag columns are supplied here, while the
# lag-feature cell creates 42 of them and X is built from all remaining columns
# of df — the feature count likely does not match the fitted models; confirm.
_feb_18_lags = {
    "last_draw": 46,
    "two": 48,
    "three": 43,
    "four": 46,
    "five": 48,
    "six": 42,
    "seven": 47,
    "eight": 49,
    "nine": 48,
    "ten": 40,
    "eleven": 47,
    "twelve": 47,
    "thirteen": 45,
    "fourteen": 46,
    "fifteen": 44,
}

# One-row frame: every column holds a single-element list.
feb_18 = pd.DataFrame({name: [value] for name, value in _feb_18_lags.items()})

In [None]:
# Run the fitted pipeline on the hand-entered single-row frame.
# NOTE(review): feb_18 carries 15 lag columns, whereas X_train is derived from a
# frame that received 42 lag columns — the feature counts look inconsistent; confirm.
pipe.predict(feb_18)