# LSTM Approach for Generating Mega Millions Numbers

Forked from: https://www.kaggle.com/code/emfhal/euromillion-lotto-prediction-using-lstm

## The libraries we will work with

In [15]:
import pandas as pd; print (pd.__version__)
import numpy as np; print (np.__version__)
import os
from pathlib import Path
from sklearn.preprocessing import StandardScaler

1.3.5
1.21.6


## Prepare/Generate data set

First, we load into our system the latest results of the lottery games in the input folder

In [16]:
filename = 'MegaMillionsQ123WxExtras.csv'

import pandas as pd
from google.colab import drive, files
import numpy as np

drive.mount('/content/drive/')
output_directory = "/content/drive/My Drive/"
lotto = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/LottoPredictions/data/' + filename, index_col = 'Date')
lotto = lotto.drop(['BB', 'LC', 'RH', 'PD','TMP', 'TMP2', 'TMP3'], axis = 1)
print(lotto)

df = lotto
df.columns = ['B1', 'B2', 'B3', 'B4', 'B5', 'MB']#, 'LC', 'RH', 'PD']
print (df)

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).
            B1  B2  B3  B4  B5  B6
Date                              
2023-01-06   3  20  46  59  63  13
2023-01-03  25  29  33  41  44  18
2022-12-30   1   3   6  44  51   7
2022-12-27   9  13  36  59  61  11
2022-12-23  15  21  32  38  62   8
...         ..  ..  ..  ..  ..  ..
2017-11-07   1  54  60  68  69  11
2017-11-03  10  22  42  61  69   3
2017-10-31   6  28  31  52  53  12
2017-10-27  17  27  41  51  52  13
2017-10-24  20  24  34  56  64   6

[544 rows x 6 columns]
            B1  B2  B3  B4  B5  MB
Date                              
2023-01-06   3  20  46  59  63  13
2023-01-03  25  29  33  41  44  18
2022-12-30   1   3   6  44  51   7
2022-12-27   9  13  36  59  61  11
2022-12-23  15  21  32  38  62   8
...         ..  ..  ..  ..  ..  ..
2017-11-07   1  54  60  68  69  11
2017-11-03  10  22  42  61  69   3
2017-10-31   6  28  31  52  53  12
2017-

The winning numbers look like this (B1, B2, B3, B4, B5, MB - 1-5 balls that take range 0 to 70 each, MB takes 0-25, then extra fields):

In [17]:
df.head()

Unnamed: 0_level_0,B1,B2,B3,B4,B5,MB
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-01-06,3,20,46,59,63,13
2023-01-03,25,29,33,41,44,18
2022-12-30,1,3,6,44,51,7
2022-12-27,9,13,36,59,61,11
2022-12-23,15,21,32,38,62,8


Next we need to normalize data.

In [18]:
scaler = StandardScaler().fit(df.values)
transformed_dataset = scaler.transform(df.values)
transformed_df = pd.DataFrame(data=transformed_dataset, index=df.index)

Lets define hyper params of or model

In [19]:
number_of_rows= df.values.shape[0] #all our games
window_length = 5 #amount of past games we need to take in consideration for prediction
number_of_features = df.values.shape[1] #balls count

Create train dataset and labels for each row. It should have format for keras lstm model (rows, window zise, balls)

In [20]:
train = np.empty([number_of_rows-window_length, window_length, number_of_features], dtype=float)
label = np.empty([number_of_rows-window_length, number_of_features], dtype=float)

for i in range(0, number_of_rows-window_length):
    train[i]=transformed_df.iloc[i:i+window_length, 0: number_of_features]
    label[i]=transformed_df.iloc[i+window_length: i+window_length+1, 0: number_of_features]

Shapes

In [21]:
train.shape

(539, 5, 6)

In [22]:
label.shape

(539, 6)

In [23]:
train[0]

array([[-0.9229601 , -0.23287908,  0.90462503,  1.00723812,  0.45089026,
        -0.06687968],
       [ 1.53771855,  0.53388351, -0.09780496, -0.41249351, -1.40141569,
         0.6327847 ],
       [-1.14665815, -1.68120842, -2.17977494, -0.17587158, -0.71898718,
        -0.90647695],
       [-0.25186592, -0.82924999,  0.13352504,  1.00723812,  0.25591069,
        -0.34674544],
       [ 0.41922825, -0.14768324, -0.17491496, -0.64911545,  0.35340048,
        -0.76654407]])

In [24]:
train[1]

array([[ 1.53771855,  0.53388351, -0.09780496, -0.41249351, -1.40141569,
         0.6327847 ],
       [-1.14665815, -1.68120842, -2.17977494, -0.17587158, -0.71898718,
        -0.90647695],
       [-0.25186592, -0.82924999,  0.13352504,  1.00723812,  0.25591069,
        -0.34674544],
       [ 0.41922825, -0.14768324, -0.17491496, -0.64911545,  0.35340048,
        -0.76654407],
       [-0.9229601 , -1.59601258, -0.09780496, -0.80686341, -0.62149739,
         0.49285183]])

In [25]:
label[0]

array([-0.9229601 , -1.59601258, -0.09780496, -0.80686341, -0.62149739,
        0.49285183])

In [26]:
label[1]

array([-0.36371495,  1.04505858,  0.44196503,  0.53399424, -0.23153824,
       -0.34674544])

## The LSTM model

In [27]:
from keras.models import Sequential
from keras.models import load_model
from keras.layers import LSTM, Dense,Dropout

import numpy as np

batch_size = 25 

Training

In [28]:
if os.path.exists('../input/lstm/'+filename+'.h5'):
    model = load_model('../input/lstm/'+filename+'.h5')
else:
    model = Sequential()
    model.add(LSTM(32,      
               input_shape=(window_length, number_of_features),
               return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(32,           
               return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(number_of_features))
    model.compile(loss='mse', optimizer='rmsprop')
    model.fit(train, label,
          batch_size=64, epochs=5000)
    model.save('input/'+filename+'.h5')

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 2502/5000
Epoch 2503/5000
Epoch 2504/5000
Epoch 2505/5000
Epoch 2506/5000
Epoch 2507/5000
Epoch 2508/5000
Epoch 2509/5000
Epoch 2510/5000
Epoch 2511/5000
Epoch 2512/5000
Epoch 2513/5000
Epoch 2514/5000
Epoch 2515/5000
Epoch 2516/5000
Epoch 2517/5000
Epoch 2518/5000
Epoch 2519/5000
Epoch 2520/5000
Epoch 2521/5000
Epoch 2522/5000
Epoch 2523/5000
Epoch 2524/5000
Epoch 2525/5000
Epoch 2526/5000
Epoch 2527/5000
Epoch 2528/5000
Epoch 2529/5000
Epoch 2530/5000
Epoch 2531/5000
Epoch 2532/5000
Epoch 2533/5000
Epoch 2534/5000
Epoch 2535/5000
Epoch 2536/5000
Epoch 2537/5000
Epoch 2538/5000
Epoch 2539/5000
Epoch 2540/5000
Epoch 2541/5000
Epoch 2542/5000
Epoch 2543/5000
Epoch 2544/5000
Epoch 2545/5000
Epoch 2546/5000
Epoch 2547/5000
Epoch 2548/5000
Epoch 2549/5000
Epoch 2550/5000
Epoch 2551/5000
Epoch 2552/5000
Epoch 2553/5000
Epoch 2554/5000
Epoch 2555/5000
Epoch 2556/5000
Epoch 2557/5000
Epoch 2558/5000
Epoch 2559/5000
Epoch 2

## Prediction

Last step, we would like to predict the next results, the prediction will be based on the model and based on the last 5 results. And we will export everything to a csv file

In [35]:
to_predict=df.iloc[-5:]
scaled_to_predict = scaler.transform(to_predict)



In [36]:
scaled_predicted_output_1 = model.predict(np.array([scaled_to_predict]))
data = scaler.inverse_transform(scaled_predicted_output_1).astype(int)
df = pd.DataFrame(data, columns=['B1', 'B2', 'B3', 'B4', 'B5', 'MB'])#, 'LC', 'RH', 'PD'])
#df.to_csv(''+filename+'.csv', index=False)  
df



Unnamed: 0,B1,B2,B3,B4,B5,MB
0,11,22,34,45,58,14


Conclusion <br/>
We developed a LSTM model to forecast lotery game. Thanks for reading