In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

In [2]:
df = pd.read_csv("lotomania_result.csv")

In [3]:
df.drop(['Concurso', 'Data Sorteio'], axis=1, inplace=True)

In [4]:
scaler = StandardScaler().fit(df.values)
transformed_dataset = scaler.transform(df.values)
transformed_df = pd.DataFrame(data=transformed_dataset, index=df.index)

In [5]:
# All our games
number_of_rows = df.values.shape[0]
number_of_rows

2450

In [6]:
# Amount of games we need to take into consideration for prediction
window_length = 10
window_length 

10

In [7]:
# Balls counts
number_of_features = df.values.shape[1]
number_of_features

20

In [8]:
X = np.empty([ number_of_rows - window_length, window_length, number_of_features], dtype=float)
X

array([[[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       ...,

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0.

In [9]:
y = np.empty([ number_of_rows - window_length, number_of_features], dtype=float)
y

array([[1.06768751e-311, 1.06776102e-311, 1.65399910e+001, ...,
        1.54689706e+001, 1.54689706e+001, 2.58146522e+002],
       [3.73672570e+000, 1.54689706e+001, 4.48079967e-003, ...,
        3.73672570e+000, 1.54689706e+001, 1.13835835e+000],
       [4.48079967e-003, 8.70603249e-001, 4.27223590e+000, ...,
        1.65399910e+001, 9.40611345e+000, 3.73672570e+000],
       ...,
       [1.41376000e+001, 1.53760000e+000, 5.77600000e-001, ...,
        1.41376000e+001, 5.77600000e-001, 1.41376000e+001],
       [2.26576000e+001, 7.61760000e+000, 5.76000000e-002, ...,
        5.76000000e-002, 3.89376000e+001, 1.41376000e+001],
       [2.74576000e+001, 5.77600000e-001, 3.09760000e+000, ...,
        2.26576000e+001, 5.76000000e-002, 2.26576000e+001]])

In [10]:
for i in range(0, number_of_rows-window_length):
    X[i] = transformed_df.iloc[i : i+window_length, 0 : number_of_features]
    y[i] = transformed_df.iloc[i+window_length : i+window_length+1, 0 : number_of_features]

In [11]:
X.shape

(2440, 10, 20)

In [12]:
y.shape

(2440, 20)

In [11]:
# Recurrent Neural Netowrk (RNN) with Long Short Term Memory (LSTM)
# Importing the Keras libraries and packages
from keras.models import Sequential
from keras.layers import LSTM, Dense, Bidirectional, Dropout
batch_size = 50

In [66]:
# Initialising the RNN
model = Sequential()

# Adding the input layer and the LSTM layer
model.add(Bidirectional(LSTM(190,
                        input_shape = (window_length, number_of_features),
                        return_sequences = True)))
# Adding a first Dropout layer
model.add(Dropout(0.2))

# Adding a LSTM layer
model.add(Bidirectional(LSTM(180,
                        input_shape = (window_length, number_of_features),
                        return_sequences = True)))
# Adding a LSTM layer
model.add(Bidirectional(LSTM(170,
                        input_shape = (window_length, number_of_features),
                        return_sequences = True)))
# Adding a Dropout layer
model.add(Dropout(0.2))

# Adding a LSTM layer
model.add(Bidirectional(LSTM(160,
                        input_shape = (window_length, number_of_features),
                        return_sequences = True)))

# Adding a second LSTM layer
model.add(Bidirectional(LSTM(150,
                        input_shape = (window_length, number_of_features),
                        return_sequences = True)))
# Adding a second Dropout layer
model.add(Dropout(0.2))

# Adding a LSTM layer
model.add(Bidirectional(LSTM(140,
                        input_shape = (window_length, number_of_features),
                        return_sequences = True)))
# Adding a LSTM layer
model.add(Bidirectional(LSTM(130,
                        input_shape = (window_length, number_of_features),
                        return_sequences = True)))
# Adding a Dropout layer
model.add(Dropout(0.2))
# Adding a LSTM layer
model.add(Bidirectional(LSTM(120,
                        input_shape = (window_length, number_of_features),
                        return_sequences = True)))
# Adding a LSTM layer
model.add(Bidirectional(LSTM(110,
                        input_shape = (window_length, number_of_features),
                        return_sequences = True)))
# Adding a Dropout layer
model.add(Dropout(0.2))
# Adding a LSTM layer
model.add(Bidirectional(LSTM(100,
                        input_shape = (window_length, number_of_features),
                        return_sequences = True)))
# Adding a third LSTM layer
model.add(Bidirectional(LSTM(90,
                        input_shape = (window_length, number_of_features),
                        return_sequences = True)))
# Adding a fourth Dropout layer
model.add(Dropout(0.2))
# Adding a fourth LSTM layer
model.add(Bidirectional(LSTM(80,
                        input_shape = (window_length, number_of_features),
                        return_sequences = False)))
# Adding a fourth LSTM layer
model.add(Bidirectional(LSTM(70,
                        input_shape = (window_length, number_of_features),
                        return_sequences = False)))
# Adding a output layer
model.add(Dense(60))
# Adding a Dropout layer
model.add(Dropout(0.2))
# Another output layer
model.add(Dense(30))
# Adding the last output layer
model.add(Dense(number_of_features))

In [14]:
# Initialising the RNN
model = Sequential()
# Adding the input layer and the LSTM layer
model.add(Bidirectional(LSTM(200,
                        input_shape = (window_length, number_of_features),
                        return_sequences = True)))
# Adding a first Dropout layer
model.add(Dropout(0.2))
# Adding a second LSTM layer
model.add(Bidirectional(LSTM(190,
                        input_shape = (window_length, number_of_features),
                        return_sequences = True)))
# Adding a second Dropout layer
model.add(Dropout(0.2))
# Adding a third LSTM layer
model.add(Bidirectional(LSTM(170,
                        input_shape = (window_length, number_of_features),
                        return_sequences = True)))
# Adding a fourth LSTM layer
model.add(Bidirectional(LSTM(170,
                        input_shape = (window_length, number_of_features),
                        return_sequences = False)))
# Adding a fourth Dropout layer
model.add(Dropout(0.2))
# Adding a fourth LSTM layer
model.add(Bidirectional(LSTM(150,
                        input_shape = (window_length, number_of_features),
                        return_sequences = False)))
# Adding a fourth LSTM layer
model.add(Bidirectional(LSTM(150,
                        input_shape = (window_length, number_of_features),
                        return_sequences = False)))
# Adding a fourth Dropout layer
model.add(Dropout(0.2))
# Adding a fourth LSTM layer
model.add(Bidirectional(LSTM(130,
                        input_shape = (window_length, number_of_features),
                        return_sequences = False)))
# Adding a fourth LSTM layer
model.add(Bidirectional(LSTM(130,
                        input_shape = (window_length, number_of_features),
                        return_sequences = False)))
# Adding a fourth Dropout layer
model.add(Dropout(0.2))
# Adding a fourth LSTM layer
model.add(Bidirectional(LSTM(110,
                        input_shape = (window_length, number_of_features),
                        return_sequences = False)))
# Adding a fourth LSTM layer
model.add(Bidirectional(LSTM(110,
                        input_shape = (window_length, number_of_features),
                        return_sequences = False)))
# Adding a fourth Dropout layer
model.add(Dropout(0.2))
# Adding the first output layer
model.add(Dense(60))
# Adding a fourth Dropout layer
model.add(Dropout(0.1))
# Adding another output layer
model.add(Dense(30))
# Adding the last output layer
model.add(Dense(number_of_features))

In [12]:
from tensorflow import keras
from tensorflow.keras.optimizers import Adam
model.compile(optimizer=Adam(learning_rate=0.00025), loss ='mse', metrics=['accuracy'])

NameError: name 'model' is not defined

In [15]:
from keras.callbacks import ModelCheckpoint

# Define the checkpoint
filepath = "model_v3.h5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [checkpoint]

In [17]:
model.fit(x=X, y=y, batch_size=50, epochs=400, callbacks=callbacks_list, verbose=2)

Epoch 1/400


ValueError: in user code:

    File "c:\Users\Usuario\anaconda3\lib\site-packages\keras\engine\training.py", line 1160, in train_function  *
        return step_function(self, iterator)
    File "c:\Users\Usuario\anaconda3\lib\site-packages\keras\engine\training.py", line 1146, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\Usuario\anaconda3\lib\site-packages\keras\engine\training.py", line 1135, in run_step  **
        outputs = model.train_step(data)
    File "c:\Users\Usuario\anaconda3\lib\site-packages\keras\engine\training.py", line 993, in train_step
        y_pred = self(x, training=True)
    File "c:\Users\Usuario\anaconda3\lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "c:\Users\Usuario\anaconda3\lib\site-packages\keras\engine\input_spec.py", line 232, in assert_input_compatibility
        raise ValueError(

    ValueError: Exception encountered when calling layer "sequential" "                 f"(type Sequential).
    
    Input 0 of layer "bidirectional_4" is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: (None, 340)
    
    Call arguments received by layer "sequential" "                 f"(type Sequential):
      • inputs=tf.Tensor(shape=(None, 10, 20), dtype=float32)
      • training=True
      • mask=None


In [13]:
from numpy.testing import assert_allclose
from tensorflow.keras.models import Sequential, load_model

In [16]:
# Load the saved model
filepath = 'model_v2.h5'
new_model = load_model(filepath)
#assert_allclose(model.predict(X), new_model.predict(X), 1e-5)

# Continue Fit the model
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [checkpoint]
new_model.fit(x=X, y=y, batch_size=50, epochs=300, callbacks=callbacks_list, verbose=2)

Epoch 1/300

Epoch 1: loss improved from inf to 0.02501, saving model to model_v2.h5
49/49 - 67s - loss: 0.0250 - accuracy: 0.7422 - 67s/epoch - 1s/step
Epoch 2/300

Epoch 2: loss did not improve from 0.02501
49/49 - 19s - loss: 0.0258 - accuracy: 0.7406 - 19s/epoch - 387ms/step
Epoch 3/300

Epoch 3: loss did not improve from 0.02501
49/49 - 18s - loss: 0.0257 - accuracy: 0.7566 - 18s/epoch - 361ms/step
Epoch 4/300

Epoch 4: loss did not improve from 0.02501
49/49 - 17s - loss: 0.0261 - accuracy: 0.7275 - 17s/epoch - 349ms/step
Epoch 5/300

Epoch 5: loss did not improve from 0.02501
49/49 - 17s - loss: 0.0257 - accuracy: 0.7377 - 17s/epoch - 338ms/step
Epoch 6/300

Epoch 6: loss did not improve from 0.02501
49/49 - 17s - loss: 0.0257 - accuracy: 0.7434 - 17s/epoch - 340ms/step
Epoch 7/300

Epoch 7: loss did not improve from 0.02501
49/49 - 17s - loss: 0.0257 - accuracy: 0.7484 - 17s/epoch - 343ms/step
Epoch 8/300

Epoch 8: loss did not improve from 0.02501
49/49 - 17s - loss: 0.0254 - 

<keras.callbacks.History at 0x1f73ca03f10>

In [22]:
to_predict = df.tail(10)
#to_predict.drop([to_predict.index[-1]],axis=0, inplace=True)
to_predict

Unnamed: 0,Bola1,Bola2,Bola3,Bola4,Bola5,Bola6,Bola7,Bola8,Bola9,Bola10,Bola11,Bola12,Bola13,Bola14,Bola15,Bola16,Bola17,Bola18,Bola19,Bola20
2440,1,3,4,9,11,13,24,25,36,39,49,64,65,67,75,82,88,96,97,100
2441,3,4,6,8,14,15,19,23,31,34,48,49,52,60,77,82,85,89,91,93
2442,7,8,15,30,33,51,57,60,68,69,74,75,76,78,84,85,89,92,93,100
2443,6,13,29,31,33,42,48,51,52,58,63,64,82,84,87,88,90,94,98,100
2444,4,18,19,23,24,30,33,42,43,46,52,53,55,63,65,71,80,82,89,93
2445,1,11,23,25,34,39,44,47,56,65,67,72,74,78,80,89,91,92,98,99
2446,4,5,7,15,39,40,44,51,55,60,62,68,74,75,76,77,80,88,95,97
2447,7,9,18,20,21,28,29,35,38,39,42,61,64,70,73,76,78,87,89,90
2448,3,14,15,17,18,32,34,37,46,52,54,59,60,62,73,85,87,91,94,100
2449,5,9,19,21,22,28,45,53,65,67,71,75,81,83,85,86,92,94,95,98


In [23]:
to_predict = np.array(to_predict)
to_predict

array([[  1,   3,   4,   9,  11,  13,  24,  25,  36,  39,  49,  64,  65,
         67,  75,  82,  88,  96,  97, 100],
       [  3,   4,   6,   8,  14,  15,  19,  23,  31,  34,  48,  49,  52,
         60,  77,  82,  85,  89,  91,  93],
       [  7,   8,  15,  30,  33,  51,  57,  60,  68,  69,  74,  75,  76,
         78,  84,  85,  89,  92,  93, 100],
       [  6,  13,  29,  31,  33,  42,  48,  51,  52,  58,  63,  64,  82,
         84,  87,  88,  90,  94,  98, 100],
       [  4,  18,  19,  23,  24,  30,  33,  42,  43,  46,  52,  53,  55,
         63,  65,  71,  80,  82,  89,  93],
       [  1,  11,  23,  25,  34,  39,  44,  47,  56,  65,  67,  72,  74,
         78,  80,  89,  91,  92,  98,  99],
       [  4,   5,   7,  15,  39,  40,  44,  51,  55,  60,  62,  68,  74,
         75,  76,  77,  80,  88,  95,  97],
       [  7,   9,  18,  20,  21,  28,  29,  35,  38,  39,  42,  61,  64,
         70,  73,  76,  78,  87,  89,  90],
       [  3,  14,  15,  17,  18,  32,  34,  37,  46,  52,  54,  

In [24]:
scaled_to_predict = scaler.transform(to_predict)

In [25]:
y_pred = new_model.predict(np.array([scaled_to_predict]))
print("The predicted numbers in the last lottery game are:", scaler.inverse_transform(y_pred).astype(int)[0])

The predicted numbers in the last lottery game are: [ 2  5  7 11 15 25 33 39 45 54 61 61 66 68 72 79 81 83 90 94]


In [35]:
prediction = df.tail(1)
prediction = np.array(prediction)
print("The actual numbers in the last lottery game were:", prediction[0])

The actual numbers in the last lottery game were: [ 5  9 19 21 22 28 45 53 65 67 71 75 81 83 85 86 92 94 95 98]
