In [1]:
# Install the scraping dependencies into the environment of the running kernel.
# `%pip` targets the kernel's interpreter, whereas `!pip` runs whichever `pip`
# happens to be first on PATH.
%pip install requests beautifulsoup4



### Obtain Database

In [3]:
from bs4 import BeautifulSoup
import requests
import os
from datetime import datetime
import numpy as np

### Convert a calendar date to UNIX time

In [4]:
def to_unix(date_string):
    """Convert a ``YYYY-MM-DD`` date string to a UNIX timestamp.

    The date is interpreted as midnight UTC, so the result is the same on
    every machine regardless of its local timezone or daylight-saving rules.

    Args:
        date_string: Date formatted as ``%Y-%m-%d`` (e.g. ``"2001-09-09"``).

    Returns:
        int: Whole seconds elapsed since 1970-01-01 00:00:00 UTC.

    Raises:
        ValueError: If ``date_string`` does not match ``%Y-%m-%d``.
    """
    date_object = datetime.strptime(date_string, "%Y-%m-%d")
    # Calling .timestamp() on a naive datetime would read it as *local* time.
    # Subtracting the naive epoch keeps the computation timezone-independent.
    elapsed = date_object - datetime(1970, 1, 1)
    return int(elapsed.total_seconds())

In [76]:
database=[]
# Presumably the page the files under dataset/html were saved from:
# https://www.loto49.ro/arhiva-loto49.php
def make_database(html_dir="dataset/html"):
    """Rebuild the global ``database`` list from saved HTML archive pages.

    Every file in ``html_dir`` is parsed in sorted filename order. In each
    file the first ``<tbody>`` is read; its first ``<tr>`` is skipped
    (presumably a header row -- confirm against the saved pages) and every
    remaining row contributes one entry:

    * ``"data"``: the row's first cell, converted with ``to_unix`` and stored
      as a ``np.float32`` number of days relative to UNIX second
      1,000,000,000.
    * ``"win"``: cells 1..6 parsed as integers, stored as a float32 array.

    Args:
        html_dir: Directory containing the saved HTML pages.

    Returns:
        None. The module-level ``database`` list is cleared and refilled in
        place.

    Raises:
        IndexError: If a file has no ``<tbody>`` or a row has fewer than
            seven cells.
        ValueError: If a date or number cell cannot be parsed.
    """
    global database
    database.clear()
    for file in sorted(os.listdir(html_dir)):
        file_path = os.path.join(html_dir, file)
        # The context manager closes the handle even if reading fails.
        with open(file_path, 'r') as f:
            content = f.read()
        soup = BeautifulSoup(content, features='html.parser')
        tbody = soup.find_all("tbody")[0]
        rows = tbody.find_all("tr")
        # Start at index 1: the first row of the table body is not parsed.
        for tr in rows[1:]:
            tds = tr.find_all("td")
            data = tds[0].text.strip()
            # Explicit indexing (not slicing) so a short row still raises.
            win = [int(tds[j].text.strip()) for j in range(1, 7)]
            database.append({
                "data": np.float32((to_unix(data) - 1000000000) / 86400),
                "win": np.float32(np.array(win))
            })

In [77]:
# Fill the global `database` list from the saved HTML pages, then show how
# many draws were parsed.
make_database()
len(database)

1128

### Model and Neural Network

In [78]:
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
import torch.nn as nn

In [79]:
class LotoDataset(Dataset):
    """Shuffled view of the draw database with an optional 90/10 split.

    The entries are permuted with a fixed seed (69), so every instance built
    from the same ``database`` sees the same order. ``training=True`` keeps
    the first 90% of that order, ``training=False`` keeps the remaining 10%,
    and any other value (default ``None``) keeps everything.
    """

    def __init__(self, database, training=None):
        shuffled = np.random.RandomState(seed=69).permutation(database)
        split_at = int(0.9 * len(shuffled))
        # Equality (not identity/truthiness) is kept on purpose so the branch
        # selection is exactly the same as before for every argument value.
        if training == True:
            shuffled = shuffled[:split_at]
        elif training == False:
            shuffled = shuffled[split_at:]
        self.database = shuffled

    def __len__(self):
        """Number of entries kept by this split."""
        return len(self.database)

    def __getitem__(self, idx):
        """Return ``(data, target)`` for entry ``idx``.

        ``data`` is the entry's stored ``"data"`` value. ``target`` is a flat
        float32 vector holding one 49-wide one-hot block per drawn number
        (number ``k`` sets position ``k - 1`` of its block).
        """
        entry = self.database[idx]
        drawn = np.asarray(entry["win"])
        one_hot = np.zeros((len(drawn), 49), dtype=np.float32)
        one_hot[np.arange(len(drawn)), drawn.astype(int) - 1] = 1
        return entry["data"], one_hot.flatten()

In [80]:
# Both datasets shuffle `database` with the same fixed seed, so the 90% / 10%
# slices they keep are disjoint.
dataset_training=LotoDataset(database=database, training=True)
dataset_testing=LotoDataset(database=database, training=False)
# One sample per batch; no `shuffle` argument is passed, so batches follow
# the dataset's own (already permuted) order.
loader_training=DataLoader(dataset_training, batch_size=1)
loader_testing=DataLoader(dataset_testing, batch_size=1)

In [101]:
# Number of samples in the training split (first 90% of the shuffled data).
len(dataset_training)

1015

In [102]:
# Number of samples in the held-out split (last 10% of the shuffled data).
len(dataset_testing)

113

In [103]:
class LotoModel(nn.Module):
    """Fully connected network mapping one scalar feature to 294 logits.

    Architecture: Linear(1 -> n_hidden) -> ReLU -> Linear(n_hidden -> n_hidden)
    -> ReLU -> Linear(n_hidden -> 294). The 294 outputs correspond to six
    blocks of 49 values, which is how the targets are laid out and how the
    predictions are decoded elsewhere in this notebook.

    Args:
        n_hidden: Width of both hidden layers.
    """

    def __init__(self, n_hidden):
        super().__init__()
        # Attribute names and creation order are part of the state_dict
        # layout and of the random initialisation sequence; keep them stable.
        self.l1 = nn.Linear(1, n_hidden)
        self.r1 = nn.ReLU()
        self.l2 = nn.Linear(n_hidden, n_hidden)
        self.r2 = nn.ReLU()
        self.l3 = nn.Linear(n_hidden, 294)

    def forward(self, x):
        """Run ``x`` (last dimension of size 1) through the layer stack."""
        for layer in (self.l1, self.r1, self.l2, self.r2, self.l3):
            x = layer(x)
        return x

In [104]:
# Instantiate the network with 1000 units in each of its two hidden layers.
model=LotoModel(1000)

In [105]:
# Number of full passes over the training loader.
n_epochs=1000
# Cross-entropy between the model's raw outputs and the target vector.
loss_fn=nn.CrossEntropyLoss()
# Plain stochastic gradient descent over every model parameter.
optimizer=torch.optim.SGD(params=model.parameters(), lr=0.01)

In [None]:
# Train for `n_epochs` passes over `loader_training`, one optimizer step per
# batch.
for epoch in range(n_epochs):
    for index, (data, win) in enumerate(loader_training):
        # The loader yields one scalar feature per sample, i.e. shape
        # (batch,). Add the trailing feature axis that nn.Linear(1, ...)
        # expects, so any batch size works rather than only batch_size=1.
        y_pred = model(data.unsqueeze(-1))
        # Score the whole batch instead of only its first target row; with
        # batch_size=1 this yields the same loss value as before.
        # NOTE(review): each target row contains six 1s, so this is a single
        # softmax over all 294 outputs rather than six independent 49-way
        # ones -- confirm that is the intended objective.
        loss = loss_fn(y_pred, win)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        # Log the loss of every 2000th batch, counted from the first batch
        # of each epoch.
        if index % 2000 == 0:
            print (f'Epoch: {epoch+1} Loss: {loss.item()}')

Epoch: 1 Loss: 5863.92578125
Epoch: 2 Loss: 33.72492599487305
Epoch: 3 Loss: 33.596527099609375
Epoch: 4 Loss: 33.56105041503906
Epoch: 5 Loss: 33.56764221191406
Epoch: 6 Loss: 33.588104248046875
Epoch: 7 Loss: 33.60704803466797
Epoch: 8 Loss: 33.619911193847656
Epoch: 9 Loss: 33.627830505371094
Epoch: 10 Loss: 33.633541107177734


In [None]:
def array_to_nrs(arr):
    """Decode a 294-element tensor into six numbers in the range 1..49.

    The tensor is viewed as 6 rows of 49 values; for each row the 1-based
    position of the largest value is returned.

    Args:
        arr: Tensor with 294 elements that supports ``.numpy()``.

    Returns:
        list[int]: Six integers, one per row.
    """
    grid = arr.numpy().reshape(6, 49)
    # argmax is 0-based; the numbers are 1-based.
    return (np.argmax(grid, axis=1) + 1).tolist()

In [None]:
# Print the input, the decoded prediction and the decoded real draw for the
# first 10 samples of the test loader; gradients are not tracked.
with torch.no_grad():
    for index, (data, win) in enumerate(loader_testing, start=1):
        print(data)
        pred = array_to_nrs(model(data))
        real = array_to_nrs(win)
        print(pred, real)
        if index == 10:
            break