In [210]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

In [211]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [212]:
data = pd.read_csv("./.data/lotto_num.CSV", encoding="euc-kr",
                   usecols=[0, 2, 3, 4, 5, 6, 7, 8])
print(data)

      회차   1   2   3   4   5   6  보너스
0    899   8  19  20  21  33  39   37
1    898  18  21  28  35  37  42   17
2    897   6   7  12  22  26  36   29
3    896   5  12  25  26  38  45   23
4    895  16  26  31  38  39  41   23
..   ...  ..  ..  ..  ..  ..  ..  ...
894    5  16  24  29  40  41  42    3
895    4  14  27  30  31  40  42    2
896    3  11  16  19  21  27  31   30
897    2   9  13  21  25  32  42    2
898    1  10  23  29  33  37  40   16

[899 rows x 8 columns]


In [226]:
data_round = data["회차"].astype(int)
data_num = data[["1", "2", "3", "4", "5", "6"]].astype(float)
print(data_round)
print(data_num)

0      899
1      898
2      897
3      896
4      895
      ... 
894      5
895      4
896      3
897      2
898      1
Name: 회차, Length: 899, dtype: int32
        1     2     3     4     5     6
0     8.0  19.0  20.0  21.0  33.0  39.0
1    18.0  21.0  28.0  35.0  37.0  42.0
2     6.0   7.0  12.0  22.0  26.0  36.0
3     5.0  12.0  25.0  26.0  38.0  45.0
4    16.0  26.0  31.0  38.0  39.0  41.0
..    ...   ...   ...   ...   ...   ...
894  16.0  24.0  29.0  40.0  41.0  42.0
895  14.0  27.0  30.0  31.0  40.0  42.0
896  11.0  16.0  19.0  21.0  27.0  31.0
897   9.0  13.0  21.0  25.0  32.0  42.0
898  10.0  23.0  29.0  33.0  37.0  40.0

[899 rows x 6 columns]


In [227]:
scaler = MinMaxScaler()
scaler.fit(data_num)
data_normal = scaler.transform(data_num)
print(data_normal)

data_num = data_num.to_numpy()
print(data_num)

[[0.20588235 0.48571429 0.45945946 0.42105263 0.68571429 0.76923077]
 [0.5        0.54285714 0.67567568 0.78947368 0.8        0.88461538]
 [0.14705882 0.14285714 0.24324324 0.44736842 0.48571429 0.65384615]
 ...
 [0.29411765 0.4        0.43243243 0.42105263 0.51428571 0.46153846]
 [0.23529412 0.31428571 0.48648649 0.52631579 0.65714286 0.88461538]
 [0.26470588 0.6        0.7027027  0.73684211 0.8        0.80769231]]
[[ 8. 19. 20. 21. 33. 39.]
 [18. 21. 28. 35. 37. 42.]
 [ 6.  7. 12. 22. 26. 36.]
 ...
 [11. 16. 19. 21. 27. 31.]
 [ 9. 13. 21. 25. 32. 42.]
 [10. 23. 29. 33. 37. 40.]]


In [228]:
def make_label(data, size):
    train_set = []
    label = []
    l = len(data)
    print(l)
    for i in range(l - size):
        train_tmp = data[i:i+size]
        train_set.append(train_tmp)
        label_tmp = data[i+size]
        label.append(label_tmp)
    return train_set, label

In [293]:
train_size = 300

print(data_num.shape)
train_set = data_normal[:-train_size]
test_set = data_normal[-train_size:]

# train_set = data_num[:-train_size]
# test_set = data_num[-train_size:]

train_set, label = make_label(train_set, train_size)

train_set = torch.Tensor(train_set)
label = torch.Tensor(label)
test_set = torch.Tensor(test_set)
test_set = test_set.view(1, train_size, -1)

print(train_set.shape)
print(test_set.shape)
print(label.shape)
print(train_set)

(899, 6)
599
torch.Size([299, 300, 6])
torch.Size([1, 300, 6])
torch.Size([299, 6])
tensor([[[ 8., 19., 20., 21., 33., 39.],
         [18., 21., 28., 35., 37., 42.],
         [ 6.,  7., 12., 22., 26., 36.],
         ...,
         [13., 14., 22., 27., 30., 38.],
         [ 2., 16., 19., 31., 34., 35.],
         [ 5., 11., 14., 27., 29., 36.]],

        [[18., 21., 28., 35., 37., 42.],
         [ 6.,  7., 12., 22., 26., 36.],
         [ 5., 12., 25., 26., 38., 45.],
         ...,
         [ 2., 16., 19., 31., 34., 35.],
         [ 5., 11., 14., 27., 29., 36.],
         [ 5., 12., 17., 29., 34., 35.]],

        [[ 6.,  7., 12., 22., 26., 36.],
         [ 5., 12., 25., 26., 38., 45.],
         [16., 26., 31., 38., 39., 41.],
         ...,
         [ 5., 11., 14., 27., 29., 36.],
         [ 5., 12., 17., 29., 34., 35.],
         [ 4., 12., 24., 33., 38., 45.]],

        ...,

        [[ 2., 19., 25., 26., 27., 43.],
         [13., 14., 22., 27., 30., 38.],
         [ 2., 16., 19., 31., 34.,

In [294]:
class Lotto(nn.Module):
    def __init__(self, input_size=6, hidden_size=256, output_size=6):
        super(Lotto, self).__init__()
        self.hidden_size = hidden_size
        self.n_layer = 2
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers=self.n_layer)
        self.linear = nn.Linear(self.hidden_size, output_size)
        self.h = torch.randn(self.n_layer, 1, self.hidden_size)
        self.c = torch.randn(self.n_layer, 1, self.hidden_size)

    def forward(self, data):
        lstm_out, [self.h, self.c] = self.lstm(
            data.view(len(data), 1, -1), [self.h, self.c])
        pred = self.linear(lstm_out.view(len(data), -1))
        return pred[-1]

In [309]:
model = Lotto().to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)
print(model)

Lotto(
  (lstm): LSTM(6, 256, num_layers=2)
  (linear): Linear(in_features=256, out_features=6, bias=True)
)


In [310]:
model.train()

epochs = 100
for epoch in range(epochs):
    loss_sum = 0.
    for i, data in enumerate(train_set):
        _label = label[i]

        data = data.to(device)
        _label = _label.to(device)

        optimizer.zero_grad()
        model.h = torch.randn(model.n_layer, 1, model.hidden_size).to(device)
        model.c = torch.randn(model.n_layer, 1, model.hidden_size).to(device)

        pred = model(data)
        loss = criterion(pred, _label)
        loss_sum += loss
        loss.backward()
        optimizer.step()

    print("Epoch: {}/{}, loss: {}" .format(epoch +
                                           1, epochs, loss_sum.item()/len(data)))

Epoch: 1/100, loss: 431.26203125
Epoch: 2/100, loss: 218.23462239583333
Epoch: 3/100, loss: 123.59352864583333
Epoch: 4/100, loss: 76.26289713541667
Epoch: 5/100, loss: 54.10892252604167
Epoch: 6/100, loss: 44.6430859375
Epoch: 7/100, loss: 41.020703125
Epoch: 8/100, loss: 39.799423828125
Epoch: 9/100, loss: 39.43990885416667
Epoch: 10/100, loss: 39.34695638020833
Epoch: 11/100, loss: 39.325442708333334
Epoch: 12/100, loss: 39.321207682291664
Epoch: 13/100, loss: 39.321123046875
Epoch: 14/100, loss: 39.32193359375
Epoch: 15/100, loss: 39.32289388020833
Epoch: 16/100, loss: 39.32375
Epoch: 17/100, loss: 39.324453125
Epoch: 18/100, loss: 39.325016276041666
Epoch: 19/100, loss: 39.325442708333334
Epoch: 20/100, loss: 39.325771484375
Epoch: 21/100, loss: 39.32603515625
Epoch: 22/100, loss: 39.326243489583334
Epoch: 23/100, loss: 39.326360677083336
Epoch: 24/100, loss: 39.326481119791666
Epoch: 25/100, loss: 39.32655924479167
Epoch: 26/100, loss: 39.326617838541665
Epoch: 27/100, loss: 39.3

In [314]:
model.eval()

test_set = test_set.to(device)
train_set = train_set.to(device)

with torch.no_grad():
    result = model(test_set[0])

result = result.to("cpu")
print(result)

# actual_result = result.detach().numpy()
actual_result = scaler.inverse_transform([actual_result])

actual_num = np.around(actual_result)
actual_num = np.sort(actual_num)
print(actual_num)

tensor([ 5.4006, 10.3993, 16.2590, 21.6882, 27.7450, 34.2735])
[ 5. 10. 16. 22. 28. 34.]
