In [2]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from matplotlib import pyplot as plt

from tqdm import tqdm

In [3]:
# create dataset
def word_2_onehot(word: str):
    assert len(word) == 5, (word, len(word))
    # print(word, len(word))
    sample = torch.zeros((26*len(word), ))
    for i in range(len(word)):
        sample[ 26*i + ord(word[i])-97] = 1   
    return sample

class WordDataset(Dataset):
    def __init__(self):
        data = pd.read_excel('../UPDATE_Problem_C_Data_Wordle.xlsx')
        data = data.to_numpy()[::-1, 1:]
        # id, word, num_total, num_hard, 1, 2, 3, 4, 5, 6, X
        data[327, 2] = data[326, 2]
        self.words = data[:, 1]
        self.targets = torch.FloatTensor(data[:, 4:].astype(np.float64)/100)

    def __len__(self):
        return len(self.words)

    def __getitem__(self, index):
        ''' Return sample(R^130), target R^7 '''
        # # index: the data's first element's index
        # sample = torch.zeros((130, ))
        # # sample[0] = index
        # word = self.words[index]
        # # print(word)
        # for i in range(5):
        #     sample[ 26*i + ord(word[i])-97] = 1
        sample = word_2_onehot(self.words[index])
        target = self.targets[index]
        return sample, target


In [4]:
dataset = WordDataset()

train_size = int(len(dataset) * 0.9)
test_size = len(dataset) - train_size
train_set, test_set = torch.utils.data.random_split(dataset, [train_size, test_size])

batch_size = 16
train_loader = DataLoader(train_set, batch_size, True)
test_loader = DataLoader(test_set, 1, False)
print(dataset[10])

(tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.]), tensor([0.0100, 0.0800, 0.3200, 0.3200, 0.1800, 0.0800, 0.0200]))


In [5]:
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.layer1 = nn.Sequential(nn.Linear(130, 130), nn.BatchNorm1d(130), nn.Dropout(0.3), nn.ReLU())
        self.layer2 = nn.Sequential(nn.Linear(130, 64), nn.BatchNorm1d(64), nn.Dropout(0.3), nn.ReLU())
        self.layer3 = nn.Sequential(nn.Linear(64, 7), nn.ReLU())
        self.softmax = nn.Softmax(1)
    
    def forward(self,x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        return self.softmax(x)

In [6]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

model = MLP().to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

num_epochs = 200

tbar = tqdm(range(num_epochs))

model.train()
for e in tbar:
    batch_loss = []
    for sample, target in train_loader:
        sample, target = sample.to(device), target.to(device)
        # print(sample.shape)
        out = model(sample)
        loss = criterion(out, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()    
        batch_loss.append(loss.cpu().item())
    
    tbar.set_postfix(loss=sum(batch_loss)/len(batch_loss))

torch.save(model.state_dict(), 'model.pth')

100%|██████████| 200/200 [00:10<00:00, 18.78it/s, loss=0.000611]


## evaluation

In [7]:
model.eval()
total_loss = []
predicted = []
gt = []
for sample, target in test_loader:
    sample, target = sample.to(device), target.to(device)
    # print(sample.shape)
    out = model(sample)
    loss = criterion(out, target)  
    total_loss.append(loss.cpu().item())
    predicted.append(out.cpu().detach())
    gt.append(target.cpu().detach())

# print(total_loss)
print(np.array(total_loss).mean())

0.0031317219420290915


## visualize the result on the test dataset

In [8]:
IDX = 1
for i in range(len(test_loader)):
    plt.plot(np.arange(7), predicted[i].numpy().reshape(-1), np.arange(7), gt[i].numpy().reshape(-1))
    plt.legend(['predicted', 'gt'])
    plt.savefig(f'test/{i}.png')
    plt.close()

## inference

In [12]:
test = word_2_onehot('eerie')
model(test.reshape(1, -1))

tensor([[0.0152, 0.0684, 0.2122, 0.3041, 0.2541, 0.1300, 0.0159]],
       grad_fn=<SoftmaxBackward0>)