In [1]:
import torch
from torch import nn
from torch.nn import functional as F
from torch import optim
from torch.utils.tensorboard import SummaryWriter




In [2]:
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, n_layers=3):
        super().__init__()
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.rnn = nn.RNN(input_size, hidden_size, n_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        
    def forward(self, x):
        h0=torch.zeros(self.n_layers, x.size(0), self.hidden_size).to('cuda')
        out, _ = self.rnn(x, h0)
        out=out[:,-1,:]
        out = self.fc(out)
        return out 
network=RNN(2,32,15,2).to('cuda')

In [3]:
from itertools import combinations
list_tar=[(1,2),(1,3),(1,4),(1,5),(1,6),(2,3),(2,4),(2,5),(2,6),(3,4),(3,5),(3,6),(4,5),(4,6),(5,6)]
enu=enumerate(list_tar)
encode_dict={tar:idx for idx,tar in enu}
encode_dict[-1]=-1
def encode(inp):
    return encode_dict[int(inp)]
def encode_list(inp_list):
    return [encode_dict[tar] for tar in inp_list]
enu=enumerate(list_tar)
decode_dict={idx:tar for idx,tar in enu}
decode_dict[-1]=-1
def decode(inp):
    return decode_dict[inp]
enu=enumerate(list_tar)
decode_dict_zero={idx:tar for idx,tar in enu}
decode_dict_zero[-1]=(0,0)
def decode_zero(inp):
    return decode_dict_zero[inp]
def decode_list(inp):
    return [decode_dict[tar] for tar in inp]

In [4]:
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader
data=pd.read_csv('train.csv')

In [5]:
for i in range(15):
    data['input_'+str(i)]=data['input_'+str(i)].apply(decode_zero)

In [6]:
data=data[:10000]

In [7]:
# class CustomDataset(Dataset):
#     def __init__(self, dataframe):
#         self.dataframe = dataframe.iloc[:,1:]
#         self.feature = torch.tensor(self.dataframe.iloc[:,15:].values.tolist(), dtype=torch.float32)
#         self.label = torch.tensor(self.dataframe.iloc[:,:15].values.tolist(), dtype=torch.float32)

#     def __getitem__(self, index):
#         feature = self.feature[index]
#         label = self.label[index]
#         return feature, label

#     def __len__(self):
#         return len(self.label)
    
class CustomDataset(Dataset):
    def __init__(self, dataframe):
        self.dataframe = dataframe.iloc[:,1:]
        self.feature = torch.tensor(self.dataframe.iloc[:,1:].values.tolist(), dtype=torch.float32)
        self.label = F.one_hot(torch.tensor(self.dataframe.iloc[:,0].to_numpy())).to(torch.float32)

    def __getitem__(self, index):
        feature = self.feature[index]
        label = self.label[index]
        return feature, label

    def __len__(self):
        return len(self.label)


In [8]:
pytorch_dataset = CustomDataset(data)
train_set, val_set = torch.utils.data.random_split(pytorch_dataset, [0.8, 0.2])

In [9]:
val_load = torch.utils.data.DataLoader(
    val_set
    , batch_size=len(val_set))
x_val, y_val = next(iter(val_load))
x_val = x_val.cuda()
y_val = y_val.cuda()
train_load = torch.utils.data.DataLoader(
    train_set
    , batch_size=64
)

In [10]:
writer=SummaryWriter('runs/RNN_point_input')
inputs, labels = next(iter(train_load))
writer.add_graph(network, inputs.cuda())

In [11]:
optimizer = optim.Adam(network.parameters(), lr=0.001, weight_decay=1e-3)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer)

@torch.no_grad()
def get_num_correct(preds, labels):
    return preds.argmax().eq(labels).sum().item()


@torch.no_grad()
def val_accuracy():
    preds = network(x_val)
    return get_num_correct(preds, y_val) / y_val.shape[0]


@torch.no_grad()
def val_loss(x_val):
    network.eval()
    x_val=x_val.reshape(x_val.shape[0],x_val.shape[1],2)
    preds = network(x_val)
    network.train()
    return F.cross_entropy(preds, y_val)


def train_one_epoch():
    network.train()
    total_loss = 0
    for i in train_load:
        network.train()
        optimizer.zero_grad()
        features, targets = i
        features = features.reshape(features.shape[0],features.shape[1],2).to('cuda') # reshape and move to cuda
        targets = targets.to('cuda')
        preds = network(features)
        loss = F.cross_entropy(preds, targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()*features.shape[0]
    scheduler.step(val_loss(x_val))
    return f"loss: {total_loss / len(train_set)}\n val_loss: {val_loss(x_val)}\n"


In [12]:
for i in range(1000):
    m = train_one_epoch()
    print('epoch:', i + 1, m)

epoch: 1 loss: 2.7110855884552003
 val_loss: 2.7094645500183105

epoch: 2 loss: 2.708787317276001
 val_loss: 2.7096307277679443

epoch: 3 loss: 2.7083255577087404
 val_loss: 2.7097327709198

epoch: 4 loss: 2.7080508880615235
 val_loss: 2.7097702026367188

epoch: 5 loss: 2.707865287780762
 val_loss: 2.709773063659668

epoch: 6 loss: 2.707738473892212
 val_loss: 2.70975399017334

epoch: 7 loss: 2.707650775909424
 val_loss: 2.7097198963165283

epoch: 8 loss: 2.7075884456634522
 val_loss: 2.709676742553711

epoch: 9 loss: 2.7075419330596926
 val_loss: 2.709630012512207

epoch: 10 loss: 2.707504508972168
 val_loss: 2.709584951400757

epoch: 11 loss: 2.707471544265747
 val_loss: 2.709545135498047

epoch: 12 loss: 2.7074403553009034
 val_loss: 2.709512233734131

epoch: 13 loss: 2.70651243019104
 val_loss: 2.709618330001831

epoch: 14 loss: 2.706437490463257
 val_loss: 2.7097489833831787

epoch: 15 loss: 2.7063681507110595
 val_loss: 2.709888219833374

epoch: 16 loss: 2.7063077602386474
 val_l

KeyboardInterrupt: 

In [13]:
def to_old_network(inp:torch.tensor):
    new_inp=[decode_zero(i[0]) for i in inp[0].tolist()]
    return network(torch.tensor(new_inp,dtype=torch.float32).reshape(1,15,2).to('cuda'))
    

In [14]:
torch.save({'model_state_dict':network.state_dict(),'optimizer_state_dict':optimizer.state_dict()},f'./models/model_RNN_two_point.pth')
from itertools import combinations
network.eval()
list_tar=[(1,2),(1,3),(1,4),(1,5),(1,6),(2,3),(2,4),(2,5),(2,6),(3,4),(3,5),(3,6),(4,5),(4,6),(5,6)]
enu=enumerate(list_tar)
encode_dict={tar:idx for idx,tar in enu}
encode_dict[-1]=-1
def encode(inp):
    return encode_dict[inp]
def encode_list(inp_list):
    return [encode_dict[tar] for tar in inp_list]
enu=enumerate(list_tar)
decode_dict={idx:tar for idx,tar in enu}
decode_dict[-1]=-1
def decode(inp):
    return decode_dict[inp]
def decode_list(inp):
    return [decode_dict[tar] for tar in inp]
def form_tri(lst: list,inp: set):
    line_list=[x for x in lst+[inp] if x != -1]
    combs = list(combinations(line_list, 3))
    for comb in combs:
        (a,b),(c,d),(e,f)=comb
        if len(set([a,b,c,d,e,f]))==3:
            return True
    return False
def make_move(inp: list): # inptut is should be encoded.
    softmax = nn.Softmax(dim=1)
    network.eval()
    out=softmax(to_old_network(torch.tensor([inp],dtype=torch.float32).reshape(1,15,1).to('cuda'))).tolist()[0]
    descending_indices = sorted(list(range(len(out))), key=lambda i: out[i],reverse=True)
    for idx in descending_indices:
        if (idx in inp) or form_tri(decode_list(inp[1::2]),decode(idx)):
            continue
        else:
            return decode(idx)
    for idx in descending_indices:
        if (idx in inp):
            continue
        else:
            return decode(idx)
def make_move_no_assistance(inp: list):
    softmax = nn.Softmax(dim=1)
    network.eval()
    out=softmax(to_old_network(torch.tensor([inp],dtype=torch.float32).reshape(1,15,1).to('cuda'))).tolist()[0]
    descending_indices = sorted(list(range(len(out))), key=lambda i: out[i],reverse=True)
    for idx in descending_indices:
        if (idx in inp):
            continue
        else:
            return decode(idx)
from random import choice
def find_pos(inp_list):
    for i in range(2,7):
        lst=inp_list[:i+1]
        combs = list(combinations(lst, 3))
        combs=sorted(combs)
        for comb in combs:
            (a,b),(c,d),(e,f)=comb
            if len(set([a,b,c,d,e,f]))==3:
                return i
    return 7
# 0 stands for the the player one win, 1 stands for the player two win
def determine_winner(inp_list):
    pos_1=find_pos(inp_list[::2])
    pos_2=find_pos(inp_list[1::2])
    if pos_1<=pos_2:
        return 1
    else:
        return 0

def ai_play_against_random():
    output=[-1 for _ in range(15)]
    pos=0
    pick=choice(list_tar)
    output[pos]=pick
    pos+=1
    for i in range(7):
        output[pos]=make_move_no_assistance(encode_list(output))
        pos+=1
        pick=choice([x for x in list_tar if x not in output])
        output[pos]=pick
        pos+=1
    return determine_winner(decode_list(encode_list(output)))

def ai_play_against_naive():
    output=[-1 for _ in range(15)]
    pos=0
    pick=choice(list_tar)
    output[pos]=pick
    pos+=1
    output[pos]=make_move(encode_list(output))
    pos+=1
    pick=choice([x for x in list_tar if x not in output])
    output[pos]=pick
    pos+=1
    for i in range(6):
        output[pos]=make_move(encode_list(output))
        pos+=1
        temp_list=[x for x in list_tar if x not in output and x != -1]
        temp_list1=temp_list.copy()
        for j in range(len(temp_list)):
            if form_tri(output[::2],temp_list[j]):
                temp_list1.remove(temp_list[j])
        if temp_list1==[]:
            pick=choice([x for x in list_tar if x not in output])
        else:
            pick=choice(temp_list1)
        output[pos]=pick
        pos+=1
    return determine_winner(decode_list(encode_list(output)))

def naive_against_radom():
    output=[]
    for i in range(5):
        pick=choice(list_tar)
        output.append(pick)
    for i in range(5):
        temp_list=[x for x in list_tar if x not in output]
        temp_list1=temp_list.copy()
        for j in range(len(temp_list)):
            if form_tri(output[1::2],temp_list[j]):
                temp_list1.remove(temp_list[j])
        if temp_list1==[]:
            pick=choice([x for x in list_tar if x not in output])
        else:
            pick=choice(temp_list1)
        output.append(pick)
        while pick in output:
            pick=choice(list_tar)
        output.append(pick)
    return determine_winner(output)

def naive_against_naive():
    output=[]
    for i in range(4):
        pick=choice([x for x in list_tar if x not in output])
        output.append(pick)
    for i in range(5):
        temp_list=[x for x in list_tar if x not in output]
        temp_list1=temp_list.copy()
        for j in range(len(temp_list)):
            if form_tri(output[::2],temp_list[j]):
                temp_list1.remove(temp_list[j])
        if temp_list1==[]:
            pick=choice([x for x in list_tar if x not in output])
        else:
            pick=choice(temp_list1)
        output.append(pick)
        temp_list=[x for x in list_tar if x not in output]
        temp_list1=temp_list.copy()
        for j in range(len(temp_list)):
            if form_tri(output[1::2],temp_list[j]):
                temp_list1.remove(temp_list[j])
        if temp_list1==[]:
            pick=choice([x for x in list_tar if x not in output])
        else:
            pick=choice(temp_list1)
        output.append(pick)
    pick=choice([x for x in list_tar if x not in output])
    output.append(pick)
    return determine_winner(output)

def random_against_random():
    output=[]
    for i in range(15):
        pick=choice([x for x in list_tar if x not in output])
        output.append(pick)
    return determine_winner(output) 

In [18]:
# network=torch.load('./models/model_64_36_original_plain.pth').to('cuda')
network.eval()
n=0
for i in range(1000):
    n+=ai_play_against_random()
print(n/1000)

0.576


In [None]:
softmax = nn.Softmax(dim=1)
out=network(input=torch.tensor([[0,3,6,12,14,9,10,5,-1,-1,-1,-1,-1,-1,-1]],dtype=torch.float32).to('cuda')).tolist()[0]
print(out)
ascending_indices = sorted(list(range(len(out))), key=lambda i: out[i],reverse=True)
ascending_indices

TypeError: RNN.forward() got an unexpected keyword argument 'input'

In [None]:
network(torch.tensor([[10,0,5,14,8,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1]],dtype=torch.float32).to('cuda'))

tensor([[-0.2020, -0.1966, -0.1875, -0.1714, -0.1749, -0.0718, -0.0715, -0.0306,
          0.0674,  0.0718,  0.0226,  0.1946,  0.1817,  0.1759,  0.3197]],
       device='cuda:0', grad_fn=<AddmmBackward0>)