<a href="https://colab.research.google.com/github/Zarif123/SSLM-Project/blob/main/chess_model_regression_zeroshot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [252]:
%%capture
# Colab setup cell. %pip (rather than !pip) installs into the environment of
# the running kernel, so the packages are importable by the cells below.
%pip install transformers
%pip install python-chess

In [253]:
from datetime import datetime

from google.colab import drive
# Mount Google Drive; everything this notebook reads or writes lives in the
# shared project folder underneath the mount point.
drive.mount('/content/gdrive')
folder = "/content/gdrive/MyDrive/Statistical_LM_Group_Folder"

# Full path of the input CSV inside the project folder.
csv_file = "chess_data_with_buckets.csv"
csv_path = "/".join((folder, csv_file))


# Timestamp (month_day_year_hour_minute) used as a prefix for the model
# filename, so each run gets its own file name.
now = datetime.now()
dt_string = now.strftime("%m_%d_%Y_%H_%M")

model_file = "regression_model.pth"
# NOTE: the timestamp is deliberately NOT assigned to a variable called
# `datetime`. Doing so rebinds the imported `datetime` class to a string, and
# re-running this cell then fails at `datetime.now()` with AttributeError.
model_path = f"{folder}/{dt_string}_{model_file}"

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [254]:
from transformers import BertTokenizer, AutoTokenizer, BertModel, BertConfig, get_linear_schedule_with_warmup
import torch.optim as optim
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
from tqdm import tqdm
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader
from imblearn.under_sampling import RandomUnderSampler

### Parameters

In [255]:
# Use the GPU when PyTorch can see one; otherwise run on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Run settings gathered in one place. This is a plain dict at this point; it
# is converted to an attribute-style object further down in the same cell.
params = dict(
    batch_size=1,
    epochs=5,
    learning_rate=0.00003,
    warmup_steps=0.01,
    epsilon=1e-8,
    accum_iter=8,
    num_classes=6,
    dropout=0.5,
)

class Params:
  """Attribute-style container for keyword settings.

  Every keyword argument given to the constructor is stored on the instance
  under the same name, so ``Params(dropout=0.5).dropout == 0.5``.
  """

  def __init__(self, **kwargs):
    for name in kwargs:
      setattr(self, name, kwargs[name])

# Rebind `params` from the plain dict to a Params instance so that settings
# are read as attributes (e.g. `params.dropout`, `params.learning_rate`).
params = Params(**params)

### Regression Model

In [256]:
class BertRegressor(nn.Module):
  """Pretrained BERT encoder followed by dropout and a one-output linear head.

  Args:
    dropout: Drop probability applied to BERT's pooled output before the
      linear head. The default is read from `params.dropout` once, when the
      class is defined, not on each instantiation.
    model_name: Name or path of the checkpoint handed to
      `BertModel.from_pretrained`. Defaults to 'bert-base-cased'.
  """

  def __init__(self, dropout=params.dropout, model_name='bert-base-cased'):
    super().__init__()

    self.bert = BertModel.from_pretrained(model_name)
    self.dropout = nn.Dropout(dropout)
    # Take the head's input width from the loaded encoder instead of
    # hard-coding 768, so other BERT sizes work without editing this class.
    self.linear = nn.Linear(self.bert.config.hidden_size, 1)

  def forward(self, input_id, mask):
    """Returns the linear head's output for a batch.

    Args:
      input_id: Token ids, passed to BERT as `input_ids`.
      mask: Attention mask, passed to BERT as `attention_mask`.

    Returns:
      Output of the final linear layer (one value per pooled vector).
    """
    # With return_dict=False BERT returns a tuple; the second element is the
    # pooled output, which is the only part the head consumes.
    _, pooled_output = self.bert(input_ids=input_id, attention_mask=mask, return_dict=False)
    dropout_output = self.dropout(pooled_output)
    linear_output = self.linear(dropout_output)

    return linear_output

### Dataset Class

In [257]:
class ChessDataset(torch.utils.data.Dataset):
  """Dataset of (tokenized move text, label) pairs.

  Every entry of `moves` is tokenized up front in the constructor with the
  'bert-base-cased' tokenizer, padded or truncated to 256 tokens and returned
  as PyTorch tensors. `labels` is converted to a `torch.Tensor`.
  """

  def __init__(self, moves, labels):
    bert_tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
    self.labels = torch.Tensor(labels)

    # Same tokenizer options for every sample.
    encode_options = {
        "padding": 'max_length',
        "max_length": 256,
        "truncation": True,
        "return_tensors": "pt",
    }
    encoded_moves = []
    for game in moves:
      encoded_moves.append(bert_tokenizer(game, **encode_options))
    self.moves = encoded_moves

  def __len__(self):
    """Number of samples, taken from the label container."""
    return len(self.labels)

  def __getitem__(self, idx):
    """Returns the (encoded moves, label) pair stored at position `idx`."""
    encoded = self.moves[idx]
    target = self.labels[idx]
    return encoded, target

### Helper Functions

In [258]:
def rating_to_bucket(rating):
  """
  Maps a rating to a bucket index from 0 to 5.

  Buckets are half-open ranges: below 1300 -> 0, [1300, 1500) -> 1,
  [1500, 1700) -> 2, [1700, 1900) -> 3, [1900, 2100) -> 4, and
  2100 or above -> 5. A value that compares False against every bound
  (e.g. NaN) falls through and yields None.
  """
  upper_bounds = (1300, 1500, 1700, 1900, 2100)
  for bucket, bound in enumerate(upper_bounds):
    if rating < bound:
      return bucket
  # Explicit check (rather than an unconditional return) so unordered values
  # such as NaN still fall through to the implicit None.
  if rating >= upper_bounds[-1]:
    return len(upper_bounds)

In [259]:
def get_metrics(model_output, label, loss_fn=None, label_scaler=None):
  """
  Scores one batch of predictions on the original (un-standardized) ELO scale.

  Predictions and labels are both mapped back through the scaler's
  `inverse_transform` before the loss and the bucket comparison are computed.

  Args:
    model_output: Predicted values in the scaler's standardized space. Must
      have the 2-D layout the scaler was fitted on (one column).
    label: Ground-truth values in the same space and layout.
    loss_fn: Optional loss callable. When omitted, the notebook-level
      `criterion` is used.
    label_scaler: Optional fitted scaler providing `inverse_transform`. When
      omitted, the notebook-level `scaler` is used.

  Returns:
    (correct, loss): `correct` is the NUMBER of samples whose predicted rating
    lands in the same bucket as the true rating (a count, not a ratio; divide
    by the sample count for accuracy). `loss` is `loss_fn` evaluated on the
    un-scaled ratings.
  """
  # Fall back to the notebook globals so existing two-argument calls keep working.
  if loss_fn is None:
    loss_fn = criterion
  if label_scaler is None:
    label_scaler = scaler

  output = model_output.cpu().detach().numpy()
  label = label.cpu().detach().numpy()

  pred_rating = torch.Tensor(label_scaler.inverse_transform(output))
  real_rating = torch.Tensor(label_scaler.inverse_transform(label))
  loss = loss_fn(pred_rating, real_rating)

  # Flatten to plain Python floats so rating_to_bucket compares numbers
  # rather than one-element tensors.
  pred_bucket = np.array([rating_to_bucket(r) for r in pred_rating.reshape(-1).tolist()])
  real_bucket = np.array([rating_to_bucket(r) for r in real_rating.reshape(-1).tolist()])

  acc = (pred_bucket == real_bucket).sum().item()
  return acc, loss

### Test Function

In [261]:
def test(model, test_dataloader, criterion, scaler):
  """
  Evaluates `model` on every batch of `test_dataloader` and prints the results.

  The model is switched to eval mode for the duration of the run, so dropout
  is disabled and the reported numbers are deterministic. Its previous
  train/eval mode is restored afterwards.

  Args:
    model: Module called as `model(input_id, mask)`.
    test_dataloader: Iterable of `(encoded_input, label)` batches, where
      `encoded_input` has 'input_ids' and 'attention_mask' entries.
    criterion: Accepted for interface compatibility. It is not forwarded;
      `get_metrics` is called with only the output and the label.
    scaler: Accepted for interface compatibility; not forwarded either.

  Prints:
    "Test Accuracy": correctly bucketed samples divided by the number of
    samples seen. "Test Loss": mean of the per-batch losses.
  """
  total_acc_test = 0
  total_loss_test = 0.0
  num_samples = 0
  num_batches = 0

  # Without eval mode the dropout layer keeps dropping activations at test time.
  was_training = model.training
  model.eval()
  try:
    with torch.no_grad():
      for test_input, test_label in test_dataloader:
        test_label = test_label.to(device)
        # Each sample was tokenized on its own with return_tensors="pt", so
        # both tensors carry an extra dimension at index 1; drop it from the
        # mask as well as from the ids so the two stay aligned.
        mask = test_input['attention_mask'].squeeze(1).to(device)
        input_id = test_input['input_ids'].squeeze(1).to(device)

        output = model(input_id, mask)

        acc, loss = get_metrics(output, test_label)
        total_acc_test += acc
        total_loss_test += float(loss)
        num_samples += test_label.shape[0]
        num_batches += 1
  finally:
    model.train(was_training)

  # `acc` is a per-batch COUNT of correct samples, so accuracy divides by the
  # number of samples; the loss is already a per-batch value, so it is
  # averaged over batches.
  print(f"Test Accuracy: {total_acc_test / num_samples: .3f}")
  print(f"Test Loss: {total_loss_test / num_batches: .3f}")

### Loading Data

In [262]:
%%capture
# Read the dataset and pick out the move text (input) and ELO (target) columns.
chess_data = pd.read_csv(csv_path)
X, y = chess_data["Moves"], chess_data["Elo"]

# Standardize the targets as a single column. The fitted scaler is kept at
# notebook level so predictions can later be mapped back to ELO values.
scaler = StandardScaler()
y = scaler.fit_transform(np.reshape(y.values, (-1, 1)))

# The whole CSV is used as the evaluation set; DataLoader defaults apply.
test_data = ChessDataset(X, y)
test_dataloader = DataLoader(test_data)

### Initialize Model

In [264]:
%%capture
# Build the regressor and place it on the selected device in one step.
model = BertRegressor().to(device)

# L1 loss between predictions and targets.
criterion = nn.L1Loss()

# AdamW over all model parameters with the configured learning rate.
learning_rate = params.learning_rate
optimizer = optim.AdamW(model.parameters(), lr = learning_rate)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


### Run Testing

In [268]:
# No saved weights are loaded here: the model under test is BERT's pretrained
# encoder with a freshly initialised regression head.
# Presumably kept for evaluating a saved checkpoint instead:
# test_model_name = "06_01_2023_20_27_regression_model.pth"
# test_model_path = f"{folder}/{test_model_name}"

test_model = BertRegressor().to(device)
test(test_model, test_dataloader, criterion, scaler)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Test Accuracy:  0.171
Test Loss:  295.760
