In [72]:
import gc
torch.cuda.empty_cache()
gc.collect()
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)


Thu Dec  7 23:59:31 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA A100-SXM...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   42C    P0    76W / 400W |  19921MiB / 40960MiB |    100%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [21]:
import torch, gc, random
import transformers
import accelerate
from transformers.file_utils import is_tf_available, is_torch_available
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, mean_squared_error, mean_absolute_error
import pandas as pd
import numpy as np

Goalkeeper Section

In [5]:
class MakeDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing per-sample encodings with scalar regression targets.

    Args:
        encodings: Mapping (anything exposing ``.items()``) from feature name to
            an indexable collection holding one entry per sample.
        labels: Indexable collection of numeric targets, one per sample.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        """Return sample ``idx``: one tensor per feature plus a float under ``"labels"``."""
        item = {name: torch.tensor(values[idx]) for name, values in self.encodings.items()}
        # The target is handed out as a plain Python float. Converting directly
        # avoids building a one-element tensor per sample only to discard it.
        item["labels"] = float(self.labels[idx])
        return item

    def __len__(self):
        """Number of samples, taken from the label collection."""
        return len(self.labels)

def comp_metrics(eval_pred):
    """Compute regression metrics from a ``(predictions, labels)`` pair.

    Args:
        eval_pred: Two-item iterable ``(logits, labels)``. Both are converted to
            NumPy arrays and flattened to 1-D before any metric is computed.

    Returns:
        dict with keys ``"mse"``, ``"rmse"``, ``"mae"``, ``"r2"`` and
        ``"smape"`` (symmetric MAPE, expressed in percent).
    """
    logits, labels = eval_pred
    # Flatten both sides so the element-wise SMAPE arithmetic can never
    # broadcast an (N,) array against an (N, 1) array into an (N, N) matrix.
    preds = np.asarray(logits).reshape(-1)
    labels = np.asarray(labels).reshape(-1)

    mse = mean_squared_error(labels, preds)
    # Take the root ourselves: the `squared=False` keyword of
    # `mean_squared_error` is deprecated in recent scikit-learn releases.
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(labels, preds)
    r2 = r2_score(labels, preds)

    # SMAPE term is 2*|pred - label| / (|label| + |pred|). Where both values are
    # zero the denominator vanishes; count that term as 0 instead of NaN.
    abs_err = np.abs(preds - labels)
    denom = np.abs(labels) + np.abs(preds)
    ratio = np.divide(
        2.0 * abs_err,
        denom,
        out=np.zeros(denom.shape, dtype=float),
        where=denom != 0,
    )
    smape = np.mean(ratio) * 100

    return {
        "mse": float(mse),
        "rmse": float(rmse),
        "mae": float(mae),
        "r2": float(r2),
        "smape": float(smape),
    }

In [17]:
# Goalkeeper data: load the CSV, split, tokenize, and build the regression model.
df_gk = pd.read_csv('GK_6.csv')
X_gk = df_gk['input']
Y_gk = df_gk['output']

# Fixed random_state so the train/test partition is identical on every re-run.
X_train, X_test, y_train, y_test = train_test_split(
    X_gk.tolist(), Y_gk, test_size=0.33, random_state=42
)

tokenizer = AutoTokenizer.from_pretrained('allenai/longformer-base-4096')
train_encode = tokenizer(X_train, truncation=True, padding=True, max_length=2000)
test_encode = tokenizer(X_test, truncation=True, padding=True, max_length=2000)

# `.to_numpy()` gives positionally indexable arrays (the shuffled Series keeps
# its original index labels); it replaces the deprecated `Series.ravel()`.
train = MakeDataset(train_encode, y_train.to_numpy())
test = MakeDataset(test_encode, y_test.to_numpy())

# num_labels=1 gives a single-output head, i.e. a regression model.
# Use the GPU when one is present, otherwise stay on CPU instead of crashing.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = AutoModelForSequenceClassification.from_pretrained(
    'allenai/longformer-base-4096', num_labels = 1
).to(device)

Some weights of LongformerForSequenceClassification were not initialized from the model checkpoint at allenai/longformer-base-4096 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [3]:
!pip install accelerate

Collecting accelerate
  Downloading accelerate-0.25.0-py3-none-any.whl (265 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m265.7/265.7 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: accelerate
Successfully installed accelerate-0.25.0


In [None]:
# Fine-tune and evaluate the goalkeeper model.
# NOTE: `train` / `test` are shared names that every data cell reassigns.
# Run the goalkeeper data cell immediately before this one.
print(transformers.__version__, accelerate.__version__)
import gc

# Collect unreachable Python objects first so any GPU tensors they held become
# free blocks, then release the allocator's unused cached blocks.
gc.collect()
torch.cuda.empty_cache()

training_args = TrainingArguments(
    output_dir ='./results_gk',  # per-section directory so checkpoints of different models never mix
    num_train_epochs = 28,
    per_device_train_batch_size = 4,
    per_device_eval_batch_size = 4,
    weight_decay = 0.01,
    learning_rate = 2e-5,
    logging_dir = './logs',
    logging_steps = 100,
    save_total_limit = 10,
    load_best_model_at_end = True,
    metric_for_best_model = 'rmse',
    # RMSE is an error: lower is better. Without this flag a non-loss metric
    # defaults to "greater is better" and the worst checkpoint would be kept.
    greater_is_better = False,
    evaluation_strategy = "epoch",
    save_strategy = "epoch",
)

trainer = Trainer(
    model = model,
    args = training_args,
    train_dataset = train,
    eval_dataset = test,
    compute_metrics = comp_metrics,
)

trainer.train()
# Keep and show the final metrics; a bare call mid-cell displays nothing.
gk_eval_metrics = trainer.evaluate()
print(gk_eval_metrics)
trainer.save_model("./gk_model")

4.35.2 0.25.0


Epoch,Training Loss,Validation Loss,Mse,Rmse,Mae,R2,Smape
1,8.9343,7.007601,7.007601,2.647187,2.133494,0.011576,100.144567
2,8.9815,7.112775,7.112775,2.666979,1.90068,-0.003259,102.465695
3,8.2151,6.815425,6.815426,2.610637,1.949064,0.038682,101.497653
4,8.4987,6.775809,6.775809,2.603038,2.067588,0.04427,101.547047
5,7.3465,6.661507,6.661508,2.58099,1.990226,0.060392,105.141054
6,7.9125,6.806548,6.806548,2.608936,1.956597,0.039934,105.69615
7,7.2291,7.04859,7.04859,2.654918,1.973709,0.005794,106.11487
8,7.1014,7.057075,7.057074,2.656515,1.915483,0.004598,107.828518


Epoch,Training Loss,Validation Loss,Mse,Rmse,Mae,R2,Smape
1,8.9343,7.007601,7.007601,2.647187,2.133494,0.011576,100.144567
2,8.9815,7.112775,7.112775,2.666979,1.90068,-0.003259,102.465695
3,8.2151,6.815425,6.815426,2.610637,1.949064,0.038682,101.497653
4,8.4987,6.775809,6.775809,2.603038,2.067588,0.04427,101.547047
5,7.3465,6.661507,6.661508,2.58099,1.990226,0.060392,105.141054
6,7.9125,6.806548,6.806548,2.608936,1.956597,0.039934,105.69615
7,7.2291,7.04859,7.04859,2.654918,1.973709,0.005794,106.11487
8,7.1014,7.057075,7.057074,2.656515,1.915483,0.004598,107.828518
9,7.0725,6.615871,6.615871,2.572134,1.847614,0.066829,102.914449
10,7.5472,7.154133,7.154133,2.674721,1.918396,-0.009092,110.099838


In [53]:
# `!export ...` runs in a throw-away subshell, so the variable never reaches
# this kernel process. Set it on the kernel's own environment instead.
# NOTE(review): the setting presumably has to be in place before the first CUDA
# allocation to take effect - run this cell right after a kernel restart.
import os

os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"

Midfielder Section

In [6]:
# Midfielder data: load the CSV, split, tokenize, and build the regression model.
import gc

# Collect unreachable Python objects first so any GPU tensors they held become
# free blocks, then release the allocator's unused cached blocks.
gc.collect()
torch.cuda.empty_cache()

# Names carry the `_mid` suffix so they cannot be mistaken for goalkeeper data.
df_mid = pd.read_csv('MID_6.csv')
X_mid = df_mid['input']
Y_mid = df_mid['output']

# Fixed random_state so the train/test partition is identical on every re-run.
X_train, X_test, y_train, y_test = train_test_split(
    X_mid.tolist(), Y_mid, test_size=0.33, random_state=42
)

tokenizer = AutoTokenizer.from_pretrained('allenai/longformer-base-4096')
train_encode = tokenizer(X_train, truncation=True, padding=True, max_length=2000)
test_encode = tokenizer(X_test, truncation=True, padding=True, max_length=2000)

# `.to_numpy()` gives positionally indexable arrays (the shuffled Series keeps
# its original index labels); it replaces the deprecated `Series.ravel()`.
train = MakeDataset(train_encode, y_train.to_numpy())
test = MakeDataset(test_encode, y_test.to_numpy())

# num_labels=1 gives a single-output head, i.e. a regression model.
# Use the GPU when one is present, otherwise stay on CPU instead of crashing.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_mid = AutoModelForSequenceClassification.from_pretrained(
    'allenai/longformer-base-4096', num_labels = 1
).to(device)


config.json:   0%|          | 0.00/694 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/597M [00:00<?, ?B/s]

Some weights of LongformerForSequenceClassification were not initialized from the model checkpoint at allenai/longformer-base-4096 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [22]:
# Mount Google Drive inside the Colab filesystem at /content/gdrive.
from google.colab import drive
drive.mount('/content/gdrive')
# Copy the midfielder CSV into the Drive folder "My Drive"; the backslash
# escapes the space in the folder name for the shell.
!cp MID_6.csv /content/gdrive/My\ Drive

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [18]:
import gc
torch.cuda.empty_cache()
gc.collect()
training_args = TrainingArguments(
    output_dir ='./results',
    num_train_epochs = 30,
    per_device_train_batch_size = 8,
    per_device_eval_batch_size = 8,
    weight_decay = 0.01,
    learning_rate = 2e-5,
    logging_dir = './logs',
    logging_steps = 100,
    save_total_limit = 10,
    load_best_model_at_end = True,
    metric_for_best_model = 'rmse',
    evaluation_strategy = "epoch",
    save_strategy = "epoch",
)

trainer_mid = Trainer(
    model = model_mid,
    args = training_args,
    train_dataset = train,
    eval_dataset = test,
    compute_metrics = comp_metrics,
)

trainer_mid.train()
trainer_mid.evaluate()
trainer_mid.save_model("./mid_model")
!cp -r mid_model /content/gdrive/My\ Drive

Epoch,Training Loss,Validation Loss,Mse,Rmse,Mae,R2,Smape
1,7.2637,6.952779,6.95278,2.636812,1.675276,0.107895,107.194055
2,6.4287,7.130577,7.130576,2.670314,1.505596,0.085083,113.619968
3,6.8912,7.97359,7.97359,2.823755,1.645583,-0.023084,109.982356
4,8.0634,7.646421,7.646421,2.765216,1.762482,0.018895,108.565032
5,8.7129,7.729809,7.729809,2.780253,1.621939,0.008196,112.110683
6,6.618,7.099077,7.099077,2.664409,1.583199,0.089124,113.409585
7,6.144,7.008065,7.008065,2.647275,1.522975,0.100802,109.623146
8,5.9707,6.900882,6.900882,2.626953,1.543923,0.114554,111.375281
9,7.8183,6.93897,6.93897,2.634192,1.564646,0.109667,110.666338
10,7.4676,6.890084,6.890084,2.624897,1.587903,0.11594,110.385949


Defender Section

In [24]:
# Defender data: load the CSV, split, tokenize, and build the regression model.
import gc

# Collect unreachable Python objects first so any GPU tensors they held become
# free blocks, then release the allocator's unused cached blocks.
gc.collect()
torch.cuda.empty_cache()

df_def = pd.read_csv('DEF_6.csv')
X_def = df_def['input']
Y_def = df_def['output']

# Fixed random_state so the train/test partition is identical on every re-run.
X_train, X_test, y_train, y_test = train_test_split(
    X_def.tolist(), Y_def, test_size=0.33, random_state=42
)

tokenizer = AutoTokenizer.from_pretrained('allenai/longformer-base-4096')
train_encode = tokenizer(X_train, truncation=True, padding=True, max_length=2000)
test_encode = tokenizer(X_test, truncation=True, padding=True, max_length=2000)

# `.to_numpy()` gives positionally indexable arrays (the shuffled Series keeps
# its original index labels); it replaces the deprecated `Series.ravel()`.
train = MakeDataset(train_encode, y_train.to_numpy())
test = MakeDataset(test_encode, y_test.to_numpy())

# num_labels=1 gives a single-output head, i.e. a regression model.
# Use the GPU when one is present, otherwise stay on CPU instead of crashing.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_def = AutoModelForSequenceClassification.from_pretrained(
    'allenai/longformer-base-4096', num_labels = 1
).to(device)

Some weights of LongformerForSequenceClassification were not initialized from the model checkpoint at allenai/longformer-base-4096 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [66]:
import gc
torch.cuda.empty_cache()
gc.collect()
training_args = TrainingArguments(
    output_dir ='./results_def',
    num_train_epochs = 20,
    per_device_train_batch_size =8,
    per_device_eval_batch_size = 8,
    weight_decay = 0.01,
    learning_rate = 2e-5,
    logging_dir = './logs',
    logging_steps = 100,
    save_total_limit = 10,
    load_best_model_at_end = True,
    metric_for_best_model = 'rmse',
    evaluation_strategy = "epoch",
    save_strategy = "epoch",
)

trainer_def = Trainer(
    model = model_def,
    args = training_args,
    train_dataset = train,
    eval_dataset = test,
    compute_metrics = comp_metrics,
)

trainer_def.train()
trainer_def.evaluate()
trainer_def.save_model("./def_model")
!cp -r def_model /content/gdrive/My\ Drive

Epoch,Training Loss,Validation Loss,Mse,Rmse,Mae,R2,Smape
1,8.4587,8.863748,8.863748,2.977205,2.187564,0.000657,125.629583
2,8.1318,8.914737,8.914737,2.985756,2.318015,-0.005092,127.299312
3,7.599,8.726077,8.726077,2.953993,2.145411,0.016178,126.951479
4,7.3442,8.550239,8.550239,2.924079,1.893673,0.036003,127.978792
5,8.8623,8.393139,8.393139,2.897091,1.925326,0.053715,129.834917
6,6.6538,8.586635,8.586634,2.930296,1.888369,0.0319,131.078083
7,7.3591,8.708015,8.708015,2.950935,1.839748,0.018215,131.487729
8,6.8471,8.409163,8.409163,2.899856,1.877126,0.051909,127.909292
9,7.1757,8.461371,8.461371,2.908844,1.972277,0.046023,130.096865
10,7.17,8.413882,8.413882,2.900669,2.009926,0.051377,129.399687


In [73]:
# Forward data: load the CSV, split, tokenize, and build the regression model.
import gc

# Collect unreachable Python objects first so any GPU tensors they held become
# free blocks, then release the allocator's unused cached blocks.
gc.collect()
torch.cuda.empty_cache()

df_fwd = pd.read_csv('FWD_6.csv')
X_fwd = df_fwd['input']
Y_fwd = df_fwd['output']

# Fixed random_state so the train/test partition is identical on every re-run.
X_train, X_test, y_train, y_test = train_test_split(
    X_fwd.tolist(), Y_fwd, test_size=0.33, random_state=42
)

tokenizer = AutoTokenizer.from_pretrained('allenai/longformer-base-4096')
train_encode = tokenizer(X_train, truncation=True, padding=True, max_length=2000)
test_encode = tokenizer(X_test, truncation=True, padding=True, max_length=2000)

# `.to_numpy()` gives positionally indexable arrays (the shuffled Series keeps
# its original index labels); it replaces the deprecated `Series.ravel()`.
train = MakeDataset(train_encode, y_train.to_numpy())
test = MakeDataset(test_encode, y_test.to_numpy())

# num_labels=1 gives a single-output head, i.e. a regression model.
# Use the GPU when one is present, otherwise stay on CPU instead of crashing.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_fwd = AutoModelForSequenceClassification.from_pretrained(
    'allenai/longformer-base-4096', num_labels = 1
).to(device)

Some weights of LongformerForSequenceClassification were not initialized from the model checkpoint at allenai/longformer-base-4096 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [74]:
import gc
torch.cuda.empty_cache()
gc.collect()
training_args = TrainingArguments(
    output_dir ='./results',
    num_train_epochs = 20,
    per_device_train_batch_size = 8,
    per_device_eval_batch_size = 8,
    weight_decay = 0.01,
    learning_rate = 2e-4,
    logging_dir = './logs',
    logging_steps = 100,
    save_total_limit = 10,
    load_best_model_at_end = True,
    metric_for_best_model = 'rmse',
    evaluation_strategy = "epoch",
    save_strategy = "epoch",
)

trainer_fwd = Trainer(
    model = model_fwd,
    args = training_args,
    train_dataset = train,
    eval_dataset = test,
    compute_metrics = comp_metrics,
)

trainer_fwd.train()
trainer_fwd.evaluate()
trainer_fwd.save_model("./fwd_model")
!cp -r fwd_model /content/gdrive/My\ Drive

Epoch,Training Loss,Validation Loss,Mse,Rmse,Mae,R2,Smape
1,8.7633,9.693415,9.693415,3.113425,1.945278,-0.037722,112.675767
2,8.4451,9.61735,9.617351,3.101185,1.958326,-0.029579,112.223477
3,8.9485,9.70833,9.70833,3.115819,1.94287,-0.039319,112.758195
4,8.4832,9.459307,9.459307,3.075598,2.322741,-0.01266,115.121009
5,9.1113,9.407578,9.407578,3.067178,2.009737,-0.007122,110.392373
6,9.3484,9.558799,9.5588,3.091731,2.399804,-0.023311,116.39606
7,8.4484,9.475462,9.475462,3.078224,1.988861,-0.01439,111.142408
8,8.2898,9.586755,9.586755,3.096248,1.964078,-0.026304,112.021937
9,8.4274,9.34389,9.34389,3.056778,2.140654,-0.000304,111.65567
10,9.6277,9.354551,9.354552,3.058521,2.036955,-0.001446,109.40942
