## Multilingual GPT3: sentence classification

In [None]:
# Enable IPython's autoreload extension. Mode 2 re-imports modules before each
# cell is executed, so edits to the project's source files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import datetime
import os
import json
import numpy as np

import torch
from torch.nn import CrossEntropyLoss
from transformers import GPT2LMHeadModel, GPT2Tokenizer

from evaluate.mgpt_classification import evaluate_task, PAWSXTask, Metrics
from evaluate.mgpt_classification_configs import PAWSXTaskConfig

from inference import load_mgpt

In [None]:
# Load the model through the project's load_mgpt helper. The resulting object
# is what the cells below pass to task.predict and evaluate_task.
# NOTE(review): "sberbank-ai/mGPT" looks like a Hugging Face Hub model id —
# confirm against load_mgpt.
model = load_mgpt("sberbank-ai/mGPT")

### PAWSX example

In [None]:
# Settings for the single-task example: the task identifier handed to Metrics
# and the shot count handed to the task config.
task_name, shots = "PAWSX", 0

In [None]:
# Build the PAWSX task object from its config.
# shot_nums presumably sets how many few-shot examples are used — confirm in
# PAWSXTaskConfig.
task_config = PAWSXTaskConfig(shot_nums=shots)
task = PAWSXTask(task_config)

In [None]:
# Run the task with the loaded model. predict returns a pair, unpacked here
# into what the names suggest are reference labels and model predictions.
y_true, y_pred = task.predict(model)

In [None]:
# Score the predictions with the project's Metrics helper, selected by task name.
# NOTE(review): `result` is only assigned, so this cell shows no output unless
# calculate_metric prints on its own — confirm, or display `result` explicitly.
metric = Metrics(task_name)
result = metric.calculate_metric(y_true, y_pred)

### All tasks

In [None]:
# Shot counts swept by the "All tasks" loop: evaluate_task is called once per
# value for every task/model pair.
shots_list = [0, 1, 2]

In [None]:
# Evaluate every (task, model, shot count) combination and checkpoint the
# accumulated metrics to one timestamped JSON file after each run, so partial
# results survive an interrupted sweep.
results_root = 'results/'

os.makedirs(results_root, exist_ok=True)
tasks = ['PAWSX']
models = [('model', model)]
results = {}
# Use a filesystem-safe timestamp: str(datetime.now()) contains a space and
# colons, and colons are not allowed in Windows filenames.
start_time = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
result_fname = os.path.join(results_root, 'result_' + start_time + '.json')
for task_name in tasks:
    print('TASK', task_name)
    results[task_name] = {}
    # The loop variable is deliberately not called `model`, so the notebook-level
    # `model` that `models` was built from is never rebound by this cell.
    for name, eval_model in models:
        print('MODEL', name)
        results[task_name][name] = {}
        for shots in shots_list:
            print('FEWSHOTS', shots)
            metrics = evaluate_task(task_name, eval_model, shots)
            results[task_name][name][shots] = metrics
            # Rewrite the whole file each time. The encoding is explicit because
            # ensure_ascii=False can emit non-ASCII text that a non-UTF-8
            # default locale encoding would fail to write.
            with open(result_fname, 'w', encoding='utf-8') as f:
                json.dump(results, f, ensure_ascii=False, indent=4, sort_keys=True)
            # Report only after the file has been closed (and thus flushed).
            print('saved to', result_fname)
            print(f'Accuracy on the {task_name} dataset:', *metrics, sep='\n')