In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install transformers

Collecting transformers
  Downloading transformers-4.16.2-py3-none-any.whl (3.5 MB)
[K     |████████████████████████████████| 3.5 MB 8.5 MB/s 
[?25hCollecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.4.0-py3-none-any.whl (67 kB)
[K     |████████████████████████████████| 67 kB 7.6 MB/s 
Collecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 57.8 MB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.47-py2.py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 60.6 MB/s 
[?25hCollecting tokenizers!=0.11.3,>=0.10.1
  Downloading tokenizers-0.11.5-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.8 MB)
[K     |████████████████████████████████| 6.8 MB 46.0 MB/s 
Installing collected packages: pyyaml, tokenizers, sacremoses, huggingface-hub, transformers
  Attempting uninstall: pyyaml
  

In [3]:
import pandas as pd 
import numpy as np 
import os
import torch
import torch.nn as nn

import warnings 
warnings.filterwarnings("ignore")
from tqdm import tqdm
from torch.nn import functional as F
from torch.utils.data import DataLoader, Dataset
from transformers import AutoTokenizer, ElectraForSequenceClassification, AdamW
from transformers.optimization import get_cosine_schedule_with_warmup, get_linear_schedule_with_warmup
import re
from sklearn.model_selection import train_test_split

In [4]:
# Random Seed Fix
import random
def seed_everything(seed: int = 42):
    random.seed(seed)
    np.random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)  
    torch.backends.cudnn.deterministic = True  
    torch.backends.cudnn.benchmark = True  
seed_everything()

In [5]:
device = torch.device("cuda")

In [11]:
############# HYPERPARMS ##############
num_epochs = 5
batch_size =128
lr = 0.00001
pretrain = "monologg/koelectra-base-v3-discriminator"

In [12]:
def load_data(path):
  TRAIN = os.path.join(path, 'train_data.csv')
  TEST = os.path.join(path, 'test_data.csv')
  SS = os.path.join(path, 'sample_submission.csv')
  label_dict = {"entailment" : 0, "contradiction" : 1, "neutral" : 2}
  train = pd.read_csv(TRAIN)
  test = pd.read_csv(TEST)
  sample_submission = pd.read_csv(SS)
  train['label'] = train['label'].map(label_dict)

  return train,test,sample_submission

def text_clean(df):
  df["premise_"] = "[CLS]" + df["premise"] + "[SEP]"
  df["hypothesis_"] = df["hypothesis"] + "[SEP]"
  df["text_sum"] = df.premise_ + " " + df.hypothesis_
  df = df[['text_sum','label']]
  return df 

ROOT = '/content/drive/MyDrive/DACON_MONTHLYNLI'
train,test,sample_submission = load_data(ROOT)
clean_train,clean_test  = text_clean(train),text_clean(test)

In [13]:
############# Dataset ##############
class CustomDataset(Dataset):
  
  def __init__(self,dataset,option):
    
    self.dataset = dataset 
    self.option = option
    self.tokenizer = AutoTokenizer.from_pretrained(pretrain)

  
  def __len__(self):
    return len(self.dataset)
  
  def __getitem__(self, idx):
    row = self.dataset.iloc[idx, 0:2].values
    text = row[0]
    #y = row[1]

    inputs = self.tokenizer(
        text, 
        return_tensors='pt',
        truncation=True,
        max_length=70,
        pad_to_max_length=True,
        add_special_tokens=False
        )
    
    input_ids = inputs['input_ids'][0]
    attention_mask = inputs['attention_mask'][0]
    
    if self.option =='train':
        y =row[1]
        return input_ids,attention_mask,y

    return input_ids, attention_mask

In [14]:
############### CV ################
from sklearn.model_selection import StratifiedKFold

skf = StratifiedKFold(n_splits = 5,shuffle=True,random_state=42)
folds=[]
for trn_idx,val_idx in skf.split(clean_train['text_sum'],clean_train['label']):
    folds.append((trn_idx,val_idx))

In [17]:
best_models = []

for i,fold in enumerate(range(5)):
    print('===============',i+1,'fold start===============')
    model = ElectraForSequenceClassification.from_pretrained(pretrain,num_labels=3).to(device)
    model=nn.DataParallel(model).to(device)
    optimizer = AdamW(model.parameters(), lr=lr)
    
    train_idx = folds[fold][0]
    valid_idx = folds[fold][1]
    train_data = clean_train.loc[trn_idx]
    val_data = clean_train.loc[valid_idx]
    train_dataset = CustomDataset(train_data,'train')
    valid_dataset = CustomDataset(val_data,'train')
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)
    warmup_ratio = 0.1
    total_steps = len(train_loader) * num_epochs
    warmup_step = int(total_steps * warmup_ratio)
    scheduler = get_cosine_schedule_with_warmup(optimizer, num_warmup_steps=1, num_training_steps=total_steps)
    valid_loss_min = 0.4
    valid_acc_max = 0.8
    
    for epoch in range(num_epochs):
        batches = 0
        total_loss = 0.0
        correct = 0
        total =0
        model.train()
        
        for input_ids_batch, attention_masks_batch, y_batch in tqdm(train_loader):
            optimizer.zero_grad()
            y_batch = y_batch.to(device)
            y_pred = model(input_ids_batch.to(device), attention_mask = attention_masks_batch.to(device))[0]
            loss = F.cross_entropy(y_pred, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            _, predicted = torch.max(y_pred, 1)
            correct += (predicted == y_batch).sum()
            total += len(y_batch)
            batches += 1
            if batches % 100 == 0:
                print("Batch Loss: ", total_loss, "Accuracy: ", correct.float() / total)
      
        val_loss = []
        val_acc = []
        
        for input_ids_batch, attention_masks_batch, y_batch in tqdm(valid_loader):
            
            model.eval()
            with torch.no_grad():
                
                y_pred = model(input_ids_batch.to(device), attention_mask=attention_masks_batch.to(device))[0]
                valid_loss = F.cross_entropy(y_pred,y_batch.to(device)).cpu().detach().numpy()

                preds = torch.argmax(y_pred,1)
                preds = preds.cpu().detach().numpy()
                y_batch = y_batch.cpu().detach().numpy()
                batch_acc = (preds==y_batch).mean()
                val_loss.append(valid_loss)
                val_acc.append(batch_acc)
                
                
        val_loss = np.mean(val_loss)
        val_acc = np.mean(val_acc)
        scheduler.step()
        print(f'Epoch: {epoch} - valid Loss: {val_loss:.6f} - valid_acc : {val_acc:.6f}')
        print(optimizer.param_groups[0]["lr"])
        if valid_acc_max < val_acc:
            valid_acc_max = val_acc
            best_models.append(model)
            torch.save(model, f'koelectra-{len(best_models)}.pth') 
            print('model save, model val acc : ',val_acc)
            print('best_models size : ',len(best_models))



Some weights of the model checkpoint at monologg/koelectra-base-v3-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-base-v3-discriminator and are newly initialized: 

Batch Loss:  109.82958447933197 Accuracy:  tensor(0.3351, device='cuda:0')


100%|██████████| 157/157 [03:53<00:00,  1.49s/it]
100%|██████████| 40/40 [00:22<00:00,  1.76it/s]


Epoch: 0 - valid Loss: 1.099186 - valid_acc : 0.329492
1e-05


 64%|██████▎   | 100/157 [02:30<01:25,  1.50s/it]

Batch Loss:  86.87953099608421 Accuracy:  tensor(0.6357, device='cuda:0')


100%|██████████| 157/157 [03:54<00:00,  1.49s/it]
100%|██████████| 40/40 [00:22<00:00,  1.77it/s]


Epoch: 1 - valid Loss: 0.373736 - valid_acc : 0.878125
9.999959857256764e-06
model save, model val acc :  0.878125
best_models size :  1


 64%|██████▎   | 100/157 [02:29<01:25,  1.49s/it]

Batch Loss:  39.549238085746765 Accuracy:  tensor(0.8607, device='cuda:0')


100%|██████████| 157/157 [03:54<00:00,  1.49s/it]
100%|██████████| 40/40 [00:22<00:00,  1.76it/s]


Epoch: 2 - valid Loss: 0.240497 - valid_acc : 0.926172
9.999839429671632e-06
model save, model val acc :  0.926171875
best_models size :  2


 64%|██████▎   | 100/157 [02:30<01:30,  1.58s/it]

Batch Loss:  27.69295635819435 Accuracy:  tensor(0.9105, device='cuda:0')


100%|██████████| 157/157 [03:55<00:00,  1.50s/it]
100%|██████████| 40/40 [00:23<00:00,  1.73it/s]


Epoch: 3 - valid Loss: 0.150744 - valid_acc : 0.960156
9.99963871917832e-06
model save, model val acc :  0.96015625
best_models size :  3


 64%|██████▎   | 100/157 [02:30<01:25,  1.50s/it]

Batch Loss:  22.35672189295292 Accuracy:  tensor(0.9295, device='cuda:0')


100%|██████████| 157/157 [03:54<00:00,  1.49s/it]
100%|██████████| 40/40 [00:22<00:00,  1.76it/s]


Epoch: 4 - valid Loss: 0.103663 - valid_acc : 0.973437
9.999357728999657e-06
model save, model val acc :  0.9734375
best_models size :  4


Some weights of the model checkpoint at monologg/koelectra-base-v3-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-base-v3-discriminator and are newly initialized: 

Batch Loss:  109.85369527339935 Accuracy:  tensor(0.3387, device='cuda:0')


100%|██████████| 157/157 [03:54<00:00,  1.49s/it]
100%|██████████| 40/40 [00:22<00:00,  1.76it/s]


Epoch: 0 - valid Loss: 1.098797 - valid_acc : 0.334766
1e-05


 64%|██████▎   | 100/157 [02:29<01:25,  1.50s/it]

Batch Loss:  89.6503210067749 Accuracy:  tensor(0.6136, device='cuda:0')


100%|██████████| 157/157 [03:54<00:00,  1.49s/it]
100%|██████████| 40/40 [00:22<00:00,  1.77it/s]


Epoch: 1 - valid Loss: 0.398754 - valid_acc : 0.868555
9.999959857256764e-06
model save, model val acc :  0.8685546875
best_models size :  5


 64%|██████▎   | 100/157 [02:29<01:25,  1.50s/it]

Batch Loss:  40.140067636966705 Accuracy:  tensor(0.8620, device='cuda:0')


100%|██████████| 157/157 [03:54<00:00,  1.49s/it]
100%|██████████| 40/40 [00:22<00:00,  1.77it/s]


Epoch: 2 - valid Loss: 0.265000 - valid_acc : 0.918359
9.999839429671632e-06
model save, model val acc :  0.918359375
best_models size :  6


 64%|██████▎   | 100/157 [02:29<01:25,  1.50s/it]

Batch Loss:  29.161745637655258 Accuracy:  tensor(0.9027, device='cuda:0')


100%|██████████| 157/157 [03:54<00:00,  1.49s/it]
100%|██████████| 40/40 [00:22<00:00,  1.77it/s]


Epoch: 3 - valid Loss: 0.177499 - valid_acc : 0.948828
9.99963871917832e-06
model save, model val acc :  0.948828125
best_models size :  7


 64%|██████▎   | 100/157 [02:29<01:25,  1.50s/it]

Batch Loss:  21.272604323923588 Accuracy:  tensor(0.9332, device='cuda:0')


100%|██████████| 157/157 [03:53<00:00,  1.49s/it]
100%|██████████| 40/40 [00:22<00:00,  1.77it/s]


Epoch: 4 - valid Loss: 0.126331 - valid_acc : 0.966016
9.999357728999657e-06
model save, model val acc :  0.966015625
best_models size :  8


Some weights of the model checkpoint at monologg/koelectra-base-v3-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-base-v3-discriminator and are newly initialized: 

Batch Loss:  110.18184506893158 Accuracy:  tensor(0.3280, device='cuda:0')


100%|██████████| 157/157 [03:53<00:00,  1.49s/it]
100%|██████████| 40/40 [00:22<00:00,  1.77it/s]


Epoch: 0 - valid Loss: 1.101412 - valid_acc : 0.317773
1e-05


 64%|██████▎   | 100/157 [02:29<01:25,  1.50s/it]

Batch Loss:  92.63584923744202 Accuracy:  tensor(0.5709, device='cuda:0')


100%|██████████| 157/157 [03:54<00:00,  1.49s/it]
100%|██████████| 40/40 [00:22<00:00,  1.77it/s]


Epoch: 1 - valid Loss: 0.402408 - valid_acc : 0.861523
9.999959857256764e-06
model save, model val acc :  0.8615234375
best_models size :  9


 64%|██████▎   | 100/157 [02:29<01:25,  1.49s/it]

Batch Loss:  40.747379302978516 Accuracy:  tensor(0.8583, device='cuda:0')


100%|██████████| 157/157 [03:54<00:00,  1.49s/it]
100%|██████████| 40/40 [00:22<00:00,  1.76it/s]


Epoch: 2 - valid Loss: 0.268712 - valid_acc : 0.912500
9.999839429671632e-06
model save, model val acc :  0.9125
best_models size :  10


 64%|██████▎   | 100/157 [02:29<01:25,  1.50s/it]

Batch Loss:  28.86037975549698 Accuracy:  tensor(0.9040, device='cuda:0')


100%|██████████| 157/157 [03:54<00:00,  1.49s/it]
100%|██████████| 40/40 [00:22<00:00,  1.77it/s]


Epoch: 3 - valid Loss: 0.179030 - valid_acc : 0.947656
9.99963871917832e-06
model save, model val acc :  0.94765625
best_models size :  11


 64%|██████▎   | 100/157 [02:30<01:25,  1.50s/it]

Batch Loss:  21.935907907783985 Accuracy:  tensor(0.9287, device='cuda:0')


100%|██████████| 157/157 [03:54<00:00,  1.49s/it]
100%|██████████| 40/40 [00:22<00:00,  1.77it/s]


Epoch: 4 - valid Loss: 0.116279 - valid_acc : 0.967578
9.999357728999657e-06
model save, model val acc :  0.967578125
best_models size :  12


Some weights of the model checkpoint at monologg/koelectra-base-v3-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-base-v3-discriminator and are newly initialized: 

Batch Loss:  110.36125636100769 Accuracy:  tensor(0.3148, device='cuda:0')


100%|██████████| 157/157 [03:53<00:00,  1.49s/it]
100%|██████████| 40/40 [00:22<00:00,  1.77it/s]


Epoch: 0 - valid Loss: 1.104044 - valid_acc : 0.315876
1e-05


 64%|██████▎   | 100/157 [02:29<01:25,  1.50s/it]

Batch Loss:  90.4092509150505 Accuracy:  tensor(0.5788, device='cuda:0')


100%|██████████| 157/157 [03:54<00:00,  1.49s/it]
100%|██████████| 40/40 [00:22<00:00,  1.77it/s]


Epoch: 1 - valid Loss: 0.414693 - valid_acc : 0.857171
9.999959857256764e-06
model save, model val acc :  0.8571707589285715
best_models size :  13


 64%|██████▎   | 100/157 [02:29<01:25,  1.50s/it]

Batch Loss:  41.478720009326935 Accuracy:  tensor(0.8546, device='cuda:0')


100%|██████████| 157/157 [03:54<00:00,  1.49s/it]
100%|██████████| 40/40 [00:22<00:00,  1.76it/s]


Epoch: 2 - valid Loss: 0.248719 - valid_acc : 0.916406
9.999839429671632e-06
model save, model val acc :  0.91640625
best_models size :  14


 64%|██████▎   | 100/157 [02:29<01:25,  1.50s/it]

Batch Loss:  29.290293276309967 Accuracy:  tensor(0.9022, device='cuda:0')


100%|██████████| 157/157 [03:54<00:00,  1.49s/it]
100%|██████████| 40/40 [00:22<00:00,  1.77it/s]


Epoch: 3 - valid Loss: 0.171333 - valid_acc : 0.946484
9.99963871917832e-06
model save, model val acc :  0.946484375
best_models size :  15


 64%|██████▎   | 100/157 [02:29<01:25,  1.49s/it]

Batch Loss:  22.315836288034916 Accuracy:  tensor(0.9273, device='cuda:0')


100%|██████████| 157/157 [03:53<00:00,  1.49s/it]
100%|██████████| 40/40 [00:22<00:00,  1.77it/s]


Epoch: 4 - valid Loss: 0.103948 - valid_acc : 0.970313
9.999357728999657e-06
model save, model val acc :  0.9703125
best_models size :  16


Some weights of the model checkpoint at monologg/koelectra-base-v3-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-base-v3-discriminator and are newly initialized: 

Batch Loss:  110.00235271453857 Accuracy:  tensor(0.3355, device='cuda:0')


100%|██████████| 157/157 [03:53<00:00,  1.49s/it]
100%|██████████| 40/40 [00:22<00:00,  1.76it/s]


Epoch: 0 - valid Loss: 1.099312 - valid_acc : 0.359096
1e-05


 64%|██████▎   | 100/157 [02:29<01:25,  1.50s/it]

Batch Loss:  88.18478208780289 Accuracy:  tensor(0.6120, device='cuda:0')


100%|██████████| 157/157 [03:54<00:00,  1.49s/it]
100%|██████████| 40/40 [00:22<00:00,  1.77it/s]


Epoch: 1 - valid Loss: 0.459035 - valid_acc : 0.838030
9.999959857256764e-06
model save, model val acc :  0.8380301339285715
best_models size :  17


 64%|██████▎   | 100/157 [02:29<01:25,  1.50s/it]

Batch Loss:  40.47463580965996 Accuracy:  tensor(0.8595, device='cuda:0')


100%|██████████| 157/157 [03:54<00:00,  1.49s/it]
100%|██████████| 40/40 [00:22<00:00,  1.76it/s]


Epoch: 2 - valid Loss: 0.384307 - valid_acc : 0.866546
9.999839429671632e-06
model save, model val acc :  0.8665457589285716
best_models size :  18


 64%|██████▎   | 100/157 [02:29<01:25,  1.50s/it]

Batch Loss:  28.322804048657417 Accuracy:  tensor(0.9059, device='cuda:0')


100%|██████████| 157/157 [03:54<00:00,  1.49s/it]
100%|██████████| 40/40 [00:22<00:00,  1.77it/s]


Epoch: 3 - valid Loss: 0.380741 - valid_acc : 0.869085
9.99963871917832e-06
model save, model val acc :  0.8690848214285716
best_models size :  19


 64%|██████▎   | 100/157 [02:30<01:25,  1.50s/it]

Batch Loss:  21.39881782978773 Accuracy:  tensor(0.9309, device='cuda:0')


100%|██████████| 157/157 [03:54<00:00,  1.49s/it]
100%|██████████| 40/40 [00:22<00:00,  1.77it/s]


Epoch: 4 - valid Loss: 0.385521 - valid_acc : 0.873577
9.999357728999657e-06
model save, model val acc :  0.8735770089285715
best_models size :  20


In [18]:
# koelectra-4k번 모델이 가장 성능이 좋은 것으로 가정
test_dataset = CustomDataset(clean_test,'test')
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=0)

preds = dict()
for idx, m in enumerate(best_models):
    if (idx+1) % 4 == 0:
      print(f'{idx+1} 번째 모델 예측 진행중')
      bestm = m
      bestm.eval()
      answer = []
      with torch.no_grad():
          for input_ids_batch, attention_masks_batch in tqdm(test_loader):
              y_pred = bestm(input_ids_batch.to(device), attention_mask=attention_masks_batch.to(device))[0].detach().cpu().numpy()
              answer.extend(y_pred)
      preds[idx+1] = answer

4 번째 모델 예측 진행중


100%|██████████| 27/27 [00:07<00:00,  3.52it/s]


8 번째 모델 예측 진행중


100%|██████████| 27/27 [00:07<00:00,  3.57it/s]


12 번째 모델 예측 진행중


100%|██████████| 27/27 [00:07<00:00,  3.51it/s]


16 번째 모델 예측 진행중


100%|██████████| 27/27 [00:07<00:00,  3.43it/s]


20 번째 모델 예측 진행중


100%|██████████| 27/27 [00:08<00:00,  3.34it/s]


In [29]:
df = pd.DataFrame()
for key in preds.keys():
  df = pd.concat([df, pd.DataFrame(np.array(preds[key]))], axis =1 )
df.columns = [i for i in range(3*5)]
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
0,-1.813181,3.790997,-1.812176,-1.720752,3.570578,-1.937155,-1.712877,3.756066,-1.933276,-2.062337,3.668516,-1.805922,-1.981519,3.389702,-1.821765
1,-0.082987,-1.965420,2.308169,0.002843,-2.154179,2.360120,-0.327420,-2.401240,2.605109,-0.093013,-2.719182,2.769421,-0.062618,-2.263453,2.627895
2,0.749909,-2.242893,1.537016,2.553719,-2.144502,-0.164674,2.476145,-2.435955,-0.384265,0.788575,-2.690581,1.867146,2.160340,-2.659138,0.019321
3,-2.165525,3.519262,-1.113425,-2.150406,3.448496,-1.279370,-1.688352,3.543314,-1.662793,-2.727901,3.360396,-0.711315,-2.270495,3.052619,-0.945890
4,0.083742,1.967190,-1.975008,0.350241,1.986103,-2.371563,-0.147538,2.298349,-2.104549,-0.433285,2.022959,-1.663125,-0.197520,1.723815,-2.031419
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1661,-1.627669,-1.647104,3.591126,-1.384216,-1.830634,3.525597,-1.801290,-1.598683,3.555557,-1.023092,-2.277959,3.400325,-1.537982,-1.589641,3.705833
1662,-0.015603,-2.407104,2.783002,0.863470,-2.353145,1.929824,0.423998,-2.627591,2.029228,0.666282,-2.513784,1.693954,0.458724,-2.505109,2.226074
1663,-0.482072,-2.263233,3.078904,0.503921,-2.433107,2.378650,-0.612951,-2.443218,3.004122,0.462184,-2.902555,2.467948,-0.371765,-2.345434,3.127222
1664,-1.175655,-2.110607,3.655845,-1.044451,-2.146047,3.555725,-1.488245,-1.955118,3.518110,-0.855425,-2.385209,3.310996,-1.172345,-1.978459,3.751168


In [None]:
softvoted_prediction = 

In [36]:
from google.colab import files
files.download(saved_models/)

drive  sample_data  saved_models


In [37]:
!zip -r saved_models.zip saved_models/

  adding: saved_models/ (stored 0%)
  adding: saved_models/koelectra-9.pth (deflated 8%)
  adding: saved_models/koelectra-12.pth (deflated 8%)
  adding: saved_models/koelectra-1.pth (deflated 8%)
  adding: saved_models/koelectra-19.pth (deflated 8%)
  adding: saved_models/koelectra-11.pth (deflated 8%)
  adding: saved_models/koelectra-7.pth (deflated 8%)
  adding: saved_models/koelectra-20.pth (deflated 8%)
  adding: saved_models/koelectra-6.pth (deflated 8%)
  adding: saved_models/koelectra-13.pth (deflated 8%)
  adding: saved_models/koelectra-16.pth (deflated 8%)
  adding: saved_models/koelectra-8.pth (deflated 8%)
  adding: saved_models/koelectra-10.pth (deflated 8%)
  adding: saved_models/koelectra-17.pth (deflated 8%)
  adding: saved_models/koelectra-15.pth (deflated 8%)
  adding: saved_models/koelectra-2.pth (deflated 8%)
  adding: saved_models/koelectra-18.pth (deflated 8%)
  adding: saved_models/koelectra-14.pth (deflated 8%)
  adding: saved_models/koelectra-3.pth (deflated 8%)