In [1]:
!pip install coolname

Collecting coolname
  Downloading coolname-2.0.0-py2.py3-none-any.whl (37 kB)
Installing collected packages: coolname
Successfully installed coolname-2.0.0
[0m

In [2]:
import os
import gc
import copy
import time
import random
import string
import joblib

# For data manipulation
import numpy as np
import pandas as pd

# Pytorch Imports
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader

# Utils
from tqdm import tqdm
from collections import defaultdict
from feedback_custom_funtions import loss_fn, optimizer_setup, FeedBackDataset, RMSELoss, compute_metrics
from model_building import MeanPooling, MaxPooling, MinPooling, AttentionPooling, FeedBackModel
from coolname import generate_slug

# For Transformer Models
from transformers import AutoTokenizer, AutoModel, AutoConfig
from transformers import AdamW, get_linear_schedule_with_warmup
from transformers import DataCollatorWithPadding
from transformers import Trainer, TrainingArguments
from transformers.modeling_outputs import SequenceClassifierOutput

# Suppress warnings
import warnings
warnings.filterwarnings("ignore")

# Ask CUDA for descriptive error messages
os.environ.update({'CUDA_LAUNCH_BLOCKING': "1"})
# Scratch directory; the per-fold training outputs are written below it
os.makedirs("/kaggle/tmp/", exist_ok=True)

## Training Config

In [3]:
def set_seed(seed=42):
    '''Seed every random number generator used by the notebook.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and all CUDA
    devices) and switches cuDNN to deterministic mode so repeated runs
    produce the same results.

    Parameters
    ----------
    seed : int
        Value used to seed every generator (default 42).
    '''
    # Python's own generator is a separate RNG and must be seeded explicitly.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # Also covers additional GPUs; a no-op when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    # When running on the CuDNN backend, two further options must be set
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # Set a fixed value for the hash seed.
    # NOTE: Python reads PYTHONHASHSEED at interpreter start-up, so this only
    # takes effect for processes launched after this point.
    os.environ['PYTHONHASHSEED'] = str(seed)

# Random three-word slug; it is used in the name of each W&B run below.
hash_name = generate_slug(3)

# Central configuration read by every later cell.
config = dict(
    # Run control
    seed=42,
    epochs=5,
    debug=False,
    # Model
    model_name="microsoft/deberta-v3-large",
    PoolingLayer=AttentionPooling(1024),
    group="deberta-v3-Large-AP-LLRD",
    loss_type="smooth_l1",  # ['mse', 'rmse', 'smooth_l1']
    # Batching and precision
    train_batch_size=4,
    valid_batch_size=8,
    fp16_enable=True,
    max_length=512,
    # Optimisation
    layerwise=True,
    learning_rate=1e-5,
    decoder_lr=1e-4,
    weight_decay=1e-6,
    n_fold=4,
    n_accumulate=4,
    max_grad_norm=1000,
    # Targets
    num_classes=6,
    target_cols=["cohesion", "syntax", "vocabulary",
                 "phraseology", "grammar", "conventions"],
    # Runtime and experiment tracking
    device=torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
    hash_name=hash_name,
    competition="FeedBack3",
    _wandb_kernel="hazrul",
)

set_seed(config['seed'])

In [4]:
# Log in to Weights & Biases unless running in debug mode.
# `anony` records the outcome: None after a successful key login,
# "must" when falling back to anonymous logging.
if not config["debug"]:    
    import wandb

    try:
        from kaggle_secrets import UserSecretsClient
        user_secrets = UserSecretsClient()
        api_key = user_secrets.get_secret("WANDB_API_KEY")
        wandb.login(key=api_key)
        anony = None
        print("wandb Logged in Successfully")
    except Exception:
        # Best-effort fallback; `Exception` (rather than a bare except) keeps
        # KeyboardInterrupt/SystemExit from being swallowed.
        anony = "must"
        print('If you want to use your W&B account, go to Add-ons -> Secrets and provide your W&B access token. Use the Label name as WANDB_API_KEY. \nGet your W&B access token from here: https://wandb.ai/authorize')
else:
    os.environ["WANDB_DISABLED"] = "true"
    print("Debugging...")

[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


wandb Logged in Successfully


## Data Loading

In [5]:
# Training data; the `kfold` column is used later to split train/validation rows.
df = pd.read_csv(
    filepath_or_buffer="/kaggle/input/feedbackprizemultilabelstratifiedkfold/kfold_train_FB_comptetion.csv"
)
df.head(n=5)

Unnamed: 0,text_id,full_text,cohesion,syntax,vocabulary,phraseology,grammar,conventions,kfold
0,0016926B079C,I think that students would benefit from learn...,3.5,3.5,3.0,3.0,4.0,3.0,1
1,0022683E9EA5,When a problem is a change you have to let it ...,2.5,2.5,3.0,2.0,2.0,2.5,3
2,00299B378633,"Dear, Principal\n\nIf u change the school poli...",3.0,3.5,3.0,3.0,3.0,2.5,2
3,003885A45F42,The best time in life is when you become yours...,4.5,4.5,4.5,4.5,4.0,5.0,3
4,0049B1DF5CCC,Small act of kindness can impact in other peop...,2.5,3.0,3.0,3.0,2.5,2.5,3


In [6]:
# Load the tokenizer for the configured backbone and keep it in `config`,
# where the dataset construction looks it up.
config["tokenizer"] = AutoTokenizer.from_pretrained(config["model_name"])
tokenizer = config["tokenizer"]

# Batch collator that pads examples with this tokenizer.
collate_fn = DataCollatorWithPadding(tokenizer=tokenizer)

Downloading:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/580 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/2.35M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


## Training Setup

In [7]:
class CustomTrainer(Trainer):
    """Trainer that computes its loss with the project's ``loss_fn``.

    The loss type is read from the notebook-level ``config['loss_type']``.
    """

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Run the model on a batch and return the loss.

        Parameters
        ----------
        model : the model being trained; called with ``input_ids`` and
            ``attention_mask`` and expected to return an object with ``.logits``.
        inputs : mapping holding ``input_ids``, ``attention_mask`` and ``target``.
        return_outputs : when True, return ``(loss, outputs)`` instead of ``loss``.
        num_items_in_batch : accepted only for compatibility with newer
            ``Trainer`` versions that pass this keyword; it is not used.
        """
        outputs = model(inputs['input_ids'], inputs['attention_mask'])
        loss = loss_fn(outputs.logits, inputs['target'], loss_type=config['loss_type'])
        return (loss, outputs) if return_outputs else loss

In [8]:
# Cross-validation training: one model is trained per held-out fold.
# NOTE(review): range(1, n_fold) visits folds 1..n_fold-1 only; confirm that
# leaving out the remaining fold id in `df.kfold` is intentional.
for fold in range(1, config['n_fold']):
    print(f"========== Fold: {fold} ==========")
    
    if not config["debug"]:
        # `anony` is set by the W&B login cell: None after a successful key
        # login, "must" when falling back to anonymous logging.
        run = wandb.init(project=config['competition'], 
                         config=config,
                         job_type='Train',
                         group=config['group'],
                         tags=[config['model_name'], config['loss_type']],
                         name=f'{config["hash_name"]}-fold-{fold}',
                         anonymous=anony)

    # Rows tagged with the current fold are held out for validation.
    df_train = df[df.kfold != fold].reset_index(drop=True)
    df_valid = df[df.kfold == fold].reset_index(drop=True)

    train_dataset = FeedBackDataset(df_train, tokenizer=config['tokenizer'], max_length=config['max_length'], target_label = config["target_cols"])
    valid_dataset = FeedBackDataset(df_valid, tokenizer=config['tokenizer'], max_length=config['max_length'], target_label = config["target_cols"])

    # config["PoolingLayer"] is a single module instance. Each fold gets its own
    # copy so pooling weights trained on one fold are not carried into the next
    # (assumes FeedBackModel uses the instance it is given -- TODO confirm).
    model = FeedBackModel(config['model_name'],
                          config["num_classes"],
                          PoolingLayer = copy.deepcopy(config["PoolingLayer"])).to(config['device'])

    # Define Optimizer and Scheduler
    optimizer, scheduler = optimizer_setup(model=model, 
                                           config=config, 
                                           train_dataset_size =len(train_dataset),
                                           layerwise = config["layerwise"]
                                          )

    training_args = TrainingArguments(
        output_dir=f"/kaggle/tmp/outputs-{fold}/",
        evaluation_strategy="epoch",
        logging_strategy="epoch",
        per_device_train_batch_size=config['train_batch_size'],
        per_device_eval_batch_size=config['valid_batch_size'],
        num_train_epochs= config['epochs'],
        learning_rate= config['learning_rate'],
        weight_decay= config['weight_decay'],
        gradient_accumulation_steps=config['n_accumulate'],
        max_grad_norm=config['max_grad_norm'],
        seed=config['seed'],
        fp16  = config["fp16_enable"],
        fp16_full_eval  = config["fp16_enable"],
        group_by_length = True,
        half_precision_backend = "cuda_amp",
        metric_for_best_model= 'eval_mcrmse',
        load_best_model_at_end=True,
        greater_is_better=False,
        save_strategy="epoch",
        save_total_limit=1,
        # In debug mode wandb is neither imported nor initialised, so do not
        # ask the Trainer to report to it.
        report_to = "none" if config["debug"] else "wandb",
        label_names = ["target"]
    )


    trainer = CustomTrainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=valid_dataset,
        data_collator=collate_fn,
        optimizers=(optimizer, scheduler),
        compute_metrics=compute_metrics
    )

    trainer.train()
    # Writes the final model into `output_dir` for this fold.
    trainer.save_model()

    if not config["debug"]:
        run.finish()

    # The trainer, optimizer and scheduler all reference the model; drop every
    # one of them, collect garbage, and only then release cached GPU memory.
    del trainer, optimizer, scheduler, model, train_dataset, valid_dataset
    gc.collect()
    torch.cuda.empty_cache()

[34m[1mwandb[0m: Currently logged in as: [33mhazrulakmal[0m. Use [1m`wandb login --relogin`[0m to force relogin




Downloading:   0%|          | 0.00/833M [00:00<?, ?B/s]

Some weights of the model checkpoint at microsoft/deberta-v3-large were not used when initializing DebertaV2Model: ['lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.bias', 'mask_predictions.dense.weight', 'mask_predictions.dense.bias', 'mask_predictions.LayerNorm.bias', 'mask_predictions.classifier.weight', 'mask_predictions.classifier.bias', 'lm_predictions.lm_head.dense.weight', 'mask_predictions.LayerNorm.weight']
- This IS expected if you are initializing DebertaV2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaV2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Using cuda_amp half 

Epoch,Training Loss,Validation Loss,Mcrmse,Cohesion Rmse,Syntax Rmse,Vocabulary Rmse,Phraseology Rmse,Grammar Rmse,Conventions Rmse
0,0.6555,0.119582,0.490918,0.533877,0.465856,0.478416,0.485549,0.514728,0.467083
1,0.1397,0.10925,0.468507,0.495006,0.451374,0.447736,0.452634,0.499617,0.464674
2,0.1264,0.123965,0.498721,0.600911,0.475318,0.470459,0.48778,0.495478,0.462377
3,0.1183,0.109836,0.469683,0.511261,0.45678,0.430009,0.468661,0.494167,0.457218
4,0.1102,0.105246,0.4596,0.486948,0.447524,0.423641,0.446978,0.4936,0.458912


***** Running Evaluation *****
  Num examples = 979
  Batch size = 8
Saving model checkpoint to /kaggle/tmp/outputs-1/checkpoint-183
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
***** Running Evaluation *****
  Num examples = 979
  Batch size = 8
Saving model checkpoint to /kaggle/tmp/outputs-1/checkpoint-366
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Deleting older checkpoint [/kaggle/tmp/outputs-1/checkpoint-183] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 979
  Batch size = 8
Saving model checkpoint to /kaggle/tmp/outputs-1/checkpoint-549
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
***** Running Evaluation *****
  Num examples = 979
  Batch size = 8
Saving model checkpoint to /kaggle/tmp/outputs-1/checkpoint-732
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Deleting older checkpoint [/kaggle/tmp/outputs-1/checkpoint-549] due to args.save_total_limit


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/cohesion_rmse,▄▁█▂▁
eval/conventions_rmse,█▆▅▁▂
eval/grammar_rmse,█▃▂▁▁
eval/loss,▆▂█▃▁
eval/mcrmse,▇▃█▃▁
eval/phraseology_rmse,█▂█▅▁
eval/runtime,▂▁▁▆█
eval/samples_per_second,▇██▃▁
eval/steps_per_second,▇██▃▁
eval/syntax_rmse,▆▂█▃▁

0,1
eval/cohesion_rmse,0.48695
eval/conventions_rmse,0.45891
eval/grammar_rmse,0.4936
eval/loss,0.10525
eval/mcrmse,0.4596
eval/phraseology_rmse,0.44698
eval/runtime,89.1817
eval/samples_per_second,10.978
eval/steps_per_second,1.379
eval/syntax_rmse,0.44752


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


loading configuration file https://huggingface.co/microsoft/deberta-v3-large/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/f5d66efa509542e643c08a1579633e747d1697b1bec7de32c51c6969a16e81b9.3554ddad32be74b53d95a4b5760f07a2cd799268a921ae9437b1ee7a47adebc9
Model config DebertaV2Config {
  "_name_or_path": "microsoft/deberta-v3-large",
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 1024,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input": false,
  "position_buckets": 256,
  "relative_attention": true,
  "share_

Epoch,Training Loss,Validation Loss,Mcrmse,Cohesion Rmse,Syntax Rmse,Vocabulary Rmse,Phraseology Rmse,Grammar Rmse,Conventions Rmse
0,0.624,0.119669,0.489521,0.571417,0.473922,0.445101,0.477151,0.497821,0.471713
1,0.1424,0.115761,0.481797,0.53788,0.472389,0.451559,0.476061,0.500974,0.451918
2,0.1306,0.120265,0.491158,0.553021,0.470494,0.48613,0.501593,0.489833,0.445877
3,0.1226,0.113088,0.476043,0.526573,0.466251,0.451297,0.475307,0.490174,0.446659
4,0.1142,0.107469,0.464011,0.4949,0.462923,0.421621,0.46561,0.49568,0.443333


***** Running Evaluation *****
  Num examples = 972
  Batch size = 8
Saving model checkpoint to /kaggle/tmp/outputs-2/checkpoint-183
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
***** Running Evaluation *****
  Num examples = 972
  Batch size = 8
Saving model checkpoint to /kaggle/tmp/outputs-2/checkpoint-366
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Deleting older checkpoint [/kaggle/tmp/outputs-2/checkpoint-183] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 972
  Batch size = 8
Saving model checkpoint to /kaggle/tmp/outputs-2/checkpoint-549
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
***** Running Evaluation *****
  Num examples = 972
  Batch size = 8
Saving model checkpoint to /kaggle/tmp/outputs-2/checkpoint-732
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Deleting older checkpoint [/kaggle/tmp/outputs-2/checkpoint-366] due to args.save_total_limit


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/cohesion_rmse,█▅▆▄▁
eval/conventions_rmse,█▃▂▂▁
eval/grammar_rmse,▆█▁▁▅
eval/loss,█▆█▄▁
eval/mcrmse,█▆█▄▁
eval/phraseology_rmse,▃▃█▃▁
eval/runtime,▂█▁▄▃
eval/samples_per_second,▇▁█▅▆
eval/steps_per_second,█▁█▅▇
eval/syntax_rmse,█▇▆▃▁

0,1
eval/cohesion_rmse,0.4949
eval/conventions_rmse,0.44333
eval/grammar_rmse,0.49568
eval/loss,0.10747
eval/mcrmse,0.46401
eval/phraseology_rmse,0.46561
eval/runtime,87.8416
eval/samples_per_second,11.065
eval/steps_per_second,1.389
eval/syntax_rmse,0.46292


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


loading configuration file https://huggingface.co/microsoft/deberta-v3-large/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/f5d66efa509542e643c08a1579633e747d1697b1bec7de32c51c6969a16e81b9.3554ddad32be74b53d95a4b5760f07a2cd799268a921ae9437b1ee7a47adebc9
Model config DebertaV2Config {
  "_name_or_path": "microsoft/deberta-v3-large",
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 1024,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input": false,
  "position_buckets": 256,
  "relative_attention": true,
  "share_

Epoch,Training Loss,Validation Loss,Mcrmse,Cohesion Rmse,Syntax Rmse,Vocabulary Rmse,Phraseology Rmse,Grammar Rmse,Conventions Rmse
0,0.6072,0.119475,0.490002,0.557049,0.444237,0.480255,0.512704,0.487765,0.458005
1,0.1419,0.11535,0.480909,0.561097,0.447162,0.452858,0.501808,0.477023,0.445506
2,0.1252,0.107359,0.464495,0.503046,0.437213,0.448171,0.469821,0.483411,0.445308
3,0.1158,0.108978,0.468103,0.496291,0.442291,0.459248,0.474233,0.487118,0.449439
4,0.1054,0.106425,0.462756,0.483304,0.442764,0.44009,0.475292,0.484292,0.450794


***** Running Evaluation *****
  Num examples = 981
  Batch size = 8
Saving model checkpoint to /kaggle/tmp/outputs-3/checkpoint-183
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
***** Running Evaluation *****
  Num examples = 981
  Batch size = 8
Saving model checkpoint to /kaggle/tmp/outputs-3/checkpoint-366
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Deleting older checkpoint [/kaggle/tmp/outputs-3/checkpoint-183] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 981
  Batch size = 8
Saving model checkpoint to /kaggle/tmp/outputs-3/checkpoint-549
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Deleting older checkpoint [/kaggle/tmp/outputs-3/checkpoint-366] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 981
  Batch size = 8
Saving model checkpoint to /kaggle/tmp/outputs-3/checkpoint-732
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/cohesion_rmse,██▃▂▁
eval/conventions_rmse,█▁▁▃▄
eval/grammar_rmse,█▁▅█▆
eval/loss,█▆▂▂▁
eval/mcrmse,█▆▁▂▁
eval/phraseology_rmse,█▆▁▂▂
eval/runtime,█▁▂▁▂
eval/samples_per_second,▁█▇█▇
eval/steps_per_second,▁█▆█▆
eval/syntax_rmse,▆█▁▅▅

0,1
eval/cohesion_rmse,0.4833
eval/conventions_rmse,0.45079
eval/grammar_rmse,0.48429
eval/loss,0.10643
eval/mcrmse,0.46276
eval/phraseology_rmse,0.47529
eval/runtime,88.3982
eval/samples_per_second,11.098
eval/steps_per_second,1.391
eval/syntax_rmse,0.44276


In [20]:
from pathlib import Path

# Scratch directory one level above the current working directory.
TMP_DIR = Path('..') / 'temp'
TMP_DIR.mkdir(exist_ok=True)

In [43]:
ls ../

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
[0m[01;34moutputs-1[0m/  [01;34moutputs-2[0m/  [01;34moutputs-3[0m/  [01;34mtemp[0m/


In [22]:
ls ../outputs-1

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
[0m[01;34mcheckpoint-915[0m/  output-1.zip  pytorch_model.bin  training_args.bin


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
Saving model checkpoint to /kaggle/working/outputs-1
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


In [53]:
# Reload the fold-3 weights that the training loop saved under /kaggle/tmp/outputs-3/.
# An absolute path is used so the cell does not depend on the kernel's current
# working directory.
path = "/kaggle/tmp/outputs-3/pytorch_model.bin"
# Copy the pooling module so loading weights does not overwrite the instance
# stored in `config` (assumes FeedBackModel uses the instance it is given -- TODO confirm).
model = FeedBackModel(config['model_name'], config["num_classes"], PoolingLayer = copy.deepcopy(config["PoolingLayer"])).to(config['device'])
model.load_state_dict(torch.load(path, map_location= config["device"]))

loading configuration file https://huggingface.co/microsoft/deberta-v3-large/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/f5d66efa509542e643c08a1579633e747d1697b1bec7de32c51c6969a16e81b9.3554ddad32be74b53d95a4b5760f07a2cd799268a921ae9437b1ee7a47adebc9
Model config DebertaV2Config {
  "_name_or_path": "microsoft/deberta-v3-large",
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 1024,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input": false,
  "position_buckets": 256,
  "relative_attention": true,
  "share_

<All keys matched successfully>

In [54]:
# Wrap the reloaded model in a Trainer only to write it to /kaggle/working/outputs-3.
training_args = TrainingArguments(output_dir="/kaggle/working/outputs-3")
trainer = CustomTrainer(model=model, args=training_args)

trainer.save_model()

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
Saving model checkpoint to /kaggle/working/outputs-3
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


In [41]:
# File name for the exported weights: the model name with "/" replaced by "-".
# NOTE(review): the name says fold 1 -- confirm it matches the weights held in `model`.
path = config['model_name'].replace('/', '-') + "_fold_1_best.pth"

In [44]:
# Write the model's state dict into the current working directory.
torch.save(model.state_dict(), f"./{path}")

In [17]:
from IPython.display import FileLink
import zipfile

In [52]:
# Render a link to the saved fold-2 weights.
FileLink('/kaggle/working/outputs-2/pytorch_model.bin')

In [16]:
def zip_dir(directory = os.curdir, file_name = 'output-1.zip'):
    """
    Zip all the files in a directory (recursively).

    The archive is written inside ``directory``. Member names are stored
    relative to ``directory``. The process working directory is left
    unchanged.

    Parameters
    _____
    directory: str
        directory to be zipped, default is the current working directory

    file_name: str
        the name of the zipped file (including .zip), default is 'output-1.zip'

    Returns
    _____
    A FileLink pointing at the created archive, which can be used to
    download the zip file.

    Raises
    _____
    FileNotFoundError
        if ``directory`` does not exist or is not a directory
    """
    if not os.path.isdir(directory):
        raise FileNotFoundError(f"No such directory: {directory!r}")

    archive_path = os.path.join(directory, file_name)
    archive_abs = os.path.abspath(archive_path)

    # The context manager guarantees the archive is finalised and closed
    # before a link to it is handed out.
    with zipfile.ZipFile(archive_path, mode='w') as zip_ref:
        for folder, _, files in os.walk(directory):
            for file in files:
                file_path = os.path.join(folder, file)
                # Skip only the archive being written, not every file whose
                # name happens to contain `file_name`.
                if os.path.abspath(file_path) == archive_abs:
                    continue
                zip_ref.write(file_path, arcname=os.path.relpath(file_path, directory))

    return FileLink(archive_path)

In [23]:
# Absolute path (matching the calls for the other folds) so the cell does not
# depend on the kernel's current working directory.
zip_dir('/kaggle/tmp/outputs-1')

In [19]:
# Bundle the fold-2 output directory into a zip archive.
zip_dir(directory='/kaggle/tmp/outputs-2', file_name='output-2.zip')

In [None]:
# Bundle the fold-3 output directory into a zip archive.
zip_dir(directory='/kaggle/tmp/outputs-3', file_name='output-3.zip')