### Version of GPU to be used
To replicate the results, run this notebook on a Tesla V100-SXM2-16GB GPU (Colab Pro), as shown in the `nvidia-smi` output below.

In [1]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell.')
else:
  print(gpu_info)

Mon May 31 16:18:50 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 465.19.01    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   33C    P0    25W / 300W |      0MiB / 16160MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

# Libraries to be installed
The following libraries are required to run the code; the commands below upgrade them to their latest versions.

In [2]:
# !pip install --upgrade simpletransformers
# !pip install --upgrade rouge-score

## Setting up the seed
We fix the random seeds here so that the code produces the same results on every run.

In [3]:
import torch.multiprocessing
# Switch torch.multiprocessing to the 'file_system' sharing strategy.
# NOTE(review): presumably a workaround for open-file-descriptor limits when tensors are
# shared between processes; this notebook sets dataloader_num_workers = 0 and
# use_multiprocessing = False further down, so confirm it is still required.
torch.multiprocessing.set_sharing_strategy('file_system')

In [4]:
import pandas as pd
import numpy as np
import random
def seed_all(seed_value):
    """Seed the Python, NumPy and PyTorch random generators with ``seed_value``.

    When CUDA is available the CUDA generators are seeded as well, cuDNN is put
    into deterministic mode and cuDNN benchmarking is turned off.
    """
    # CPU-side generators: Python's `random`, NumPy, then PyTorch.
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed_value)

    if not torch.cuda.is_available():
        return

    # GPU-side generators: the current device first, then every device.
    torch.cuda.manual_seed(seed_value)
    torch.cuda.manual_seed_all(seed_value)

    # cuDNN flags set alongside the seeds.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


seed_all(13)

In [5]:
# Load the two competition files from the working directory. Later cells read the
# 'Yoruba' and 'English' columns of the training file and the 'ID' and 'Yoruba'
# columns of the test file.
train_df, test_df = (pd.read_csv(csv_name) for csv_name in ('Train.csv', 'Test.csv'))

In [6]:
from simpletransformers.t5 import T5Model, T5Args
import logging


In [7]:
# Configure the root logger at INFO level (the training cell's output below consists of
# INFO records), while raising the "transformers" logger to WARNING so that only its
# warnings and errors are emitted.
logging.basicConfig(level=logging.INFO)
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)


## Declaring the model arguments

In [8]:
# Build the argument object for fine-tuning and then load the pretrained
# "Davlan/mt5_base_yor_eng_mt" checkpoint as an "mt5" T5Model configured with it.
model_args                             = T5Args()
model_args.max_seq_length              = 150

# Batch size of 2 for both training and evaluation.
model_args.train_batch_size            = 2

model_args.eval_batch_size             = 2

model_args.num_train_epochs            = 7

# Evaluate on the hold-out frame while training; the training log below shows an
# evaluation every 2000 steps and at the end of each epoch.
model_args.evaluate_during_training    = True

model_args.evaluate_during_training_verbose = True

model_args.use_multiprocessing         = False

model_args.fp16                        = False
# NOTE(review): `feed_forward_proj` and `dropout_rate` do not appear in the T5Args repr
# printed in the next cell (nothing is listed between `do_lower_case` and
# `dynamic_quantize`, or between `evaluate_each_epoch` and `fp16`), so they look like
# plain attributes rather than recognised options and may have no effect on the model.
# If they are meant to reach the model configuration, confirm the supported mechanism
# (presumably the `config` dict shown in that repr).
model_args.feed_forward_proj           = "gated-gelu"
model_args.dropout_rate                = 0.02

model_args.dataloader_num_workers      = 0

model_args.save_steps                  = -1

model_args.save_eval_checkpoints       = False

model_args.no_cache                    = True

model_args.reprocess_input_data        = True

model_args.overwrite_output_dir        = True

# NOTE(review): the training frames use an empty `prefix`, while the prediction cell
# builds its inputs as prefix + ": " + text. Confirm that, with preprocess_inputs
# disabled, training inputs and prediction inputs end up in the same format.
model_args.preprocess_inputs           = False

model_args.num_return_sequences        = 1
model_args.gradient_accumulation_steps = 1

# Same seed value as the seed_all(13) call and the random_state of the hold-out split.
model_args.manual_seed                 = 13

model_args.max_length                  = 180


model_args.evaluate_generated_text     = True

model_args.optimizer                   = 'AdamW'
model_args.scheduler                   = 'cosine_with_hard_restarts_schedule_with_warmup'

# Early stopping monitors eval_loss (see `early_stopping_metric` in the repr below);
# the patience is set a few lines further down.
model_args.use_early_stopping          = True
# NOTE(review): both `warmup_steps` (here) and `warmup_ratio` (below) are set; confirm
# which of the two the library actually uses when both are given.
model_args.warmup_steps                = 100

model_args.num_beams                   = 9
model_args.learning_rate               = 0.0000166

# NOTE(review): `save_best_model = True` is combined with `no_save = True`. Confirm how
# the library resolves this; if `no_save` suppresses every write, no best-model
# checkpoint is kept and predictions come from the weights held in memory after the
# final training step.
model_args.save_best_model             = True
model_args.no_save                     = True
model_args.save_model_every_epoch      = False
# NOTE(review): `num_heads` (and `vocab_size` below) look like model-configuration names
# rather than training arguments, like `feed_forward_proj` above - TODO confirm whether
# they have any effect when set here.
model_args.num_heads                   = 12

model_args.warmup_ratio                = 0.15
model_args.vocab_size                  = 90000
model_args.early_stopping_patience     = 3

# Instantiating the model triggers the checkpoint download logged below.
model = T5Model("mt5", "Davlan/mt5_base_yor_eng_mt", args=model_args)

INFO:filelock:Lock 140383511309200 acquired on /root/.cache/huggingface/transformers/99eab39a9f871fe24115819883b2f38da2c59377cb93356a01264fe7917fed79.05e17a1a9d14141351d2c463ee8bf2f563fc853531c449d6f324c9308bd0bad6.lock


Downloading:   0%|          | 0.00/673 [00:00<?, ?B/s]

INFO:filelock:Lock 140383511309200 released on /root/.cache/huggingface/transformers/99eab39a9f871fe24115819883b2f38da2c59377cb93356a01264fe7917fed79.05e17a1a9d14141351d2c463ee8bf2f563fc853531c449d6f324c9308bd0bad6.lock
INFO:filelock:Lock 140383511402896 acquired on /root/.cache/huggingface/transformers/f30ecd590bf41407e0d9477f0d0328ece36355557f94985531e39fad23f39312.f1a4c43dbacb88a5b760b6df770e3d66de2ee2ca4fb20f91e094315bbc7ab36c.lock


Downloading:   0%|          | 0.00/2.33G [00:00<?, ?B/s]

INFO:filelock:Lock 140383511402896 released on /root/.cache/huggingface/transformers/f30ecd590bf41407e0d9477f0d0328ece36355557f94985531e39fad23f39312.f1a4c43dbacb88a5b760b6df770e3d66de2ee2ca4fb20f91e094315bbc7ab36c.lock
INFO:filelock:Lock 140383504487760 acquired on /root/.cache/huggingface/transformers/bab1fd3f2ac5822190f48b14f3a978c60f4605cc5df8beaa048be374becfcdff.da687df25d297aebfd515b6699506f3229d24423c0da1a02f45396bfa8197a95.lock


Downloading:   0%|          | 0.00/4.31M [00:00<?, ?B/s]

INFO:filelock:Lock 140383504487760 released on /root/.cache/huggingface/transformers/bab1fd3f2ac5822190f48b14f3a978c60f4605cc5df8beaa048be374becfcdff.da687df25d297aebfd515b6699506f3229d24423c0da1a02f45396bfa8197a95.lock
INFO:filelock:Lock 140383504517520 acquired on /root/.cache/huggingface/transformers/23d50e7fd0cba8d9c53cce44c6949fb6895c231d1bb60e2355113fb3f16cf104.294ebaa4cd17bb284635004c92d2c4d522ec488c828dcce0c2471b6f28e3fe82.lock


Downloading:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

INFO:filelock:Lock 140383504517520 released on /root/.cache/huggingface/transformers/23d50e7fd0cba8d9c53cce44c6949fb6895c231d1bb60e2355113fb3f16cf104.294ebaa4cd17bb284635004c92d2c4d522ec488c828dcce0c2471b6f28e3fe82.lock
INFO:filelock:Lock 140383504518672 acquired on /root/.cache/huggingface/transformers/b70bcd80858f11fa2b3e6e1f870f687baed44b3e9aa0b36fb2eff4542d9296fd.7c48e42ad49cfc9c67c28b17e1215147ae0364ec72bae9f77c34e33b906f511d.lock


Downloading:   0%|          | 0.00/351 [00:00<?, ?B/s]

INFO:filelock:Lock 140383504518672 released on /root/.cache/huggingface/transformers/b70bcd80858f11fa2b3e6e1f870f687baed44b3e9aa0b36fb2eff4542d9296fd.7c48e42ad49cfc9c67c28b17e1215147ae0364ec72bae9f77c34e33b906f511d.lock


In [9]:
# Echo the configured arguments so the effective settings are recorded with the run.
model_args

T5Args(adafactor_beta1=None, adafactor_clip_threshold=1.0, adafactor_decay_rate=-0.8, adafactor_eps=(1e-30, 0.001), adafactor_relative_step=False, adafactor_scale_parameter=False, adafactor_warmup_init=False, adam_epsilon=1e-08, best_model_dir='outputs/best_model', cache_dir='cache_dir/', config={}, cosine_schedule_num_cycles=0.5, custom_layer_parameters=[], custom_parameter_groups=[], dataloader_num_workers=0, do_lower_case=False, dynamic_quantize=False, early_stopping_consider_epochs=False, early_stopping_delta=0, early_stopping_metric='eval_loss', early_stopping_metric_minimize=True, early_stopping_patience=3, encoding=None, eval_batch_size=2, evaluate_during_training=True, evaluate_during_training_silent=True, evaluate_during_training_steps=2000, evaluate_during_training_verbose=True, evaluate_each_epoch=True, fp16=False, gradient_accumulation_steps=1, learning_rate=1.66e-05, local_rank=-1, logging_steps=50, manual_seed=13, max_grad_norm=1.0, max_seq_length=150, model_name='Davlan/

## Performing train-test split
We perform the train/validation split here: 99% of the training data is used for training and the remaining 1% is held out for validation. <br> This is the most training data we could give the model. We found that the more data the model is trained on, the better it performs, so we trained on the vast majority of the training data.

In [10]:
from sklearn.model_selection import train_test_split

# Only the source and target sentence columns are needed from the training file.
train_df = train_df[['Yoruba','English']]
X, y = train_df['Yoruba'], train_df['English']

# IDs of the competition test rows (not referenced again in the cells shown here).
id2use = test_df['ID']

# Hold out 1% of the training pairs. Despite the *_test names, this hold-out is the
# evaluation data passed to train_model, not the competition test set.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.01,
    random_state=13,
)

In [11]:
def _as_t5_frame(inputs, targets):
    """Return a frame with `input_text`, `prefix` and `target_text` columns.

    `input_text` and `target_text` take the values of `inputs` and `targets`
    (their original index is dropped); `prefix` is the empty string on every row.
    """
    return pd.DataFrame({
        'input_text': inputs.values,
        'prefix': "",
        'target_text': targets.values,
    })


train = _as_t5_frame(X_train, y_train)
test = _as_t5_frame(X_test, y_test)

train.shape,test.shape

((9953, 3), (101, 3))

In [12]:
# Cast every column of both frames to str (missing values become the text 'nan').
train = train.astype(str)
test = test.astype(str)

## Training the model
We train the model below and wait for the results. 

In [13]:
# Fine-tune on `train`, evaluating on `test` during training. Here `test` is the 1%
# hold-out split from the training file, not the competition test set. The call's return
# value (final global step plus the recorded losses) is displayed as the cell output.
model.train_model(train, eval_data=test)

INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


  0%|          | 0/9953 [00:00<?, ?it/s]

INFO:simpletransformers.t5.t5_model: Training started


Epoch:   0%|          | 0/7 [00:00<?, ?it/s]

Running Epoch 0 of 7:   0%|          | 0/4977 [00:00<?, ?it/s]

INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


  0%|          | 0/101 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/51 [00:00<?, ?it/s]

Decoding outputs:   0%|          | 0/101 [00:00<?, ?it/s]

INFO:simpletransformers.t5.t5_model:{'eval_loss': 1.871840338496601}
INFO:simpletransformers.t5.t5_model: No improvement in eval_loss
INFO:simpletransformers.t5.t5_model: Current step: 1
INFO:simpletransformers.t5.t5_model: Early stopping patience: 3
INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


  0%|          | 0/101 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/51 [00:00<?, ?it/s]

Decoding outputs:   0%|          | 0/101 [00:00<?, ?it/s]

INFO:simpletransformers.t5.t5_model:{'eval_loss': 1.816376351258334}
INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


  0%|          | 0/101 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/51 [00:00<?, ?it/s]

Decoding outputs:   0%|          | 0/101 [00:00<?, ?it/s]

INFO:simpletransformers.t5.t5_model:{'eval_loss': 1.796788718770532}


Running Epoch 1 of 7:   0%|          | 0/4977 [00:00<?, ?it/s]

INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


  0%|          | 0/101 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/51 [00:00<?, ?it/s]

Decoding outputs:   0%|          | 0/101 [00:00<?, ?it/s]

INFO:simpletransformers.t5.t5_model:{'eval_loss': 1.7779998387776168}
INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


  0%|          | 0/101 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/51 [00:00<?, ?it/s]

Decoding outputs:   0%|          | 0/101 [00:00<?, ?it/s]

INFO:simpletransformers.t5.t5_model:{'eval_loss': 1.7467299322871601}
INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


  0%|          | 0/101 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/51 [00:00<?, ?it/s]

Decoding outputs:   0%|          | 0/101 [00:00<?, ?it/s]

INFO:simpletransformers.t5.t5_model:{'eval_loss': 1.7276609896444808}


Running Epoch 2 of 7:   0%|          | 0/4977 [00:00<?, ?it/s]

INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


  0%|          | 0/101 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/51 [00:00<?, ?it/s]

Decoding outputs:   0%|          | 0/101 [00:00<?, ?it/s]

INFO:simpletransformers.t5.t5_model:{'eval_loss': 1.7288636422040415}
INFO:simpletransformers.t5.t5_model: No improvement in eval_loss
INFO:simpletransformers.t5.t5_model: Current step: 1
INFO:simpletransformers.t5.t5_model: Early stopping patience: 3
INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


  0%|          | 0/101 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/51 [00:00<?, ?it/s]

Decoding outputs:   0%|          | 0/101 [00:00<?, ?it/s]

INFO:simpletransformers.t5.t5_model:{'eval_loss': 1.724806396692407}
INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


  0%|          | 0/101 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/51 [00:00<?, ?it/s]

Decoding outputs:   0%|          | 0/101 [00:00<?, ?it/s]

INFO:simpletransformers.t5.t5_model:{'eval_loss': 1.7065041851179272}
INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


  0%|          | 0/101 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/51 [00:00<?, ?it/s]

Decoding outputs:   0%|          | 0/101 [00:00<?, ?it/s]

INFO:simpletransformers.t5.t5_model:{'eval_loss': 1.701942775179358}


Running Epoch 3 of 7:   0%|          | 0/4977 [00:00<?, ?it/s]

INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


  0%|          | 0/101 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/51 [00:00<?, ?it/s]

Decoding outputs:   0%|          | 0/101 [00:00<?, ?it/s]

INFO:simpletransformers.t5.t5_model:{'eval_loss': 1.704771741640334}
INFO:simpletransformers.t5.t5_model: No improvement in eval_loss
INFO:simpletransformers.t5.t5_model: Current step: 1
INFO:simpletransformers.t5.t5_model: Early stopping patience: 3
INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


  0%|          | 0/101 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/51 [00:00<?, ?it/s]

Decoding outputs:   0%|          | 0/101 [00:00<?, ?it/s]

INFO:simpletransformers.t5.t5_model:{'eval_loss': 1.6944094718086953}
INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


  0%|          | 0/101 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/51 [00:00<?, ?it/s]

Decoding outputs:   0%|          | 0/101 [00:00<?, ?it/s]

INFO:simpletransformers.t5.t5_model:{'eval_loss': 1.6835146373977847}


Running Epoch 4 of 7:   0%|          | 0/4977 [00:00<?, ?it/s]

INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


  0%|          | 0/101 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/51 [00:00<?, ?it/s]

Decoding outputs:   0%|          | 0/101 [00:00<?, ?it/s]

INFO:simpletransformers.t5.t5_model:{'eval_loss': 1.6885259671538484}
INFO:simpletransformers.t5.t5_model: No improvement in eval_loss
INFO:simpletransformers.t5.t5_model: Current step: 1
INFO:simpletransformers.t5.t5_model: Early stopping patience: 3
INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


  0%|          | 0/101 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/51 [00:00<?, ?it/s]

Decoding outputs:   0%|          | 0/101 [00:00<?, ?it/s]

INFO:simpletransformers.t5.t5_model:{'eval_loss': 1.6845120504206301}
INFO:simpletransformers.t5.t5_model: No improvement in eval_loss
INFO:simpletransformers.t5.t5_model: Current step: 2
INFO:simpletransformers.t5.t5_model: Early stopping patience: 3
INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


  0%|          | 0/101 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/51 [00:00<?, ?it/s]

Decoding outputs:   0%|          | 0/101 [00:00<?, ?it/s]

INFO:simpletransformers.t5.t5_model:{'eval_loss': 1.6831506107952081}
INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


  0%|          | 0/101 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/51 [00:00<?, ?it/s]

Decoding outputs:   0%|          | 0/101 [00:00<?, ?it/s]

INFO:simpletransformers.t5.t5_model:{'eval_loss': 1.6814905396279167}


Running Epoch 5 of 7:   0%|          | 0/4977 [00:00<?, ?it/s]

INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


  0%|          | 0/101 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/51 [00:00<?, ?it/s]

Decoding outputs:   0%|          | 0/101 [00:00<?, ?it/s]

INFO:simpletransformers.t5.t5_model:{'eval_loss': 1.6816962977834777}
INFO:simpletransformers.t5.t5_model: No improvement in eval_loss
INFO:simpletransformers.t5.t5_model: Current step: 1
INFO:simpletransformers.t5.t5_model: Early stopping patience: 3
INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


  0%|          | 0/101 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/51 [00:00<?, ?it/s]

Decoding outputs:   0%|          | 0/101 [00:00<?, ?it/s]

INFO:simpletransformers.t5.t5_model:{'eval_loss': 1.6801879625110065}
INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


  0%|          | 0/101 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/51 [00:00<?, ?it/s]

Decoding outputs:   0%|          | 0/101 [00:00<?, ?it/s]

INFO:simpletransformers.t5.t5_model:{'eval_loss': 1.6770829783350814}


Running Epoch 6 of 7:   0%|          | 0/4977 [00:00<?, ?it/s]

INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


  0%|          | 0/101 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/51 [00:00<?, ?it/s]

Decoding outputs:   0%|          | 0/101 [00:00<?, ?it/s]

INFO:simpletransformers.t5.t5_model:{'eval_loss': 1.6780137782003366}
INFO:simpletransformers.t5.t5_model: No improvement in eval_loss
INFO:simpletransformers.t5.t5_model: Current step: 1
INFO:simpletransformers.t5.t5_model: Early stopping patience: 3
INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


  0%|          | 0/101 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/51 [00:00<?, ?it/s]

Decoding outputs:   0%|          | 0/101 [00:00<?, ?it/s]

INFO:simpletransformers.t5.t5_model:{'eval_loss': 1.682381566248688}
INFO:simpletransformers.t5.t5_model: No improvement in eval_loss
INFO:simpletransformers.t5.t5_model: Current step: 2
INFO:simpletransformers.t5.t5_model: Early stopping patience: 3
INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


  0%|          | 0/101 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/51 [00:00<?, ?it/s]

Decoding outputs:   0%|          | 0/101 [00:00<?, ?it/s]

INFO:simpletransformers.t5.t5_model:{'eval_loss': 1.676317053682664}
INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


  0%|          | 0/101 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/51 [00:00<?, ?it/s]

Decoding outputs:   0%|          | 0/101 [00:00<?, ?it/s]

INFO:simpletransformers.t5.t5_model:{'eval_loss': 1.6717734645102538}
INFO:simpletransformers.t5.t5_model: Training of Davlan/mt5_base_yor_eng_mt model complete. Saved to outputs/.


(34839,
 {'eval_loss': [1.871840338496601,
   1.816376351258334,
   1.796788718770532,
   1.7779998387776168,
   1.7467299322871601,
   1.7276609896444808,
   1.7288636422040415,
   1.724806396692407,
   1.7065041851179272,
   1.701942775179358,
   1.704771741640334,
   1.6944094718086953,
   1.6835146373977847,
   1.6885259671538484,
   1.6845120504206301,
   1.6831506107952081,
   1.6814905396279167,
   1.6816962977834777,
   1.6801879625110065,
   1.6770829783350814,
   1.6780137782003366,
   1.682381566248688,
   1.676317053682664,
   1.6717734645102538],
  'global_step': [2000,
   4000,
   4977,
   6000,
   8000,
   9954,
   10000,
   12000,
   14000,
   14931,
   16000,
   18000,
   19908,
   20000,
   22000,
   24000,
   24885,
   26000,
   28000,
   29862,
   30000,
   32000,
   34000,
   34839],
  'train_loss': [1.9625639915466309,
   1.9588227272033691,
   1.254836916923523,
   1.0439339876174927,
   1.1379494667053223,
   1.4247726202011108,
   2.7862091064453125,
   2.93303

## Getting results on test dataset.
We convert the test data into the format the model expects and use the model to generate translations for it.
<br>
Note: the model trained above is used for the translations.

In [14]:
# Add the two model-input columns: the Yoruba sentence as `input_text` and an empty
# `prefix`, mirroring the columns of the training frames.
test_df = test_df.assign(input_text=test_df['Yoruba'], prefix="")

In [15]:
# One input string per test row, built as prefix + ": " + text. Because `prefix` is the
# empty string, every string starts with ": ".
# NOTE(review): the training frames also use an empty prefix, with preprocess_inputs
# disabled; confirm that the library feeds training inputs in this same ": <text>" form,
# otherwise prediction inputs differ from what the model was fine-tuned on.
prefixes = test_df["prefix"].tolist()
yoruba_texts = test_df["input_text"].tolist()
test_to_predict = [
    task_prefix + ": " + str(sentence)
    for task_prefix, sentence in zip(prefixes, yoruba_texts)
]

In [16]:
# Generate translations for the prepared strings with the model trained above. The result
# is stored as the 'Label' column in the next cell, i.e. one prediction per test row.
test_preds = model.predict(test_to_predict)

Generating outputs:   0%|          | 0/3408 [00:00<?, ?it/s]



Decoding outputs:   0%|          | 0/6816 [00:00<?, ?it/s]

## Creation of submission file and submitting 

In [17]:
# Store the predictions as the submission label with every ":" character removed.
# This strips all colons, including any that belong to the translation itself.
test_df['Label'] = [translation.replace(":","") for translation in test_preds]

In [18]:
# Keep only the two columns written to the submission file.
output_df = test_df[['ID','Label']]

In [19]:
# Write the submission to the working directory without the DataFrame index column.
output_df.to_csv('final_submission.csv',index=False)

### The output file obtained here is the one submitted to get the results.