### Imports, preparation

(Used the trick in https://github.com/googlecolab/colabtools/issues/253#issuecomment-648634717 to obtain more RAM in google colab)

In [81]:
!pip install -q torchtext==0.6.0

In [94]:
from google.colab import drive
drive.mount('/content/drive')

ROOT_PATH = "/content/drive/My Drive/cil"

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [93]:
import torch
from torch.utils.data import DataLoader, Dataset, random_split
import torch.nn.functional as F
from torch.autograd import Variable
from torch import optim
from torch import nn
from sklearn import metrics

import pandas as pd
import numpy as np
import joblib

import pickle
import json
import csv

import sys
import os
from tqdm.notebook import tqdm
tqdm.pandas()
import argparse
import datetime
import errno

### Set experiment parameters

In [4]:
class Obj:
    pass
args = Obj()
args.checkpoint_save_to_dir = os.path.join(ROOT_PATH, "CIL-results", "my_checkpoints")
args.checkpoint_continue_from = None # os.path.join(ROOT_PATH, "CIL-results", "my_checkpoints", "...") --> just set this inline wherever most convenient
args.cuda = True
args.epochs = 10
args.max_norm = 1e3
args.val_interval = 5000 # evaluate on the validation set every val_interval batch
args.max_samples = None # TODO: set to None to use all samples
args.batch_size = 128
args.num_workers = 4
args.val_frac = 0.01 # 0.1 # reserve 0.1 = 10% of the training samples for validation.
args.val_frac = None # Set to None to train on full training set, without validation.

### Load datasets

In [97]:
from torchtext.vocab import GloVe

class TweetsAsWordVectorsDataset(Dataset):
    def __init__(self, data_path, is_labeled=True, l1=131, max_samples=None, 
                 word_emb_name="twitter.27B", word_emb_dim=200, vector_cache_path=os.path.join(ROOT_PATH, "CIL-aux-data")):
        """A dataset object whose samples consist of: the (padded) concatenation of the word vectors of tweets.

        Arguments:
            data_path: path of (label and) data file in csv.
            is_labeled: whether the data_path file contains labels, or only the tweets.
            l1: max length of a sample, in nb of words.
            max_samples: (for dev,) only keep the max_samples first samples of the data.
            
            word_emb_name: name of the word embedding to use, used by torchtext.GloVe.
            word_emb_dim: dimension of the word embedding to use, used by torchtext.GloVe.
            vector_cache_path: path to cache directory, used by torchtext.GloVe.
        """
        self.glove = GloVe(name=word_emb_name, dim=word_emb_dim, cache=vector_cache_path)
        print("loaded pretrained GloVe word-embeddings.")
        self.data_path = data_path
        self.is_labeled = is_labeled
        self.l1 = l1
        # TODO: setting max_samples only makes sense if the csv itself was shuffled
        # X_txt = pd.read_csv(data_path, nrows=max_samples) # only keep max_samples first samples, or keep all if None
        X_txt = pd.read_csv(data_path)
        if max_samples:
            assert is_labeled, "must not use `max_samples` for unlabeled (assumed test-) data, as shuffling would modify the samples' ordering"
            X_txt = X_txt.sample(frac=1).reset_index(drop=True).iloc[:max_samples] # shuffle then select max_samples first
        self.y = X_txt['label'].to_numpy().astype(np.integer, copy=False) if is_labeled else None
        self.X_txt = X_txt['preprocessed_segmented_tweet'].to_numpy()
            
    def __len__(self):
        return self.X_txt.shape[0]

    def __getitem__(self, idx):
        words = self.X_txt[idx].lower().split()
        words += [""]*(self.l1 - len(words)) # pad with zeros until of correct size
        assert len(words) == self.l1
        X = self.glove.get_vecs_by_tokens(words, lower_case_backup=True)
        # for i in np.where(~X.bool().all(axis=1))[0]: # print OOV words
        #     if words[i] != "":
        #         print("out-of-vocabulary:", i, words[i])
        assert X.shape == (self.  , self.glove.dim)
        if self.is_labeled:
            return X, self.y[idx]
        else:
            return X

In [None]:
PREPROCESSED_TWITTER_DATASETS_DIR = os.path.join(ROOT_PATH, "stanford_glove_preprocessed")
TWEETS_TRAIN_FILENAME = os.path.join(PREPROCESSED_TWITTER_DATASETS_DIR, "dataset_stanfordglove_segmented_full.csv")

# train_dataset = TweetsAsWordVectorsDataset(TWEETS_TRAIN_FILENAME, is_labeled=True)
train_dataset = TweetsAsWordVectorsDataset(TWEETS_TRAIN_FILENAME, is_labeled=True, max_samples=args.max_samples)

if args.val_frac:
    val_size = int(args.val_frac * len(train_dataset))
    train_size = len(train_dataset) - val_size

    torch.manual_seed(0) # need random_split to be deterministic if we want to avoid information leak when we reload notebook in-between training epochs
    train_dataset, val_dataset = random_split(train_dataset, [train_size, val_size])
    torch.manual_seed(torch.initial_seed())

    val_dataloader = DataLoader(val_dataset, 
                                batch_size=args.batch_size,
                                shuffle=True, 
                                num_workers=args.num_workers)
    
train_dataloader = DataLoader(train_dataset, 
                              batch_size=args.batch_size,
                              shuffle=True, 
                              num_workers=args.num_workers)

len(train_dataloader), len(val_dataloader) if args.val_frac else None # number of batches. multiply by args.batch_size to get (approximate) number of samples.

In [99]:
TWEETS_TEST_FILENAME = os.path.join(PREPROCESSED_TWITTER_DATASETS_DIR, "test_stanfordglove_segmented.csv")

test_dataset = TweetsAsWordVectorsDataset(TWEETS_TEST_FILENAME, is_labeled=False)
test_dataloader = DataLoader(test_dataset, 
                             batch_size=args.batch_size,
                             shuffle=False, # need to keep the ordering of the tweets
                             num_workers=0)
len(test_dataloader)

loaded pretrained GloVe word-embeddings.


79

In [101]:
# for i_batch, data in enumerate(train_dataloader):
#     X, y = data
#     print(i_batch, X.shape, y)
#     if i_batch == 2:
#         break
# for i_batch, data in enumerate(val_dataloader):
#     X, y = data
#     print(i_batch, X.shape, y)
#     if i_batch == 2:
#         break

### Auxiliary functions

In [80]:
def save_checkpoint(model, optimizer, checkpoint, filename):
    """
    Args:
        optimizer: can be set to None, then no optimizer will be saved
        checkpoint is a dict that can be prepopulated (e.g with keys 'epoch' and 'validation_accuracy')
    """
    # https://pytorch.org/tutorials/beginner/saving_loading_models.html#saving-loading-model-across-devices
    if isinstance(model, torch.nn.DataParallel):
        assert not isinstance(model.module, torch.nn.DataParallel) # check we didn't wrap multiple times by mistake...
        checkpoint['model_state_dict'] = model.module.state_dict()
    else:
        checkpoint['model_state_dict'] = model.state_dict()
    if optimizer is not None:
        checkpoint['optimizer_state_dict'] = optimizer.state_dict()
    torch.save(checkpoint,filename)

def load_checkpoint(model, optimizer, filename):
    """
    Args:
        optimizer: can be set to None, then the optimizer state will be ignored (if there is one stored in the checkpoint)
            MUST be set to None if no optimizer state is stored in the checkpoint (so as to minimize risks of confusion)
    """
    # https://pytorch.org/tutorials/beginner/saving_loading_models.html#saving-loading-model-across-devices
    assert os.path.isfile(filename), f"no checkpoint found at {filename} (no such file)"
    # try to make it so that cpu->cpu, gpu->gpu, cpu->gpu, gpu->cpu all work (not 100% sure but I think this should do it)
    if args.cuda:
        device = torch.device("cuda")
        checkpoint = torch.load(filename, map_location=device)
    else:
        device = torch.device("cpu")
        checkpoint = torch.load(filename, map_location=device)
    # checkpoint = torch.load(filename) # or just don't worry abt it and pray that it works
    
    if isinstance(model, torch.nn.DataParallel):
        assert not isinstance(model.module, torch.nn.DataParallel) # check we didn't wrap multiple times by mistake...
        model.module.load_state_dict(checkpoint['model_state_dict'])
    else:
        model.load_state_dict(checkpoint['model_state_dict'])
    if args.cuda:
        model = model.cuda() # possibly always a noop but just in case

    loaded_optimizer = False
    if 'optimizer_state_dict' in checkpoint.keys():
        if optimizer is not None:
            optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
            loaded_optimizer = True
    else:
        assert optimizer is None, "Argument `optimizer` MUST be set to None if no optimizer state is stored in the checkpoint"

    if loaded_optimizer:
        print("successfully loaded model and optimizer states from checkpoint (in place)")
    else:
        print("successfully loaded model state from checkpoint (in place). (Did NOT load optimizer state.)")
    
    if 'epoch' in checkpoint:
        print(f"the model was trained for {checkpoint['epoch']} epochs")
    if 'validation_accuracy' in checkpoint:
        print(f"the model had achieved validation accuracy {checkpoint['validation_accuracy']}")
    return checkpoint

def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

In [13]:
def eval(val_dataloader, model):
    criterion_reduc_sum = nn.BCEWithLogitsLoss(reduction='sum') # sum of losses (instead of mean) over the batch
    if args.cuda:
        criterion_reduc_sum = criterion_reduc_sum.cuda()
    was_training = model.training # don't forget to put it back in training mode at the end!
    model.eval()
    with torch.no_grad():
        predicates_all = []
        target_all = []
        accumulated_loss = 0
        tot_samples = 0
        for i_batch, data in enumerate(tqdm(val_dataloader)):
            inputs, target = data
            target = target.float() # for some reason BCEWithLogitsLoss requires target to be float
            if args.cuda:
                inputs, target = inputs.cuda(), target.cuda()
            tot_samples += len(target)
            logit = model(inputs)
            logit = logit.squeeze(1)
            assert logit.shape == (target.shape[0],) # val_dataloader.batch_size, except for the last batch

            accumulated_loss += criterion_reduc_sum(logit, target).item() # sum of losses (instead of mean) over the batch
            predicates = torch.round(torch.sigmoid(logit))

            predicates_all.append(predicates)
            target_all.append(target)

            if args.cuda:
                torch.cuda.synchronize()
    if was_training:
        model.train()

    avg_loss = accumulated_loss / tot_samples
    predicates_all = torch.cat(predicates_all).cpu()
    target_all = torch.cat(target_all).cpu()
    accuracy = metrics.accuracy_score(target_all, predicates_all)
    f1_score = metrics.f1_score(target_all, predicates_all)
    print(f"Validation - \
        \n\t loss: {accumulated_loss / tot_samples}  \
        \n\t acc: {accuracy} \
        \n\t f1-score: {f1_score} \
    ")
    # if args.log_result:
    #     with open(os.path.join(path, args.save_folder,'result_res.csv'), 'a') as r:
    #         r.write('\n{:d},{:d},{:.5f},{:.2f},{:f}'.format(epoch_train, 
    #                                                         batch_train, 
    #                                                         avg_loss, 
    #                                                         accuracy, 
    #                                                         optimizer.state_dict()['param_groups'][0]['lr']))
    return avg_loss, accuracy

def predict(test_dataloader, model):
    assert not test_dataloader.dataset.is_labeled # the samples we get from test_dataloader are inputs only, no labels!
    was_training = model.training # don't forget to put it back in training mode at the end!
    model.eval()
    with torch.no_grad():
        y_pred = []
        for i_batch, data in enumerate(tqdm(test_dataloader)):
            inputs = data
            # inputs = inputs[::-1] # TODO: check that it's in the right order
            if args.cuda:
                inputs = inputs.cuda()
            logit = model(inputs)
            logit = logit.squeeze(1)
            assert logit.shape == (inputs.shape[0],) # test_dataloader.batch_size, except for the last batch

            predicates = torch.round(torch.sigmoid(logit))
            y_pred.append(predicates)

            if args.cuda:
                torch.cuda.synchronize()
    if was_training:
        model.train()

    y_pred = torch.cat(y_pred)
    return y_pred

### Define (and instantiate) the model

In [14]:
"""input size (N x C x L), for example, the initial text is (batch_size, 200, 131)"""

class  WordCNN(nn.Module):
    def __init__(self, input_feats=200, input_len=131):
        """
        Args:
            input_feats (int): the expected number of features of the input sequences
            input_len (int): the expected length of the input sequences (already padded)
        """
        super().__init__()
        self.rnn = nn.GRU(input_size=input_feats,
						  hidden_size=25,
						  num_layers=2,
						  bidirectional=True,
						  batch_first=True,
						  dropout=0.5)
        self.conv1 = nn.Sequential(
            nn.Conv1d(50, 50, 3), # nb of input channels is 2*hidden_size of rnn (2* because bidirectional)
            nn.BatchNorm1d(50),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=3, stride=3)
        )
        self.downsample = nn.Sequential(
            nn.Conv1d(50, 50, 4, stride=4),
            nn.BatchNorm1d(50)
        )
        self.fc_final = nn.Sequential(
            nn.Flatten(),
            nn.Linear(500, 50),
            nn.ReLU(),
            nn.Linear(50, 1)
        )

    def forward(self, x):
        # x: (batch, input_len, input_feats) = (batch, 131, 200)
        out, _ = self.rnn(x) # don't need the final hidden state
        # out: (batch, seq_len, num_directions * hidden_size)
        out = out.permute(0, 2, 1)
        # out: (batch, num_directions * hidden_size, seq_len) = (batch, 60, 131)
        out = self.conv1(out)
        # out: (batch, out_channels, out_len) = (batch, 70, 43)
        out = self.downsample(out)
        # out: (batch, out_channels, out_len) = (batch, 70, 10)
        out = self.fc_final(out)
        # out: (batch, 1)
        return out

In [90]:
model = WordCNN()
model_nickname = "WordCNN"
print(f"{count_parameters(model)} parameters")

criterion = nn.BCEWithLogitsLoss()
if args.cuda:
    model = torch.nn.DataParallel(model).cuda()
    # model = model.cuda()
    criterion = criterion.cuda()

optimizer = optim.Adam(model.parameters()) # TODO: try tweaking parameters (e.g learning rate)

88501 parameters


### Training

In [85]:
args.checkpoint_continue_from = os.path.join(ROOT_PATH, "CIL-results", "my_checkpoints", "WordCNN_epoch_8_2020-07-27T00:45:15.601964.pth.tar")

args.checkpoint_continue_from = os.path.join(ROOT_PATH, "CIL-results", "my_checkpoints", "WordCNN_epoch_SPECIAL_2020-07-27T14:54:02.739827.pth.tar")

In [91]:
if args.checkpoint_continue_from:
    print(f"=> loading checkpoint from {args.checkpoint_continue_from}")
    checkpoint = load_checkpoint(model, optimizer, args.checkpoint_continue_from) # load the state to `model` and `optimizer` and fetch the remaining info into `checkpoint`
    
    # always assume that we saved a model after an epoch finished, so start at the next epoch.
    start_epoch = checkpoint['epoch'] + 1
    # load optimizer, default all parameters are in cpu     --> pretty sure it's always a noop, but just in case
    if args.cuda:
        for state in optimizer.state.values():
            for k, v in state.items():
                if torch.is_tensor(v):
                    state[k] = v.cuda()
else:
    start_epoch = 1

start_epoch

=> loading checkpoint from /content/drive/My Drive/cil/CIL-results/my_checkpoints/WordCNN_epoch_SPECIAL_2020-07-27T14:54:02.739827.pth.tar
successfully loaded model and optimizer states from checkpoint (in place)
the model was trained for SPECIAL epochs
the model had achieved validation accuracy 0.876456


0

In [84]:
# ## for a manual save
# epoch="SPECIAL"

# if args.checkpoint_save_to_dir:
#     ts = datetime.datetime.now().isoformat()
#     file_path = os.path.join(args.checkpoint_save_to_dir, f"{model_nickname}_epoch_{epoch}_{ts}.pth.tar")
#     print(f"=> saving checkpoint model to {file_path}")
#     save_checkpoint(model, 
#                     optimizer,
#                     {'epoch': epoch,
#                       'validation_accuracy': val_acc},
#                     file_path)

=> saving checkpoint model to /content/drive/My Drive/cil/CIL-results/my_checkpoints/WordCNN_epoch_SPECIAL_2020-07-27T14:54:02.739827.pth.tar


In [23]:
# ## for a manual validation evaluation
# val_loss, val_acc = eval(val_dataloader, model)

HBox(children=(FloatProgress(value=0.0, max=1954.0), HTML(value='')))


Validation -         
	 loss: 0.28431587031173705          
	 acc: 0.876456         
	 f1-score: 0.8796833731973542     


In [None]:
model.train()

for epoch in range(start_epoch, args.epochs+1):
    print(f"\n\n===== Starting epoch #{epoch} =====")
    accumulated_train_loss = 0
    for i_batch, data in enumerate(tqdm(train_dataloader)):
        inputs, target = data
        target = target.float() # for some reason BCEWithLogitsLoss requires target to be float
        if args.cuda:
            inputs, target = inputs.cuda(), target.cuda()

        optimizer.zero_grad()
        logit = model(inputs)
        logit = logit.squeeze(1) # (n, 1) -> (n,)
        assert logit.shape == (target.shape[0],) # train_dataloader.batch_size, except for the last batch
        loss = criterion(logit, target)
        accumulated_train_loss += criterion(logit, target).item()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_norm)
        optimizer.step()
        if args.cuda:
            torch.cuda.synchronize()

        # if args.verbose:
        #     print('\nTargets, Predicates')
        #     print(torch.cat((target.unsqueeze(1), torch.unsqueeze(torch.max(logit, 1)[1].view(target.size()).data, 1)), 1))
        #     print('\nLogit')
        #     print(logit)
        # if i_batch % args.log_interval == 0:
        #     corrects = (torch.round(torch.sigmoid(logit)) == target.data).float().sum()  # convert into float for division
        #     accuracy = 100.0 * corrects/args.batch_size
        #     print('Epoch[{}] Batch[{}] - loss: {:.6f}  lr: {:.5f}  acc: {:.3f}% ({}/{})'.format(epoch,
        #                                                                   i_batch,
        #                                                                   loss.data,
        #                                                                   optimizer.state_dict()['param_groups'][0]['lr'],
        #                                                                   accuracy,
        #                                                                   corrects,
        #                                                                   args.batch_size))
        if (i_batch+1) % args.val_interval == 0:
            print(f"Training - loss: {accumulated_train_loss / (i_batch+1)}")
            val_loss, val_acc = eval(val_dataloader, model)

    print(f"----- Finished epoch #{epoch} -----")
    # validation
    print('\nTraining - loss: {:.6f}'.format(accumulated_train_loss/i_batch))
    val_loss, val_acc = eval(val_dataloader, model)

    # save the model as this epoch
    if args.checkpoint_save_to_dir:
        ts = datetime.datetime.now().isoformat()
        file_path = os.path.join(args.checkpoint_save_to_dir, f"{model_nickname}_epoch_{epoch}_{ts}.pth.tar")
        print(f"=> saving checkpoint model to {file_path}")
        save_checkpoint(model, 
                        optimizer,
                        {'epoch': epoch,
                         'validation_accuracy': val_acc},
                        file_path)

    start_epoch = epoch+1

print(f"finished the required number of epochs args.epoch={args.epoch}")

HBox(children=(FloatProgress(value=0.0, max=17579.0), HTML(value='')))

Training - loss: 0.3327306329786778


HBox(children=(FloatProgress(value=0.0, max=1954.0), HTML(value='')))


Validation -         
	 loss: 0.3229596925544739          
	 acc: 0.854488         
	 f1-score: 0.8560894367478697     
Training - loss: 0.32645890670418737


HBox(children=(FloatProgress(value=0.0, max=1954.0), HTML(value='')))


Validation -         
	 loss: 0.3160117226867676          
	 acc: 0.857316         
	 f1-score: 0.8627019287394104     
Training - loss: 0.32291412433187167


HBox(children=(FloatProgress(value=0.0, max=1954.0), HTML(value='')))


Validation -         
	 loss: 0.30975722340393064          
	 acc: 0.861592         
	 f1-score: 0.8656660791514935     



----- Finished epoch #4 -----

Training - loss: 0.321462


HBox(children=(FloatProgress(value=0.0, max=1954.0), HTML(value='')))


Validation -         
	 loss: 0.30743769065856935          
	 acc: 0.862264         
	 f1-score: 0.8628061899373676     
=> saving checkpoint model to /content/drive/My Drive/cil/CIL-results/my_checkpoints/WordCNN_epoch_4_2020-07-26T23:18:21.536587.pth.tar


HBox(children=(FloatProgress(value=0.0, max=17579.0), HTML(value='')))

Training - loss: 0.30709779320657254


HBox(children=(FloatProgress(value=0.0, max=1954.0), HTML(value='')))


Validation -         
	 loss: 0.3061881157131195          
	 acc: 0.862784         
	 f1-score: 0.8615277800203446     
Training - loss: 0.3057945430353284


HBox(children=(FloatProgress(value=0.0, max=1954.0), HTML(value='')))


Validation -         
	 loss: 0.3054709049987793          
	 acc: 0.864484         
	 f1-score: 0.8690691967304981     
Training - loss: 0.3047079294959704


HBox(children=(FloatProgress(value=0.0, max=1954.0), HTML(value='')))


Validation -         
	 loss: 0.3016506938285828          
	 acc: 0.866052         
	 f1-score: 0.8671103333055544     



----- Finished epoch #5 -----

Training - loss: 0.304560


HBox(children=(FloatProgress(value=0.0, max=1954.0), HTML(value='')))


Validation -         
	 loss: 0.3031929013118744          
	 acc: 0.865744         
	 f1-score: 0.869928151231195     
=> saving checkpoint model to /content/drive/My Drive/cil/CIL-results/my_checkpoints/WordCNN_epoch_5_2020-07-26T23:39:55.717249.pth.tar


HBox(children=(FloatProgress(value=0.0, max=17579.0), HTML(value='')))

Training - loss: 0.2979342018902302


HBox(children=(FloatProgress(value=0.0, max=1954.0), HTML(value='')))


Validation -         
	 loss: 0.29988063784027097          
	 acc: 0.867148         
	 f1-score: 0.8695979929092216     
Training - loss: 0.2983758534103632


HBox(children=(FloatProgress(value=0.0, max=1954.0), HTML(value='')))


Validation -         
	 loss: 0.29985317774391174          
	 acc: 0.8667         
	 f1-score: 0.86716598173607     
Training - loss: 0.29796034591098625


HBox(children=(FloatProgress(value=0.0, max=1954.0), HTML(value='')))


Validation -         
	 loss: 0.30073658047056195          
	 acc: 0.867836         
	 f1-score: 0.8703862795633123     



----- Finished epoch #6 -----

Training - loss: 0.297870


HBox(children=(FloatProgress(value=0.0, max=1954.0), HTML(value='')))


Validation -         
	 loss: 0.29783907020378114          
	 acc: 0.8682         
	 f1-score: 0.8693010083060301     
=> saving checkpoint model to /content/drive/My Drive/cil/CIL-results/my_checkpoints/WordCNN_epoch_6_2020-07-27T00:01:38.668085.pth.tar


HBox(children=(FloatProgress(value=0.0, max=17579.0), HTML(value='')))

Training - loss: 0.2929603427171707


HBox(children=(FloatProgress(value=0.0, max=1954.0), HTML(value='')))


Validation -         
	 loss: 0.3022654273080826          
	 acc: 0.868012         
	 f1-score: 0.8714093645875957     
Training - loss: 0.2930291951611638


HBox(children=(FloatProgress(value=0.0, max=1954.0), HTML(value='')))


Validation -         
	 loss: 0.2980802205848694          
	 acc: 0.86852         
	 f1-score: 0.8702114049704254     


### Prediction

In [24]:
y_pred = predict(test_dataloader, model)

HBox(children=(FloatProgress(value=0.0, max=79.0), HTML(value='')))




In [25]:
y_pred = y_pred.cpu().numpy()
y_pred = y_pred.astype(np.integer, copy=False)

# y_pred[y_pred==1] = -1
# y_pred[y_pred==0] = 1
y_pred[y_pred==0] = -1
print(f"predict {np.count_nonzero(y_pred==-1)} positive, {np.count_nonzero(y_pred==1)} negative")

predict 4808 positive, 5192 negative


In [26]:
ts = datetime.datetime.now().isoformat()
SUBMISSION_FILENAME = os.path.join(ROOT_PATH, f"{model_nickname}_submission_{ts}.csv")

with open(SUBMISSION_FILENAME, "w") as f:
    f.write("Id,Prediction\n")
    for i, label in enumerate(y_pred, start=1):  
        f.write(f"{i},{label}\n")

print(f"wrote to {SUBMISSION_FILENAME}")

wrote to /content/drive/My Drive/cil/WordCNN_submission_2020-07-27T13:45:33.598866.csv
