In [1]:
import numpy as np
import pandas as pd
import os
import time
import gc
import random
from tqdm._tqdm_notebook import tqdm_notebook as tqdm
from nltk.stem import PorterStemmer
ps = PorterStemmer()
from nltk.stem.lancaster import LancasterStemmer
lc = LancasterStemmer()
from nltk.stem import SnowballStemmer
sb = SnowballStemmer("english")
from keras.preprocessing import text, sequence
import torch
from torch import nn
from torch.utils import data
from torch.nn import functional as F
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.metrics import roc_auc_score
import spacy
import gensim

Using TensorFlow backend.


In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [3]:
# disable progress bars when submitting
def is_interactive():
   return 'SHLVL' not in os.environ

if not is_interactive():
    def nop(it, *a, **k):
        return it

    tqdm = nop

In [4]:
def seed_everything(seed=1234):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
seed_everything()

In [5]:
CRAWL_EMBEDDING_PATH = '../input/crawl-300d-2M.vec'
GLOVE_EMBEDDING_PATH = '../input/glove.840B.300d.txt'
NUM_MODELS = 2
LSTM_UNITS = 128
DENSE_HIDDEN_UNITS = 4 * LSTM_UNITS
MAX_LEN = 220

In [6]:
start_time = time.time()
spell_model = gensim.models.KeyedVectors.load_word2vec_format('../input/wiki-news-300d-1M.vec')
words = spell_model.index2word
w_rank = {}
for i,word in enumerate(words):
    w_rank[word] = i
WORDS = w_rank
del words
del w_rank
del spell_model
gc.collect()
print("--- %s seconds ---" % (time.time() - start_time))

# Use fast text as vocabulary
def words(text): return re.findall(r'\w+', text.lower())
def P(word): 
    "Probability of `word`."
    # use inverse of rank as proxy
    # returns 0 if the word isn't in the dictionary
    return - WORDS.get(word, 0)
def correction(word): 
    "Most probable spelling correction for word."
    return max(candidates(word), key=P)
def candidates(word): 
    "Generate possible spelling corrections for word."
    return (known([word]) or known(edits1(word)) or [word])
def known(words): 
    "The subset of `words` that appear in the dictionary of WORDS."
    return set(w for w in words if w in WORDS)
def edits1(word):
    "All edits that are one edit away from `word`."
    letters    = 'abcdefghijklmnopqrstuvwxyz'
    splits     = [(word[:i], word[i:])    for i in range(len(word) + 1)]
    deletes    = [L + R[1:]               for L, R in splits if R]
    transposes = [L + R[1] + R[0] + R[2:] for L, R in splits if len(R)>1]
    replaces   = [L + c + R[1:]           for L, R in splits if R for c in letters]
    inserts    = [L + c + R               for L, R in splits for c in letters]
    return set(deletes + transposes + replaces + inserts)
def edits2(word): 
    "All edits that are two edits away from `word`."
    return (e2 for e1 in edits1(word) for e2 in edits1(e1))
def singlify(word):
    return "".join([letter for i,letter in enumerate(word) if i == 0 or letter != word[i-1]])

--- 117.69470286369324 seconds ---


In [7]:

def get_coefs(word, *arr):
    return word, np.asarray(arr, dtype='float32')

def load_embeddings(path):
    with open(path) as f:
        return dict(get_coefs(*line.strip().split(' ')) for line in tqdm(f))

def build_matrix(word_dict, lemma_dict, path):
    embed_size = 300
    embeddings_index = load_embeddings(path)
    embedding_matrix = np.zeros((len(word_dict) + 1, embed_size), dtype=np.float32)
    unknown_words = []
    unknown_vector = np.zeros((embed_size,), dtype=np.float32) - 1
    
    for key in tqdm(word_dict):
        word = key
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[word_dict[key]] = embedding_vector
            continue
        word = key.lower()
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[word_dict[key]] = embedding_vector
            continue
        word = key.upper()
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[word_dict[key]] = embedding_vector
            continue
        word = key.capitalize()
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[word_dict[key]] = embedding_vector
            continue
        word = ps.stem(key)
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[word_dict[key]] = embedding_vector
            continue
        word = lc.stem(key)
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[word_dict[key]] = embedding_vector
            continue
        word = sb.stem(key)
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[word_dict[key]] = embedding_vector
            continue
        word = lemma_dict[key]
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[word_dict[key]] = embedding_vector
            continue
        if len(key) > 1:
            word = correction(key)
            embedding_vector = embeddings_index.get(word)
            if embedding_vector is not None:
                embedding_matrix[word_dict[key]] = embedding_vector
                continue
        
        #Unknown word, does not exist in dictionary
        embedding_matrix[word_dict[key]] = unknown_vector
        unknown_words.append(word)
    return embedding_matrix, unknown_words

In [8]:
class SequenceBucketCollator():
    def __init__(self, choose_length, sequence_index, length_index, label_index=None):
        self.choose_length = choose_length
        self.sequence_index = sequence_index
        self.length_index = length_index
        self.label_index = label_index
        
    def __call__(self, batch):
        batch = [torch.stack(x) for x in list(zip(*batch))]
        
        sequences = batch[self.sequence_index]
        lengths = batch[self.length_index]
        
        length = self.choose_length(lengths)
        mask = torch.arange(start=maxlen, end=0, step=-1) < length
        padded_sequences = sequences[:, mask]
        
        batch[self.sequence_index] = padded_sequences
        
        if self.label_index is not None:
            return [x for i, x in enumerate(batch) if i != self.label_index], batch[self.label_index]
    
        return batch

In [9]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def train_model(model, train, test, loss_fn, output_dim, lr=0.001,
                batch_size=512, n_epochs=4, n_epochs_embed=2,
                enable_checkpoint_ensemble=True):
    
    train_collator = SequenceBucketCollator(lambda lengths: lengths.max(), 
                                            sequence_index=0, 
                                            length_index=1, 
                                            label_index=2)
    
    param_lrs = [{'params': param, 'lr': lr} for param in model.parameters()]
    optimizer = torch.optim.Adam(param_lrs, lr=lr)

    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lambda epoch: 0.6 ** epoch)
    
    train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True, collate_fn=train_collator)
    test_loader = torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=False, collate_fn=train_collator)
    all_test_preds = []
    checkpoint_weights = [2 ** epoch for epoch in range(n_epochs)]
    
    for epoch in range(n_epochs):
        start_time = time.time()
        
        scheduler.step()
        
        model.train() #set model to train mode
        avg_loss = 0.
        
        for data in tqdm(train_loader, disable=False):
            
            #training loop
            x_batch = data[:-1]
            y_batch = data[-1]

            y_pred = model(*x_batch)  #feed data into model          
            loss = loss_fn(y_pred, y_batch)
            
            #calculate error and adjust model params

            optimizer.zero_grad()
            loss.backward()

            optimizer.step()
            avg_loss += loss.item() / len(train_loader) #gets the loss per epoch
        
        
        model.eval() #set model to eval mode for test data
        test_preds = np.zeros((len(test), output_dim))
    
        for i, x_batch in enumerate(test_loader):
            y_pred = sigmoid(model(*x_batch).detach().cpu().numpy()) #feed data into model

            test_preds[i * batch_size:(i+1) * batch_size, :] = y_pred #get test predictions
        
        #test_preds has the predictions for the entire test set now
        all_test_preds.append(test_preds) #append predictions to the record of all past predictions
        elapsed_time = time.time() - start_time
        print('Epoch {}/{} \t loss={:.4f} \t time={:.2f}s'.format(
              epoch + 1, n_epochs, avg_loss, elapsed_time))
        
    #Make embeddings layer only layer unfreezed, train again (literally run through the n_epochs)
    #maybe define a n_epochs_embedding
    
    #parameters = model.parameters()
    #for param in parameters:
    #        param.requires_grad = False
    #parameters[0].requires_grad = True
    
    '''
    ct = 0
    for child in model.children():
        if ct == 0:
            for param in child.parameters():
                param.requires_grad = True
        else:
            for param in child.parameters():
                param.requires_grad = False
        ct += 1
    
    for epoch in range(n_epochs_embed):
        start_time = time.time()
        
        scheduler.step()
        
        model.train() #set model to train mode
        avg_loss = 0.
        
        for data in tqdm(train_loader, disable=False):
            
            #training loop
            x_batch = data[:-1]
            y_batch = data[-1]

            y_pred = model(*x_batch)  #feed data into model          
            loss = loss_fn(y_pred, y_batch)
            
            #calculate error and adjust model params

            optimizer.zero_grad()
            loss.backward()

            optimizer.step()
            avg_loss += loss.item() / len(train_loader) #gets the loss per epoch
        
            
        model.eval() #set model to eval mode for test data
        test_preds = np.zeros((len(test), output_dim))
    
        for i, x_batch in enumerate(test_loader):
            y_pred = sigmoid(model(*x_batch).detach().cpu().numpy()) #feed data into model

            test_preds[i * batch_size:(i+1) * batch_size, :] = y_pred #get test predictions
        
        #test_preds has the predictions for the entire test set now
        #all_test_preds.append(test_preds) #append predictions to the record of all past predictions
        elapsed_time = time.time() - start_time
        print('[EMBEDDING TRAINING] Epoch {}/{} \t loss={:.4f} \t time={:.2f}s '.format(
              epoch + 1, n_epochs_embed, avg_loss, elapsed_time))
    '''
    
    #PREDICTION CODE
    '''
    if enable_checkpoint_ensemble:
        #if our approach is an ensemble then we average it amongst all the historical predictions
        test_preds = np.average(all_test_preds, weights=checkpoint_weights, axis=0)    
    else:
        #if our approach is not an ensemble then we just take the last set of predictions
        test_preds = all_test_preds[-1]
        
    return test_preds
    '''
    #return trained model
    return model

def predict(model, test, output_dim, batch_size=512):
    
    test_collator = SequenceBucketCollator(lambda lenghts: lenghts.max(), sequence_index=0, length_index=1)
    test_loader = torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=False, collate_fn=test_collator)
    
    model.eval() #set model to eval mode for test data
    test_preds = np.zeros((len(test), output_dim))
    
    for i, x_batch in enumerate(test_loader):
        y_pred = sigmoid(model(*x_batch).detach().cpu().numpy()) #feed data into model
        test_preds[i * batch_size:(i+1) * batch_size, :] = y_pred #get test predictions
        
    return test_preds

In [10]:
class SpatialDropout(nn.Dropout2d):
    def forward(self, x):
        x = x.unsqueeze(2)    # (N, T, 1, K)
        x = x.permute(0, 3, 2, 1)  # (N, K, 1, T)
        #call the forward method in Dropout2d (super function specifies the subclass and instance)
        x = super(SpatialDropout, self).forward(x)  # (N, K, 1, T), some features are masked
        x = x.permute(0, 3, 2, 1)  # (N, T, 1, K)
        x = x.squeeze(2)  # (N, T, K)
        return x
    
class NeuralNet(nn.Module):
    def __init__(self, embedding_matrix, num_aux_targets):
        #call the init mthod in Module (super function specifies the subclass and instance)
        super(NeuralNet, self).__init__() 
        embed_size = embedding_matrix.shape[1]
        
        self.embedding = nn.Embedding(max_features, embed_size)
        self.embedding.weight = nn.Parameter(torch.tensor(embedding_matrix, dtype=torch.float))
        self.embedding.weight.requires_grad = False
        self.embedding_dropout = SpatialDropout(0.3)
        
        self.lstm1 = nn.LSTM(embed_size, LSTM_UNITS, bidirectional=True, batch_first=True)
        self.lstm2 = nn.LSTM(LSTM_UNITS * 2, LSTM_UNITS, bidirectional=True, batch_first=True)
    
        self.linear1 = nn.Linear(DENSE_HIDDEN_UNITS, DENSE_HIDDEN_UNITS)
        self.linear2 = nn.Linear(DENSE_HIDDEN_UNITS, DENSE_HIDDEN_UNITS)
        
        self.linear_out = nn.Linear(DENSE_HIDDEN_UNITS, 1)
        self.linear_aux_out = nn.Linear(DENSE_HIDDEN_UNITS, num_aux_targets)
        
    def forward(self, x, lengths=None):
        h_embedding = self.embedding(x)
        h_embedding = self.embedding_dropout(h_embedding)
        
        #first variable h_(lstm #) holds the output, _ is the (hidden state, cell state)
        h_lstm1, _ = self.lstm1(h_embedding) 
        h_lstm2, _ = self.lstm2(h_lstm1)
        
        # global average pooling
        avg_pool = torch.mean(h_lstm2, 1) #get the mean value of the first dimension in h_lstm2
        # global max pooling
        max_pool, _ = torch.max(h_lstm2, 1) #get the max value of the first dimension in h_lstm2
        
        h_conc = torch.cat((max_pool, avg_pool), 1)
        h_conc_linear1  = F.relu(self.linear1(h_conc))
        h_conc_linear2  = F.relu(self.linear2(h_conc))
        
        hidden = h_conc + h_conc_linear1 + h_conc_linear2
        
        result = self.linear_out(hidden)
        aux_result = self.linear_aux_out(hidden)
        out = torch.cat([result, aux_result], 1)
        
        return out

In [11]:
def preprocess(data):
    '''
    Credit goes to https://www.kaggle.com/gpreda/jigsaw-fast-compact-solution
    '''
    punct = "/-'?!.,#$%\'()*+-/:;<=>@[\\]^_`{|}~`" + '""“”’' + '∞θ÷α•à−β∅³π‘₹´°£€\×™√²—–&'
    def clean_special_chars(text, punct):
        for p in punct:
            text = text.replace(p, ' ')
        return text

    data = data.astype(str).apply(lambda x: clean_special_chars(x, punct))
    return data

In [12]:
train = pd.read_csv('../input/train.csv')
test = pd.read_csv('../input/test.csv')
#pd.read_csv("P00000001-ALL.csv", nrows=20)
#train = pd.read_hdf('../input/train.h5')
#test = pd.read_hdf('../input/test.h5')

identity_columns = [
    'male', 'female', 'homosexual_gay_or_lesbian', 'christian', 'jewish',
    'muslim', 'black', 'white', 'psychiatric_or_mental_illness']

x_train = preprocess(train['comment_text'])
y_train = np.where(train['target'] >= 0.5, 1, 0)
num_train_data = y_train.shape[0]
y_train_identity = np.where(train[identity_columns] >= 0.5, 1, 0)
y_aux_train = train[['target', 'severe_toxicity', 'obscene', 'identity_attack', 'insult', 'threat']]
x_test = preprocess(test['comment_text'])
y_aux_train = y_aux_train.as_matrix()



In [13]:
max_features = None

#Create the dictionary of all words that exist in our data
nlp = spacy.load("en_core_web_lg", disable=['parser','ner','tagger'])
text_list = pd.concat([x_train, x_test])
nlp.vocab.add_flag(lambda s: s.lower() in spacy.lang.en.stop_words.STOP_WORDS, spacy.attrs.IS_STOP)
word_dict = {}
lemma_dict = {}
word_index = 1
docs = nlp.pipe(text_list, n_threads = 2)
word_sequences = []

In [14]:
#create dictionary of word mapping to integers as wel as lemma dictionary
#count = 1
start_time = time.time()
for doc in tqdm(docs): #one doc is one comment(row)
    #print(count)
    word_seq = []
    for token in doc:
        if (token.text not in word_dict) and (token.pos_ is not "PUNCT"):
            word_dict[token.text] = word_index
            word_index += 1
            lemma_dict[token.text] = token.lemma_
        if token.pos_ is not "PUNCT":
            word_seq.append(word_dict[token.text])
    word_sequences.append(word_seq)
    #count+= 1

print("--- %s seconds ---" % (time.time() - start_time))
del docs
del text_list
gc.collect()

--- 367.0854768753052 seconds ---


0

In [15]:
#x_train = tokenizer.texts_to_sequences(x_train)
#x_test = tokenizer.texts_to_sequences(x_test)
x_train = word_sequences[:num_train_data]
x_test = word_sequences[num_train_data:]
x_train = sequence.pad_sequences(x_train, maxlen=MAX_LEN)
x_test = sequence.pad_sequences(x_test, maxlen=MAX_LEN)
gc.collect()

0

In [16]:
max_features = max_features or len(word_dict) + 1
max_features #number of unique words there are in the dictionary

419165

In [17]:
start_time = time.time()
crawl_matrix, unknown_words_crawl = build_matrix(word_dict, lemma_dict, CRAWL_EMBEDDING_PATH)
print('n unknown words (crawl): ', len(unknown_words_crawl))
print("--- %s seconds ---" % (time.time() - start_time))
print('Size: ', crawl_matrix.shape)
del unknown_words_crawl
gc.collect()

n unknown words (crawl):  79463
--- 110.56159520149231 seconds ---
Size:  (419165, 300)


0

In [18]:
start_time = time.time()
glove_matrix, unknown_words_glove = build_matrix(word_dict, lemma_dict, GLOVE_EMBEDDING_PATH)
print('n unknown words (glove): ', len(unknown_words_glove))
print("--- %s seconds ---" % (time.time() - start_time))
print('Size: ', glove_matrix.shape)
del unknown_words_glove
gc.collect()

n unknown words (glove):  81058
--- 116.92512583732605 seconds ---
Size:  (419165, 300)


0

In [19]:
embedding_matrix = np.mean([crawl_matrix, glove_matrix], axis=0)

del crawl_matrix
del glove_matrix
del word_dict
del lemma_dict
del word_sequences
del WORDS
gc.collect()

0

In [20]:
class JigsawEvaluator:

    def __init__(self, y_true, y_identity, power=-5, overall_model_weight=0.25):
        self.y = y_true
        self.y_i = y_identity
        self.n_subgroups = self.y_i.shape[1]
        self.power = power
        self.overall_model_weight = overall_model_weight

    @staticmethod
    def _compute_auc(y_true, y_pred):
        #print("Here: ", y_true)
        #print(y_pred)
        try:
            return roc_auc_score(y_true, y_pred)
        except ValueError:
            return np.nan

    def _compute_subgroup_auc(self, i, y_pred):
        mask = self.y_i[:, i] == 1
        #print(self.y)
        return self._compute_auc(self.y[mask], y_pred[mask])

    def _compute_bpsn_auc(self, i, y_pred):
        mask = self.y_i[:, i] + self.y == 1
        return self._compute_auc(self.y[mask], y_pred[mask])

    def _compute_bnsp_auc(self, i, y_pred):
        mask = self.y_i[:, i] + self.y != 1
        return self._compute_auc(self.y[mask], y_pred[mask])

    def compute_bias_metrics_for_model(self, y_pred):
        #print(y_pred)
        records = np.zeros((3, self.n_subgroups))
        for i in range(self.n_subgroups):
            #print(y_pred)
            records[0, i] = self._compute_subgroup_auc(i, y_pred)
            records[1, i] = self._compute_bpsn_auc(i, y_pred)
            records[2, i] = self._compute_bnsp_auc(i, y_pred)
        return records

    def _calculate_overall_auc(self, y_pred):
        return roc_auc_score(self.y, y_pred)

    def _power_mean(self, array):
        total = sum(np.power(array, self.power))
        return np.power(total / len(array), 1 / self.power)

    def get_final_metric(self, y_pred):
        bias_metrics = self.compute_bias_metrics_for_model(y_pred)
        bias_score = np.average([
            self._power_mean(bias_metrics[0]),
            self._power_mean(bias_metrics[1]),
            self._power_mean(bias_metrics[2])
        ])
        overall_score = self.overall_model_weight * self._calculate_overall_auc(y_pred)
        bias_score = (1 - self.overall_model_weight) * bias_score
        return overall_score + bias_score

In [21]:
lengths = torch.from_numpy(np.array([len(x) for x in x_train]))
test_lengths = torch.from_numpy(np.array([len(x) for x in x_test]))
maxlen = lengths.max() 

#x_train_padded = torch.from_numpy(x_train)
#x_test_padded = torch.from_numpy(x_test)

In [22]:

all_val_preds = []
all_test_preds = []
num_splits = 5

#Add in K fold 
random_state = 2019

#K fold splits
splits = list(StratifiedKFold(n_splits=num_splits, shuffle=True, random_state=random_state).split(x_train,y_train))

#final validation predictions
final_val_preds = np.zeros((x_train.shape[0]))

#final test predictions to be stored in this var
final_test_preds = np.zeros((x_test.shape[0]))

start_time = time.time()
for fold in range(num_splits):
    tr_ind, val_ind = splits[fold]
    all_val_preds = []
    all_test_preds = []
    #print('Training set size: ', len(tr_ind))
    #print('Val set size: ', len(val_ind))
    x_training = x_train[tr_ind]
    y_training = y_train[tr_ind]
    y_aux_training = y_aux_train[tr_ind]
    
    x_val = x_train[val_ind]
    y_val = y_train[val_ind]
    y_aux_val = y_aux_train[val_ind]
    
    
    
    x_train_torch = torch.tensor(x_training, dtype=torch.long).to(device)
    x_val_torch = torch.tensor(x_val, dtype=torch.long).to(device)
    y_train_torch = torch.tensor(np.hstack([y_training[:, np.newaxis], y_aux_training]), dtype=torch.float32).to(device)
    
    x_test_torch = torch.tensor(x_test, dtype=torch.long).to(device)
    
    ###
    
    #test_dataset = data.TensorDataset(x_test_torch, test_lengths)
    #train_dataset = data.TensorDataset(x_train_torch, lengths, y_train_torch)
    #val_dataset = data.TensorDataset(x_val_torch)

    #train_collator = SequenceBucketCollator(lambda lengths: lengths.max(), sequence_index=0, length_index=1, label_index=2)
    #test_collator = SequenceBucketCollator(lambda lengths: lengths.max(), sequence_index=0, length_index=1)
    
    ####
    
    train_dataset = data.TensorDataset(x_train_torch, y_train_torch)
    val_dataset = data.TensorDataset(x_val_torch)
    test_dataset = data.TensorDataset(x_test_torch)

    for model_idx in range(NUM_MODELS):
        print('Model ', model_idx)
        seed_everything(1234 + model_idx)

        model = NeuralNet(embedding_matrix, y_aux_train.shape[-1])
        model = model.to(device)

        #training using training and validation set
        model = train_model(model, train_dataset, val_dataset, output_dim=y_train_torch.shape[-1], loss_fn=nn.BCEWithLogitsLoss(reduction='mean'))
        
        #prediction on validation set (used for score measurement)
        val_pred = predict(model, val_dataset, output_dim=y_train_torch.shape[-1]) #val preds on the val split
        all_val_preds.append(val_pred)
        #print(len(val_pred))
        
        #prediction on entire test set (actual predictions to be submitted)
        test_pred = predict(model, test_dataset, output_dim=y_train_torch.shape[-1])
        all_test_preds.append(test_pred)
        
        print()
        
    #average validation prediction amongst all models
    avg_val = np.mean(all_val_preds, axis=0)[:, 0] #will be printed out per split
    final_val_preds[val_ind] += avg_val
    
    avg_test = np.mean(all_test_preds, axis=0)[:, 0]
    
    final_test_preds += avg_test

    y_true = y_train[val_ind] #true scores for this validation set
    y_identity = y_train_identity[val_ind] #true scores for the identity groups for this validation set
    #print(y_true)
    #print(y_identity)
    evaluator = JigsawEvaluator(y_true, y_identity)
    #print(avg_val)
    auc_score = evaluator.get_final_metric(avg_val)

    roc_score = roc_auc_score(y_train[val_ind], avg_val)
    print('Kaggle Score: ', auc_score)
    print('ROC score: ', roc_score)
    
    del x_train_torch
    del x_val_torch
    del y_train_torch
    del x_test_torch
    del train_dataset
    del val_dataset
    del test_dataset
    gc.collect()
    torch.cuda.empty_cache()
    
    print('=============End-of-Fold================')
    
end_time = time.time()
print('Time: ', end_time - start_time)

#Final combined score
y_true = y_train
y_identity = y_train_identity
evaluator = JigsawEvaluator(y_true, y_identity)
auc_score = evaluator.get_final_metric(final_val_preds)
print('Final Kaggle Score: ', auc_score)
print('Final ROC score: ', roc_auc_score(y_train, final_val_preds))

#average test predictions AGAIN this time by number of splits
final_test_preds /= num_splits
#print(final_test_preds)

Model  0


RuntimeError: Expected object of backend CPU but got backend CUDA for argument #2 'other'

In [None]:
submission = pd.DataFrame.from_dict({
    'id': test['id'],
    'prediction': final_test_preds
})

submission.to_csv('submission.csv', index=False)
submission.head()