In [1]:
import json
import pandas as pd
from collections import Counter
from tqdm.auto import tqdm
import os

In [2]:
from nltk.tokenize import RegexpTokenizer
from scipy import sparse
from collections import Counter
import numpy as np
import torch
import torch.nn as nn
from torch.utils import data

## Read and Preprocess Data

In [3]:
with open("selected_articles.json", 'r') as file:
    selected_articles = json.load(file)

In [4]:
total_df = pd.DataFrame(selected_articles)[["id", "title", "authors", "categories", "abstract"]]
total_df = total_df.sample(frac = 1.0)
total_df = total_df.reset_index(drop = True)
total_df.head()

Unnamed: 0,id,title,authors,categories,abstract
0,1910.13472,Locally recoverable codes on surfaces,"Cec\'ilia Salgado, Anthony V\'arilly-Alvarado ...",cs.IT math.AG math.IT,A linear error correcting code is a subspace...
1,1505.05124,Locally Repairable Regenerating Codes: Node Un...,Imad Ahmad and Chih-Chun Wang,cs.IT math.IT,Locally repairable codes (LRCs) are ingeniou...
2,2110.15133,Deep Calibration of Interest Rates Model,Mohamed Ben Alaya and Ahmed Kebaier and Djibri...,q-fin.ST cs.LG,For any financial institution it is a necess...
3,2204.09323,Self-supervised Learning for Sonar Image Class...,Alan Preciado-Grijalva and Bilal Wehbe and Mig...,cs.CV cs.LG,Self-supervised learning has proved to be a ...
4,908.066,A Short Note on Compressed Sensing with Partia...,Laurent Jacques,cs.IT math.IT,This short note studies a variation of the C...


In [5]:
category_list = ["cs.AI", "cs.DB", "cs.IT", "cs.LG", "cs.SI"]
category_dict = {"cs.AI":0, "cs.DB":1, "cs.IT":2, "cs.LG":3, "cs.SI":4}
token_id = {'<PAD>':0, '<GO>':1, '<EOS>': 2, '<UNK>': 3}

In [6]:
def built_vocabulary(traindata, tokenizer, num_special_token):
    vocabulary = []
    traindata['word_list'] = traindata['abstract'].apply(lambda x: [word.lower() for word in tokenizer.tokenize(x)])
    mean_text_length = (traindata['word_list'].apply(lambda x: len(x))).mean()
    for i, v in tqdm(traindata['word_list'].iteritems()):
        vocabulary += v
    voc_dict = Counter(vocabulary)
    uniq_voc = list(set(vocabulary))
    for word in tqdm(uniq_voc):
        if voc_dict[word]<5:
            voc_dict.pop(word)
    word_place_dict={}
    uniq_voc = list(voc_dict.keys())
    for i in tqdm(range(len(uniq_voc))):
        word_place_dict[uniq_voc[i]] = i + num_special_token
    return word_place_dict, mean_text_length

def process_data(data_df, word_dict, categories, category_id, special_token_id,
                 sequence_length, if_train = True, tokenizer = None): 
    uniq_words = word_dict.keys()
    ground_truth = np.zeros((len(data_df), len(categories)))
    sequences = np.full((len(data_df), sequence_length), special_token_id['<PAD>'])
    
    # For evaluation and test, word_list need 
    if not if_train:
        data_df['word_list'] = data_df['abstract'].apply(lambda x: [word.lower() for word in tokenizer.tokenize(x)])
    
    for idx, row in tqdm(data_df.iterrows()):
        sequences[idx][0] = special_token_id['<GO>']
        j = 0
        word_list = row["word_list"]
        while j<len(word_list):
            if word_list[j] in uniq_words:
                sequences[idx][j+1] = word_dict[word_list[j]]
            else:
                sequences[idx][j+1] = special_token_id['<UNK>']
                
            if j==sequence_length-2:
                break
            else:
                j += 1
        
        if j!=sequence_length-1:
            sequences[idx][j+1] = special_token_id['<EOS>']
                
        c_list = row['categories'].split(" ")
        for c in c_list:
            if c in categories:
                ground_truth[idx][category_id[c]] = 1  
                       
    return sequences, ground_truth

In [7]:
# Divide the train set, evaluate set and test set
train_df = total_df.iloc[:180000]
valid_df = total_df.iloc[180000:192000]
valid_df = valid_df.reset_index(drop=True)
test_df = total_df.iloc[192000:]
test_df = test_df.reset_index(drop=True)

In [8]:
# Get the vocabulary
regtokenizer = RegexpTokenizer(r'\w+')
word_dict, mean_text_length = built_vocabulary(train_df, regtokenizer, 4)
print("Mean text length is", mean_text_length)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  traindata['word_list'] = traindata['abstract'].apply(lambda x: [word.lower() for word in tokenizer.tokenize(x)])


0it [00:00, ?it/s]

  0%|          | 0/144994 [00:00<?, ?it/s]

  0%|          | 0/47527 [00:00<?, ?it/s]

Mean text length is 171.10386666666668


In [9]:
train_X, train_Y_np = process_data(train_df, word_dict, category_list, category_dict, token_id, 220)
valid_X, valid_Y_np = process_data(valid_df, word_dict, category_list, category_dict, 
                                   token_id, 220, False, regtokenizer)
test_X, test_Y_np = process_data(test_df, word_dict, category_list, category_dict, 
                                 token_id, 220, False, regtokenizer)

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

In [17]:
#train_Y = torch.from_numpy(train_Y_np).float()
#valid_Y = torch.from_numpy(valid_Y_np).float()
#test_Y = torch.from_numpy(test_Y_np).float()

## Define the Dataset

In [18]:
class ArxivDataset(data.Dataset):
    def __init__(self, encodings, labels=None):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        if self.labels is None:
            return torch.tensor(self.encodings[idx])
        return torch.tensor(self.encodings[idx]), torch.tensor(self.labels[idx])

    def __len__(self):
        return len(self.encodings)

In [19]:
train_dataset = ArxivDataset(train_X, train_Y_np)
valid_dataset = ArxivDataset(valid_X, valid_Y_np)
test_dataset = ArxivDataset(test_X, test_Y_np)

## Define Evaluate and Test Function

In [20]:
def evaluate(dev_dataloader, Model, device, loss_fn):
    Model.eval()
    total_acc = 0
    total_loss = 0
    for step, data in tqdm(enumerate(dev_dataloader), desc = "Evaluation step", leave = False):
        inputs, labels = data
        outputs = Model(inputs.to(device))
        ground_truth = labels.to(device)
        total_acc += (((outputs>0.5).float() == ground_truth).float()).mean()
        loss = loss_fn(outputs, ground_truth)
        total_loss += loss.item()
        
    return total_acc/len(dev_dataloader), total_loss/len(dev_dataloader)       

In [21]:
def test(test_dataloader, Model, device, batch_size = 16, cag_num = 5):
    Model.eval()
    total_acc = 0
    tp_fp_fn = np.ones((3, cag_num))
    for step, data in tqdm(enumerate(test_dataloader), desc = "Evaluation step", leave = False):
        inputs, labels = data
        tmp_batch_size = labels.shape[0]
        outputs = Model(inputs.to(device))
        ground_truth = labels.to(device)
        classify_results = (outputs>0.5).float()
        total_acc += ((classify_results == ground_truth).float()).mean()
        for j in range(tmp_batch_size):
            for k in range(cag_num):
                if classify_results[j][k]==1:
                    if classify_results[j][k] == ground_truth[j][k]:
                        tp_fp_fn[0][k] += 1
                    else:
                        tp_fp_fn[1][k] += 1
                        
                else:
                    if ground_truth[j][k]==1:
                        tp_fp_fn[2][k] += 1
        
    precision = tp_fp_fn[0]/(tp_fp_fn[0]+tp_fp_fn[1])
    recall = tp_fp_fn[0]/(tp_fp_fn[0]+tp_fp_fn[2])
    macrof1 = 2/(1/precision.mean() + 1/recall.mean())
    mean_tp_fp_fn = tp_fp_fn.mean(axis = 1)
    microf1 = 2*mean_tp_fp_fn[0]/(2*mean_tp_fp_fn[0] + mean_tp_fp_fn[1] + 2*mean_tp_fp_fn[2])
    print(tp_fp_fn)
        
    return total_acc/len(test_dataloader), macrof1, microf1

## Define Models and Args 

In [22]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dim = len(word_dict.keys()) + 4
embedding_dim = 256
hidden_dim = 256 
num_layers = 2 
if_bidirect = True
num_class = len(category_list)
num_epochs = 5
batch_size = 32

In [23]:
class GRU_Model(nn.Module):
    def __init__(self, input_dim, embedding_dim, hidden_dim, num_layers, num_class, if_bidirect = False, dropout=0.5):
        super(GRU_Model, self).__init__()
        self.embedding = torch.nn.Embedding(input_dim, embedding_dim)
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.dropout = dropout
        self.num_direct = 2 if if_bidirect else 1
        self.gru = nn.GRU(embedding_dim, hidden_dim, num_layers=num_layers, batch_first=True, bidirectional = if_bidirect)
        self.dropout_layer = nn.Dropout(dropout)
        self.linear_layer = nn.Linear(self.num_direct*hidden_dim, num_class)
        self.out_layer = nn.Sigmoid()

        # Initialize
        self.embedding.weight.data.uniform_(-1e-5, 1e-5)
        self.linear_layer.weight.data.uniform_(-1e-5, 1e-5)
        
    
    def forward(self, inputs):
        inputs = self.embedding(inputs)
        x, _ = self.gru(inputs, None)
        if self.num_direct == 2:
            x = torch.cat((x[:, -1, :self.hidden_dim], x[:, 0, self.hidden_dim:]), 1)
        else:
            x = x[:, -1, :] 
        x = self.out_layer(self.linear_layer(self.dropout_layer(x)))
        return x


## Train

In [24]:
train_dataloader = data.DataLoader(train_dataset, batch_size = batch_size)
valid_dataloader = data.DataLoader(valid_dataset, batch_size = batch_size)
test_dataloader = data.DataLoader(test_dataset, batch_size = batch_size)

In [27]:
loss_function = nn.CrossEntropyLoss()
model = GRU_Model(dim, embedding_dim, hidden_dim, num_layers, num_class, if_bidirect)
model.to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)
loss_record = [] 
eval_accuracy_list = []
eval_loss_list = []
k = 0

tr_loss = 0
model.train()
for epoch in tqdm(range(num_epochs), desc = "epoch", leave = True):
    for step, batch in tqdm(enumerate(train_dataloader), desc = "step", leave = False):
        inputs, labels = tuple(t.to(device) for t in batch)
        
        outputs = model.forward(inputs)
        loss = loss_function(outputs, labels)
        loss.backward()
        optimizer.step()
        model.zero_grad()
        k += 1
        tr_loss += loss.item()
        

        if k%100 == 0 and k!=0:
            loss_record.append(tr_loss/100)            
            if k%1000 == 0:
                eval_acc, eval_loss = evaluate(valid_dataloader, model, device, loss_function)
                eval_accuracy_list.append(eval_acc)
                eval_loss_list.append(eval_loss)
                print("Mean Train Loss", tr_loss/100, "Mean evaluation accuracy", eval_acc, 
                     "Mean evaluation loss", eval_loss)
                model.train()
            
            tr_loss = 0
                
    save_path = "./checkpoint_gru/checkpoint-"+str(epoch+1)
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    model.to(torch.device("cpu"))
    torch.save(model, save_path + "/gru_model_"+str(epoch+1)+".pt")
    model.to(device)


epoch:   0%|          | 0/5 [00:00<?, ?it/s]

step: 0it [00:00, ?it/s]

Evaluation step: 0it [00:00, ?it/s]

Mean Train Loss 1.5787721820548177 Mean evaluation accuracy tensor(0.7070, device='cuda:0') Mean evaluation loss 1.5819964559276898


Evaluation step: 0it [00:00, ?it/s]

Mean Train Loss 1.5937477783486247 Mean evaluation accuracy tensor(0.7070, device='cuda:0') Mean evaluation loss 1.5815760117073854


Evaluation step: 0it [00:00, ?it/s]

Mean Train Loss 1.594785363599658 Mean evaluation accuracy tensor(0.7070, device='cuda:0') Mean evaluation loss 1.581682127058506


Evaluation step: 0it [00:00, ?it/s]

Mean Train Loss 1.5772892357409 Mean evaluation accuracy tensor(0.7070, device='cuda:0') Mean evaluation loss 1.5811045299172402


Evaluation step: 0it [00:00, ?it/s]

Mean Train Loss 1.4249455945193767 Mean evaluation accuracy tensor(0.8122, device='cuda:0') Mean evaluation loss 1.4196031926423311


step: 0it [00:00, ?it/s]

Evaluation step: 0it [00:00, ?it/s]

Mean Train Loss 1.4051466304250062 Mean evaluation accuracy tensor(0.8293, device='cuda:0') Mean evaluation loss 1.4047494386732577


Evaluation step: 0it [00:00, ?it/s]

Mean Train Loss 1.3813016783073544 Mean evaluation accuracy tensor(0.8493, device='cuda:0') Mean evaluation loss 1.3904852069467306


Evaluation step: 0it [00:00, ?it/s]

Mean Train Loss 1.382972399983555 Mean evaluation accuracy tensor(0.8593, device='cuda:0') Mean evaluation loss 1.385834967240691


Evaluation step: 0it [00:00, ?it/s]

Mean Train Loss 1.3921737109683454 Mean evaluation accuracy tensor(0.8585, device='cuda:0') Mean evaluation loss 1.3828888927549123


Evaluation step: 0it [00:00, ?it/s]

Mean Train Loss 1.373969420660287 Mean evaluation accuracy tensor(0.8602, device='cuda:0') Mean evaluation loss 1.3770951221684615


Evaluation step: 0it [00:00, ?it/s]

Mean Train Loss 1.3722691129893065 Mean evaluation accuracy tensor(0.8582, device='cuda:0') Mean evaluation loss 1.3754596312095722


step: 0it [00:00, ?it/s]

Evaluation step: 0it [00:00, ?it/s]

Mean Train Loss 1.3736258474737406 Mean evaluation accuracy tensor(0.8610, device='cuda:0') Mean evaluation loss 1.373585111523668


Evaluation step: 0it [00:00, ?it/s]

Mean Train Loss 1.3633460587635637 Mean evaluation accuracy tensor(0.8571, device='cuda:0') Mean evaluation loss 1.3714622069795925


Evaluation step: 0it [00:00, ?it/s]

Mean Train Loss 1.3812199027463794 Mean evaluation accuracy tensor(0.8625, device='cuda:0') Mean evaluation loss 1.3708459758460523


Evaluation step: 0it [00:00, ?it/s]

Mean Train Loss 1.3737359531968831 Mean evaluation accuracy tensor(0.8593, device='cuda:0') Mean evaluation loss 1.3714800573140382


Evaluation step: 0it [00:00, ?it/s]

Mean Train Loss 1.3420529473014176 Mean evaluation accuracy tensor(0.8596, device='cuda:0') Mean evaluation loss 1.378006750990947


step: 0it [00:00, ?it/s]

Evaluation step: 0it [00:00, ?it/s]

Mean Train Loss 1.358885349072516 Mean evaluation accuracy tensor(0.8671, device='cuda:0') Mean evaluation loss 1.369292905251185


Evaluation step: 0it [00:00, ?it/s]

Mean Train Loss 1.3475418585538863 Mean evaluation accuracy tensor(0.8641, device='cuda:0') Mean evaluation loss 1.3694016814430554


Evaluation step: 0it [00:00, ?it/s]

Mean Train Loss 1.3532063624449073 Mean evaluation accuracy tensor(0.8703, device='cuda:0') Mean evaluation loss 1.3726520235886177


Evaluation step: 0it [00:00, ?it/s]

Mean Train Loss 1.3501915108412503 Mean evaluation accuracy tensor(0.8671, device='cuda:0') Mean evaluation loss 1.368821869045496


Evaluation step: 0it [00:00, ?it/s]

Mean Train Loss 1.3348415513522922 Mean evaluation accuracy tensor(0.8699, device='cuda:0') Mean evaluation loss 1.3720403171777724


Evaluation step: 0it [00:00, ?it/s]

Mean Train Loss 1.3598679750971496 Mean evaluation accuracy tensor(0.8653, device='cuda:0') Mean evaluation loss 1.3699206720739603


step: 0it [00:00, ?it/s]

Evaluation step: 0it [00:00, ?it/s]

Mean Train Loss 1.3573012106493116 Mean evaluation accuracy tensor(0.8629, device='cuda:0') Mean evaluation loss 1.374916678706805


Evaluation step: 0it [00:00, ?it/s]

Mean Train Loss 1.3765248600393534 Mean evaluation accuracy tensor(0.8647, device='cuda:0') Mean evaluation loss 1.3696642367442449


Evaluation step: 0it [00:00, ?it/s]

Mean Train Loss 1.326667442973703 Mean evaluation accuracy tensor(0.8659, device='cuda:0') Mean evaluation loss 1.3677313841780028


Evaluation step: 0it [00:00, ?it/s]

Mean Train Loss 1.3545829099230469 Mean evaluation accuracy tensor(0.8650, device='cuda:0') Mean evaluation loss 1.3710541903277238


Evaluation step: 0it [00:00, ?it/s]

Mean Train Loss 1.3622788935527206 Mean evaluation accuracy tensor(0.8638, device='cuda:0') Mean evaluation loss 1.3681210251400868


Evaluation step: 0it [00:00, ?it/s]

Mean Train Loss 1.3617389087937772 Mean evaluation accuracy tensor(0.8669, device='cuda:0') Mean evaluation loss 1.3691597534120084


## Final Test and Store Results

In [28]:
train_loss = np.array([loss_record, np.arange(100, k+1, 100)])
train_loss[0]

array([1.76375148, 1.59062484, 1.58889554, 1.57544296, 1.58522168,
       1.5837742 , 1.58814364, 1.58652863, 1.57687202, 1.57877218,
       1.59094861, 1.58300302, 1.58314809, 1.57143514, 1.60342165,
       1.56733006, 1.57802993, 1.57699705, 1.57200197, 1.59374778,
       1.57713208, 1.59753102, 1.58446734, 1.58759021, 1.55983184,
       1.58923013, 1.58600149, 1.5883169 , 1.5733782 , 1.59478536,
       1.57961544, 1.56147328, 1.56816488, 1.61746859, 1.58055662,
       1.5631631 , 1.59126473, 1.56932118, 1.59436641, 1.57728924,
       1.55293036, 1.52318272, 1.4684927 , 1.46276019, 1.44382296,
       1.42637039, 1.41771146, 1.39876114, 1.4062425 , 1.42494559,
       1.43164808, 1.43097645, 1.4191723 , 1.41448223, 1.4299    ,
       1.42975271, 1.4175339 , 1.41314412, 1.39001095, 1.40514663,
       1.41120102, 1.38967379, 1.40722321, 1.40195351, 1.38775116,
       1.39106262, 1.39694539, 1.38492972, 1.39544082, 1.38130168,
       1.40307848, 1.37559829, 1.38720215, 1.38302378, 1.37556

In [29]:
end_eval_acc, end_eval_loss = evaluate(valid_dataloader, model, device, loss_function)
eval_accuracy_list.append(end_eval_acc)
eval_loss_list.append(end_eval_loss)
end_eval_acc, end_eval_loss

Evaluation step: 0it [00:00, ?it/s]

(tensor(0.8674, device='cuda:0'), 1.3677012896786134)

In [32]:
eval_accuracy_list = [acc.cpu() for acc in eval_accuracy_list]
#eval_loss_list = [loss.cpu() for loss in eval_loss_list]
eval_steps = np.arange(1000, k+1, 1000)
eval_steps = np.append(eval_steps, k)
eval_results = np.array([eval_accuracy_list, eval_loss_list, eval_steps])
eval_results 

array([[7.07049966e-01, 7.07049966e-01, 7.07049966e-01, 7.07049966e-01,
        8.12166750e-01, 8.29299808e-01, 8.49299967e-01, 8.59283447e-01,
        8.58450115e-01, 8.60216558e-01, 8.58216882e-01, 8.61000240e-01,
        8.57099771e-01, 8.62516999e-01, 8.59316826e-01, 8.59583139e-01,
        8.67133617e-01, 8.64066720e-01, 8.70333493e-01, 8.67149889e-01,
        8.69916975e-01, 8.65300357e-01, 8.62883031e-01, 8.64716649e-01,
        8.65883112e-01, 8.65033269e-01, 8.63783002e-01, 8.66865933e-01,
        8.67416561e-01],
       [1.58199646e+00, 1.58157601e+00, 1.58168213e+00, 1.58110453e+00,
        1.41960319e+00, 1.40474944e+00, 1.39048521e+00, 1.38583497e+00,
        1.38288889e+00, 1.37709512e+00, 1.37545963e+00, 1.37358511e+00,
        1.37146221e+00, 1.37084598e+00, 1.37148006e+00, 1.37800675e+00,
        1.36929291e+00, 1.36940168e+00, 1.37265202e+00, 1.36882187e+00,
        1.37204032e+00, 1.36992067e+00, 1.37491668e+00, 1.36966424e+00,
        1.36773138e+00, 1.37105419e+00,

In [33]:
np.savez("GRU_results.npz", train_loss, eval_results)

In [34]:
test(test_dataloader, model, device, batch_size)

Evaluation step: 0it [00:00, ?it/s]

[[3.2740e+03 1.0000e+00 3.0930e+03 1.0645e+04 1.0000e+00]
 [2.9600e+03 1.0000e+00 3.0200e+02 4.8560e+03 1.0000e+00]
 [1.2930e+03 5.8200e+02 4.7300e+02 1.4000e+02 1.4890e+03]]


(tensor(0.8676, device='cuda:0'), 0.5643636335154463, 0.6791744840525329)

## Test

In [19]:
k

22500

In [27]:
eval_accuracy_list[0].device

device(type='cpu')

In [9]:
a[1][2] = 5
a

tensor([[[1],
         [1],
         [1],
         [1]],

        [[1],
         [1],
         [5],
         [1]],

        [[1],
         [1],
         [1],
         [1]],

        [[1],
         [1],
         [1],
         [1]],

        [[1],
         [1],
         [1],
         [1]]])

In [27]:
test_embedding = torch.nn.Embedding(20, 10)

In [37]:
a = torch.tensor([[11, 12], [13, 14], [1,1]])
a.shape[0]

3

In [38]:
test_embedding(a)

NameError: name 'test_embedding' is not defined

In [51]:
b = np.array([[1,2,3,4],[5,6,7,8]])
b

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [52]:
np.expand_dims(b, axis=1)

array([[[1, 2, 3, 4]],

       [[5, 6, 7, 8]]])

In [65]:
torch.tensor(train_Y_np[:2][0])

tensor([[0., 0., 0., 1., 0.]], dtype=torch.float64)

In [84]:
inputs, labels = train_dataset.__getitem__(0)

In [85]:
labels

tensor([0., 0., 0., 1., 0.], dtype=torch.float64)

In [18]:
train_dataset[0][0]

tensor([  1,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16,
          4,  17,  18,  19,  20,  11,  15,  21,  22,  23,  24,  25,  20,  26,
         27,  22,  28,  29,  30,  31,  32,  33,  34,  35,  11,  36,  37,  38,
         39,  40,  41,  29,  30,  42,  43,  44,  45,  35,  46,  41,  47,  35,
         48,  49,  50,  41,  51,  41,  52,   3,  53,  54,  55,  56,  34,  57,
         35,  11,  35,  58,  15,  59,  60,  61,  18,  62,  63,  64,  65,  54,
         18,  66,  40,  67,  18,  68,  69,  70,  71,  15,  72,  22,  73,  35,
         11,  36,  74,  37,  38,  39,  75,  76,  77,  30,  78,  79,  35,  74,
         40,  67,  15,  80,  81,  22,  35,  82,  83,  41,  35,  60,  58,  84,
         22,  85,  35,  72,  68,  69,  70,  71,  18,  86,  87,  88,  89,  90,
         91,  15,  92,  22,  93,  35,  74,  40,  67,  94,  35,  95,  96,  41,
         35,  91,  15,  97,  98,  18,  99, 100,  82,  70, 101, 102, 103,   3,
        104, 105, 101, 106,  14, 107,  15,  72,  22, 108,  35,  

In [4]:
test = False
a = 2 if test else 1
a

1