## File Configuration

In [2]:
!pip install transformers



In [3]:
!pip install pytorch-lightning



In [5]:
import pytorch_lightning as pl
#from pytorch_lightning.metrics.functional import accuracy, f1, auroc
#from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger
import torch
import torch.nn as nn
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
from transformers import AdamW
import random, os
import numpy as np 
import pandas as pd
import gc
import time
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter


from tqdm.auto import tqdm

In [6]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
## for the colab version of the notebook
#from google.colab import drive
#drive.mount('/content/drive')

In [7]:
train_df = pd.read_csv("/Users/andrewcosta/Desktop/Milan/text_mining/project/data/train.csv")
test_df = pd.read_csv("/Users/andrewcosta/Desktop/Milan/text_mining/project/data/test.csv")

## Exploratory Data Analysis

In [None]:
train_df.head()

In [None]:
train_df.info()

In [None]:
train_df.describe()

In [None]:
train_df.isna().sum()

In [None]:
unique_classes = train_df.label.unique()
print(f'Number of Unique Classes: {len(unique_classes)}')
print(f'What are those classes? {unique_classes}')

In [6]:
train_df['sequence_length'] = train_df['sequence'].apply(lambda x: len(x))

In [None]:
train_df.sequence.nunique()

In [None]:
train_df.head()

In [7]:
test_df['sequence_length'] = test_df['sequence'].apply(lambda x: len(x))
test_df.head()

Unnamed: 0,ID,sequence,sequence_length
0,test_0,MPQHLLSGVRNFTALAVFIAAGNCLAATDLQAVVDANVKPLMQQQA...,384
1,test_1,MAELPQSRINERNITSEMRESFLDYAMSVIVARALPDVRDGLKPVH...,886
2,test_2,MSYRDLGLIDSEVIAERRVRALDDSSPSAVPTTGVRVFGCGHDEAV...,350
3,test_3,MMIKSLCCALLLGLSCSALAAPVSEKQLAEVVANTVTPLMKAQSVP...,381
4,test_4,MTTLEALILGIIQGVFMFVPVSSTSHLALAQHWLISAGSGMPAPDS...,291


In [None]:
def plot_counts(df, data_name):
    sns.distplot(df['sequence_length'])
    plt.title(f'Sequnce Count: {data_name}')
    plt.grid(True)

In [None]:
plot_counts(train_df, 'Train')

In [None]:
plot_counts(test_df, 'Test')

In [None]:
def amino_acid_frequency(df, data_name):
    df = df.apply(lambda x: " ".join(x))

    amino_acids = []

    for i in df:
        amino_acids.extend(i)
  
    aa_dict = Counter(amino_acids)
    aa_dict.pop(' ') # remove white space

    print(f'Amino Acid: {data_name}')
    print(f'Total Unique Codes: {len(aa_dict.keys())}')

    df = pd.DataFrame({'Code': list(aa_dict.keys()), 'Freq': list(aa_dict.values())})

    df.sort_values('Freq', ascending=False).reset_index()[['Code', 'Freq']]
  
    return sns.barplot(data=df, x = 'Code', y = 'Freq', order = df.sort_values('Freq', ascending = False).Code), plt.title(f'Amino Acid Count: {data_name}')

In [None]:
amino_acid_frequency(train_df['sequence'], 'Train')

In [None]:
amino_acid_frequency(test_df['sequence'], 'Test')

## Feature Engineering

**1 indicates that the gene is antibody resistant and 0 indicates that it is not.**

In [None]:
train_df["len_seq"] = train_df["sequence"].apply(lambda seq: len(seq))

In [None]:
train_df.head(10)

 #### len sequences distribution

In [None]:
counts, bins = np.histogram(train_df["len_seq"])
#plt.hist(bins[:-1], bins, weights=counts)
train_df.len_seq.plot.density(color='green')
plt.title('Density plot for Sequence length')
plt.show()

In [None]:
fig, axes = plt.subplots(1, 1, figsize=(10, 8), sharey=False)
fig.suptitle('proteins per class')

sns.countplot(train_df["label"] , ax=axes)
axes.set_title(train_df["label"].name)

### preprocessing

#### add spaces

In [8]:
train_df["sequence"] = train_df["sequence"].apply(lambda seq: " ".join(seq))

In [None]:
train_df["sequence"][:3]

#### remove rare amino acid

In [9]:
train_df["sequence"] = train_df["sequence"].str.replace(r"[UZOB]", 'X', regex=True)

In [None]:
train_df["sequence"][:3]

### Split Data

In [10]:
from sklearn.model_selection import train_test_split
train, val = train_test_split(train_df, test_size=0.07, random_state=42, shuffle = True)
train.shape, val.shape

((11016, 4), (830, 4))

In [None]:
val.head()

In [None]:
fig, axes = plt.subplots(1, 1, figsize=(8, 5), sharey=False)
fig.suptitle('proteins per class')

sns.countplot(val["label"] , ax=axes)
axes.set_title(val["label"].name)

#### Tokenization

In [11]:
from transformers import BertModel, BertTokenizer, AutoConfig, AutoModel, BertConfig
import re
import torch
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from torch.utils.data import TensorDataset
BERT_MODEL_NAME = "Rostlab/prot_bert"

In [12]:
tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_NAME)
config = BertConfig.from_pretrained(BERT_MODEL_NAME)

In [14]:
encoded_data_train = tokenizer.batch_encode_plus(
    train["sequence"].values,
    truncation=True,
    add_special_tokens=True,
    max_length=256,
    padding='max_length',
    return_attention_mask=True,
    return_tensors='pt',
)

encoded_data_val = tokenizer.batch_encode_plus(
    val["sequence"].values,
    truncation=True,
    add_special_tokens=True,
    max_length=256,
    padding='max_length',
    return_attention_mask=True,
    return_tensors='pt',
)

In [15]:
input_ids_train = encoded_data_train["input_ids"]
attention_masks_train = encoded_data_train["attention_mask"]
labels_train = torch.tensor(train["label"].values)

input_ids_val = encoded_data_val["input_ids"]
attention_masks_val = encoded_data_val["attention_mask"]
labels_val = torch.tensor(val["label"].values)

In [16]:
train_dataset = TensorDataset(input_ids_train, attention_masks_train, labels_train)
val_dataset = TensorDataset(input_ids_val, attention_masks_val, labels_val)

In [17]:
# Create the DataLoaders for our training and validation sets.
# We'll take training samples in random order. 
train_dataloader = DataLoader(
    train_dataset,
    sampler=RandomSampler(train_dataset),
    batch_size=64,
)
# For validation the order doesn't matter, so we'll just read them sequentially.
val_dataloader = DataLoader(
    val_dataset,
    sampler=SequentialSampler(val_dataset),
    batch_size=64,
)

### Model

In [18]:
class ABRClassifier(nn.Module):
    def __init__(self, num_classes):
        super(ABRClassifier, self).__init__()
        self.model_name = "Rostlab/prot_bert"
        config = AutoConfig.from_pretrained(self.model_name)
        self.prot_bert = AutoModel.from_pretrained(self.model_name, config=config)
        self.dropout = nn.Dropout(0.5)
        self.fc = nn.Linear(self.prot_bert.config.hidden_size, num_classes)
        
    def forward(self, input_ids, attention_mask):
        outputs = self.prot_bert(input_ids = input_ids, attention_mask = attention_mask)
        pooled_output = outputs[1]
        pooled_output = self.dropout(pooled_output)
        logits = self.fc(pooled_output)
        return logits

In [19]:
model = ABRClassifier(num_classes = 2).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr = 1e-4)
loss = nn.CrossEntropyLoss()

Some weights of the model checkpoint at Rostlab/prot_bert were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [1]:
EPOCHS = 10
num_classes = 2
for epoch in range(EPOCHS):
    for batch in train_dataloader:
        input_ids = batch[0].to(device)
        attention_mask = batch[1].to(device)
        labels = batch[2].to(device)
        
        optimizer.zero_grad()
        outputs = model(input_ids, attention_mask)
        logits = model.fc(outputs)
        loss = loss(logits, labels)
        loss.backward()
        optimizer.step()
        
    print(f'Epoch: {epoch+1}/{EPOCHS}, Loss: {loss.item(): .4f}')

NameError: name 'train_dataloader' is not defined

#### Dataloaders

Tokenization

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
device

In [None]:
model.to(device)

##Model

In [None]:
!pip install transformers

In [None]:
from transformers import BertModel, BertTokenizer, AdamW
import torch
import torch.nn as nn
import torch.nn.functional as F

In [None]:
pretrained_model_name = "Rostlab/prot_bert"

class resistance_classifier(nn.Module):
  def __init__(self, num_classes):
    super(resistance_classifier, self).__init__()
    self.bert = BertModel.from_pretrained(pretrained_model_name)
    self.classifier = nn.Sequential(nn.Dropout(p-0.2),
                                    nn.Linear(self.bert.config.hidden_size, num_classes),
                                    nn.Tanh())
  
  def forward(self, input_ids, attention_mask):
    output = self.bert(
        input_ids = input_ids,
        attention_mask = attention_mask
    )
  
    return self.classifier(output.pooler_output)

## **Feature extraction using pretrained prot bert**

In [None]:
emb_train_list = []
class_train_list = []
model.eval()
for batch in tqdm(train_dataloader):
    batch = tuple(k.to(device) for k in batch)
    classes = batch[-1]
    inputs = {'input_ids':      batch[0],
            'attention_mask': batch[1],
            }
  # Extract per-residue embeddings (on GPU)
    with torch.no_grad():
        outputs = model(**inputs)

    embeddings_t = outputs.last_hidden_state
    embeddings_t = embeddings_t.detach().cpu().numpy()
    classes = classes.detach().cpu().numpy()
  # Generate per-sequence embeddings via averaging
    seq_emb = np.mean(embeddings_t, axis=1)
    emb_train_list.append(seq_emb)
    class_train_list.append(classes)

In [None]:
batch

In [None]:
embeddings_t

In [None]:
embeddings_t.shape

In [None]:
print(seq_emb)
print(seq_emb.shape)

In [None]:
emb_val_list = []
class_val_list = []
model.eval()
for batch in tqdm(val_dataloader):
    batch = tuple(k.to(device) for k in batch)
    classes = batch[-1]
    inputs = {'input_ids':      batch[0],
            'attention_mask': batch[1],
            }
  # Extract per-residue embeddings (on GPU)
    with torch.no_grad():
        outputs = model(**inputs)

    embeddings = outputs.last_hidden_state
    embeddings = embeddings.detach().cpu().numpy()
    classes = classes.detach().cpu().numpy()
  # Generate per-sequence embeddings via averaging
    seq_val_emb = np.mean(embeddings, axis=1)
    emb_val_list.append(seq_val_emb)
    class_val_list.append(classes)

In [None]:
# get class train embeddings
emb_train_vectors = np.concatenate(emb_train_list, axis=0)

In [None]:
emb_train_vectors.shape

In [None]:
emb_train_vectors[1]

In [None]:
# get class train vectors
class_train_vectors = np.concatenate(class_train_list, axis=0)

In [None]:
class_train_vectors.shape

In [None]:
class_train_vectors[:10]

In [None]:
from sklearn.preprocessing import LabelEncoder

In [None]:
label_encoder_class = LabelEncoder()

In [None]:
import plotly
import numpy as np
import plotly.graph_objs as go
from sklearn.decomposition import PCA

## **2D PCA visualization**

In [None]:
import plotly.express as px
seq_vectors = emb_train_vectors.copy()
two_dim = PCA(random_state=0, n_components=2).fit_transform(seq_vectors)
fig = px.scatter(x=two_dim[:,0], y=two_dim[:,1], color=class_train_vectors)
fig.show()

## **3D PCA for feature vizualisation**

In [None]:
seq_vectors = emb_train_vectors.copy()
n_dim = PCA(random_state=0, n_components=3).fit_transform(seq_vectors)
fig = px.scatter_3d(x=n_dim[:,0], y=n_dim[:,1], z=n_dim[:,2], color=class_train_vectors)
fig.show()

## **T-distributed Stochastic Neighbor Embedding**

### **2D vizualisation**

In [None]:
from sklearn.manifold import TSNE
fifty_dim = PCA(random_state=0, n_components=100).fit_transform(seq_vectors)
two_dim = TSNE(n_components=2).fit_transform(fifty_dim)
fig = px.scatter(x=two_dim[:,0], y=two_dim[:,1], color=class_train_vectors)
fig.show()

### **3D vizualisation**

In [None]:
fifty_dim = PCA(random_state=0, n_components=100).fit_transform(seq_vectors)
three_dim = TSNE(n_components=3).fit_transform(fifty_dim)
fig = px.scatter_3d(x=three_dim[:,0], y=three_dim[:,1], z=three_dim[:,2], color=class_train_vectors)
fig.show()

## **MD Scaling** 

## **2D vizualisations**

In [None]:
from sklearn.manifold import MDS
fifty_dims = PCA(random_state=0, n_components=50).fit_transform(seq_vectors)
md_scaling = MDS(n_components=3)
S_scaling = md_scaling.fit_transform(fifty_dims)
fig = px.scatter_3d(x=S_scaling[:,0], y=S_scaling[:,1], z=S_scaling[:,2], color=class_train_vectors)
fig.show()

In [None]:
fig = px.scatter(x=S_scaling[:,0], y=S_scaling[:,1], color=class_train_vectors)
fig.show()

In [None]:
fifty_dims = PCA(random_state=0, n_components=3).fit_transform(seq_vectors)
md_scaling = MDS(n_components=3)
S3_scaling = md_scaling.fit_transform(fifty_dims)
fig = px.scatter_3d(x=S3_scaling[:,0], y=S3_scaling[:,1], z=S3_scaling[:,2], color=class_train_vectors)
fig.show()

## **Spectral embeddings** 

In [None]:
from sklearn.manifold import SpectralEmbedding
fifty_dims = PCA(random_state=0, n_components=3).fit_transform(seq_vectors)
spectral = SpectralEmbedding(n_components=3)
S3_spectral = spectral.fit_transform(fifty_dims)

In [None]:
fig = px.scatter_3d(x=S3_spectral[:,0], y=S3_spectral[:,1], z=S3_spectral[:,2], color=class_train_vectors)
fig.show()

## **ISO map embeddings**

In [None]:
from sklearn.manifold import Isomap 
fifty_dims = PCA(random_state=0, n_components=3).fit_transform(seq_vectors)
isomap = Isomap(n_neighbors=2,n_components=3)
S_isomap = isomap.fit_transform(fifty_dims)
fig = px.scatter_3d(x=S_isomap[:,0], y=S_isomap[:,1], z=S_isomap[:,2], color=class_train_vectors)
fig.show()

In [None]:
from sklearn.manifold import Isomap 
fifty_dims = PCA(random_state=0, n_components=50).fit_transform(seq_vectors)
isomap = Isomap(n_components=3)
S_isomap = isomap.fit_transform(fifty_dims)
fig = px.scatter_3d(x=S_isomap[:,0], y=S_isomap[:,1], z=S_isomap[:,2], color=class_train_vectors)
fig.show()

In [None]:
from sklearn.svm import SVC
clf = SVC()

In [None]:
clf.fit(seq_vectors, class_vectors)

In [None]:
#class_vectors_name = label_encoder_class.inverse_transform(class_vectors)

#### finetuning bert pretrained model

In [None]:
#from transformers import BertForSequenceClassification
from transformers import BertModel
BERT_MODEL_NAME = "Rostlab/prot_bert"

In [None]:
BATCH_SIZE = 8
N_EPOCHS = 20

In [None]:
steps_per_epoch=len(train) // BATCH_SIZE
total_training_steps = steps_per_epoch * N_EPOCHS

In [None]:
len(train_dataloader)

In [None]:
len(val)

In [None]:
from torch.optim import Adam
from tqdm import tqdm

def trainer(model, train_dataloader, val_dataloader, learning_rate, epochs , len_val , len_train):

    #train, val = Dataset(train_data), Dataset(val_data)

    #train_dataloader = torch.utils.data.DataLoader(train, batch_size=2, shuffle=True)
    #val_dataloader = torch.utils.data.DataLoader(val, batch_size=2)

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    criterion = nn.CrossEntropyLoss()
    optimizer = Adam(model.parameters(), lr= learning_rate)

    if use_cuda:

            model = model.cuda()
            criterion = criterion.cuda()
            
    val_loss_list = []
  
    for epoch_num in range(epochs):

            total_acc_train = 0
            total_loss_train = 0

            for batch in tqdm(train_dataloader):
                batch = tuple(b.to(device) for b in batch)
                inputs = {'input_ids':      batch[0],
                        'attention_mask': batch[1],
                        'labels':         batch[2],
                        }

                train_label = inputs['labels']
                mask = inputs['attention_mask']
                input_id = inputs['input_ids']

                output = model(input_id, mask)
                
                batch_loss = criterion(output, train_label.long())
                total_loss_train += batch_loss.item()
                
                acc = (output.argmax(dim=1) == train_label).sum().item()
                total_acc_train += acc

                model.zero_grad()
                batch_loss.backward()
                optimizer.step()
            
            total_acc_val = 0
            total_loss_val = 0

            with torch.no_grad():

                for val_b in val_dataloader:
                    val_b = tuple(b.to(device) for b in val_b)
        
                    val_input = {'input_ids':      val_b[0],
                              'attention_mask': val_b[1],
                              'labels':         val_b[2],
                              }

                    val_label = val_input["labels"]
                    mask = val_input['attention_mask']
                    input_id = val_input['input_ids']

                    

                    output = model(input_id, mask)

                    batch_loss = criterion(output, val_label.long())
                    total_loss_val += batch_loss.item()
                    
                    acc = (output.argmax(dim=1) == val_label).sum().item()
                    total_acc_val += acc
            current_val_loss = total_loss_val
            val_loss_list.append(total_loss_val)
            print(
                f'Epochs: {epoch_num + 1} | Train Loss: {total_loss_train / len_train: .3f} \
                | Train Accuracy: {total_acc_train /len_train: .3f} \
                | Val Loss: {total_loss_val / len_val: .3f} \
                | Val Accuracy: {total_acc_val / len_val: .3f}')
            
            if current_val_loss == min(val_loss_list):
                path = os.path.join('/kaggle/working', "epoch"+str(epoch_num + 1)+'_model.pth')
                # recommended way from http://pytorch.org/docs/master/notes/serialization.html
                torch.save(model.state_dict(), path)
                logger.info(f"Saving model: {path} \n")

In [None]:
from transformers import BertModel, BertTokenizer, AdamW, get_linear_schedule_with_warmup
import torch
import torch.nn.functional as F
import torch.nn as nn


#PRE_TRAINED_MODEL_NAME = BERT_MODEL_NAME
PRE_TRAINED_MODEL_NAME = 'Rostlab/prot_bert_bfd_localization'
class ProteinClassifier(nn.Module):
    def __init__(self, n_classes):
        super(ProteinClassifier, self).__init__()
        self.bert = BertModel.from_pretrained(PRE_TRAINED_MODEL_NAME)
        self.classifier = nn.Sequential(nn.Dropout(p=0.7),
                                        nn.Linear(self.bert.config.hidden_size, n_classes),
                                        nn.Sigmoid())
        
    def forward(self, input_ids, attention_mask):
        output = self.bert(
          input_ids=input_ids,
          attention_mask=attention_mask
        )
        return self.classifier(output.pooler_output)

In [None]:
#from transformers import BertForSequenceClassification
#model = BertForSequenceClassification.from_pretrained("Rostlab/prot_bert", num_labels=2,output_attentions=False,output_hidden_states=False)

In [None]:
epochs = 50
learning_rate = 2e-5
#model = ProteinClassifier()
model = ProteinClassifier(2)

In [None]:
torch.cuda.empty_cache()

In [None]:
len_train = len(train)
len_val = len(val)

In [None]:
trainer(model, train_dataloader, val_dataloader, learning_rate, epochs, len_val, len_train)

In [None]:
torch.cuda.memory_summary(device=None, abbreviated=False)


In [None]:
!nvidia-smi 

In [None]:
def save_model(model):
    path = os.path.join('/content/drive/MyDrive/Iovision_workspace/Logs', 'model.pth')
    # recommended way from http://pytorch.org/docs/master/notes/serialization.html
    torch.save(model.state_dict(), path)
    logger.info(f"Saving model: {path} \n")

#### test preprocessing

In [None]:
test_df = pd.read_csv("/kaggle/input/antibiotic-protein/test (3).csv")

In [None]:
test_df.head()

In [None]:
test_df.columns

In [None]:
test_df["sequence"] = test_df["sequence"].apply(lambda seq: " ".join(seq))

In [None]:
test_df["sequence"] = test_df["sequence"].str.replace(r"[UZOB]", 'X', regex=True)

In [None]:
encoded_data_test = tokenizer.batch_encode_plus(
    test_df["sequence"].values,
    truncation=True,
    add_special_tokens=True,
    max_length=512,
    padding='max_length',
    return_attention_mask=True,
    return_tensors='pt',
)

In [None]:
input_ids_test = encoded_data_test["input_ids"]
attention_masks_test = encoded_data_test["attention_mask"]
#labels_test = torch.tensor(test_df["label"].values)

In [None]:
test_dataset = TensorDataset(input_ids_test, attention_masks_test)

In [None]:
test_dataloader = DataLoader(
    test_dataset,
    sampler=SequentialSampler(test_dataset),
    batch_size=2,)

In [None]:
device

In [None]:
model.to(device)

In [None]:
batch 

In [None]:
CUDA_LAUNCH_BLOCKING=1

In [None]:
emb_tst_list = []
#class_list = []
model.eval()
for batched in test_dataloader:
    
    batched = tuple(k.to(device) for k in batched)
    
    #classes = batch[-1]
    inputs = {'input_ids':      batched[0],
            'attention_mask': batched[1],
            }
  # Extract per-residue embeddings (on GPU)
    with torch.no_grad():
        outputs = model(**inputs)

    embeddings = outputs.last_hidden_state
    embeddings = embeddings.detach().cpu().numpy()
    #classes = classes.detach().cpu().numpy()
  # Generate per-sequence embeddings via averaging
    seq_tst_emb = np.mean(embeddings, axis=1)
    emb_tst_list.append(seq_tst_emb)
    #class_list.append(classes)

In [None]:
def cross_entroy_func(preds, labels):
    preds_flat = np.argmax(preds, axis=-1).flatten()
    labels_flat = labels.flatten()
    return log_loss(labels_flat, preds_flat)

#### evaluation

In [None]:
def test(model, test_loader, device):
    model.eval()
    
    predictions = []
    sigmoid = nn.Sigmoid()
    with torch.no_grad():
        for batch in test_loader:
            batch = tuple(b.to(device) for b in batch)
        
            inputs = {'input_ids':      batch[0],
                       'attention_mask': batch[1],
                      }
            b_input_ids = inputs['input_ids']
            b_input_mask = inputs['attention_mask']

            outputs = model(b_input_ids,attention_mask=b_input_mask)
            #outputs = sigmoid(self.model(inputs, sequence_len))
            _, preds = torch.max(outputs, dim=1)
            #logits = outputs.logits
       
            # Move logits and labels to CPU
            preds = preds.detach().cpu().numpy()
            #label_ids = inputs['labels'].cpu().numpy()
            predictions.append(preds)
        
            
    predictions = np.concatenate(predictions, axis=0)
    return predictions

In [None]:
#test(model,test_dataloader,"cuda")

#### load model

In [None]:
#model = ProteinClassifier(2)

In [None]:
os.getcwd()

In [None]:
#model_inf = model = ProteinClassifier(2)  # we do not specify pretrained=True, i.e. do not load default weights
#model_inf.load_state_dict(torch.load('/content/drive/MyDrive/Iovision_workspace/Logs/1model_train2.pth'))

In [None]:
#device = torch.device('cuda' if torch.cuda.is_available() else "cpu")
#model.to(device)

In [None]:
#Test predictions and make the submission file 
test_perds = test(model_inf,test_dataloader,device)


In [None]:
test_predictions=test_perds
test_df["label"]=test_predictions
test_df[["ID","label"]].to_csv("./pred5.csv",index=False)