In [2]:
# Import necessary libraries
import numpy as np
import pandas as pd
import seaborn as sns
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from collections import defaultdict
from textwrap import wrap

# Torch ML libraries
import transformers
from transformers import BertModel, BertTokenizer, AdamW, get_linear_schedule_with_warmup
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader

# Misc.
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Set the model name
#MODEL_NAME = 'bert-base-cased'
MODEL_NAME = 'allenai/scibert_scivocab_uncased'


# Build a BERT based tokenizer
#tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)
tokenizer = BertTokenizer.from_pretrained(MODEL_NAME, do_lower_case=True)

In [4]:
# Build the Sentiment Classifier class 
class SentimentClassifier(nn.Module):
    
    # Constructor class 
    def __init__(self, n_classes):
        super(SentimentClassifier, self).__init__()
        self.bert = BertModel.from_pretrained(MODEL_NAME)
        self.drop = nn.Dropout(p=0.3)
        self.out = nn.Linear(self.bert.config.hidden_size, n_classes)

    # Forward propagaion class
    
    def forward(self, input_ids, attention_mask):
        _, pooled_output = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            return_dict=False
        )
        output = self.drop(pooled_output)
        return self.out(output)  




In [5]:
# Set GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class_names = ['negative', 'positive']
# Create an instance of your model
model = SentimentClassifier(len(class_names))
model = model.to(device)

# Load the model's state dictionary
model_path = './models/sci_bert_best_model_state_1000.bin'  # Path to the saved model file
state_dict = torch.load(model_path)

# Load the state dictionary into the model
model.load_state_dict(state_dict)

Some weights of the model checkpoint at allenai/scibert_scivocab_uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


<All keys matched successfully>

In [6]:
review_text = "The authors propose a lightweight CNN model (< 1 MB) for locating potential tears in the knee on MRI images. The main contributions are two normalization layers (layer and contrast normalization) for 3D sub-images and the application of BlurPool downsampling. Promising results are shown on two knee datasets. The paper is well written and easy to follow. Even though the proposed model is lightweight (0.2M), it is shown to be on par or better than a recently published model called MRNet (183M parameters). The selected application (discovering knee tear) seems to be clinically relevant.  It is not entirely clear how crucial the proposed multi-slice normalization and BlurPool layers are. An ablation study and comparison to established methods like batch normalization would have been valuable. The method adopts approaches from the literature (instance normalization, BlurPool) and applies them to the problem of knee tear detection on 3D MRI data. The paper is well written but would benefit from an ablation study to better understand the value of the individual layers in comparison to the standard approach using batch normalization. The results and model size of the proposed approach are enticing."


In [7]:
MAX_LEN = 160

encoded_review = tokenizer.encode_plus(
    review_text,
    max_length=MAX_LEN,
    add_special_tokens=True,
    return_token_type_ids=False,
    pad_to_max_length=True,
    return_attention_mask=True,
    return_tensors='pt',
)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


In [8]:
input_ids = encoded_review['input_ids'].to(device)
attention_mask = encoded_review['attention_mask'].to(device)

output = model(input_ids, attention_mask)
_, prediction = torch.max(output, dim=1)

print(f'Review text: {review_text}')
print(f'Sentiment  : {class_names[prediction]}')


Review text: The authors propose a lightweight CNN model (< 1 MB) for locating potential tears in the knee on MRI images. The main contributions are two normalization layers (layer and contrast normalization) for 3D sub-images and the application of BlurPool downsampling. Promising results are shown on two knee datasets. The paper is well written and easy to follow. Even though the proposed model is lightweight (0.2M), it is shown to be on par or better than a recently published model called MRNet (183M parameters). The selected application (discovering knee tear) seems to be clinically relevant.  It is not entirely clear how crucial the proposed multi-slice normalization and BlurPool layers are. An ablation study and comparison to established methods like batch normalization would have been valuable. The method adopts approaches from the literature (instance normalization, BlurPool) and applies them to the problem of knee tear detection on 3D MRI data. The paper is well written but wo