In [8]:
import torch
from torch import nn
from torch.utils.data import DataLoader
import numpy as np
import pandas as pd
import transformers
from transformers import AutoModel
import sys
import math
sys.path.insert(0,'..')

from utils.preprocessing import load_data
from utils.transformer_dataset import ReviewDataset
from utils.training import train_text_model, train_text_meta_model, train_meta_model
from models.transformer_models import UsefulScoreRegressorTextOnly, UsefulScoreRegressorAllFeat, UsefulScoreRegressorMetaOnly

In [2]:
# Load data
train, val = load_data('../data/drugsComTrain_raw.csv')

In [3]:
# See dataframe
train

Unnamed: 0,uniqueID,drugName,condition,review,rating,date,usefulCount,cleanReview,usefulScore,ratingNormalized,...,ADHD,Acne,Anxiety,Bipolar Disorde,Birth Control,Depression,Insomnia,Obesity,Pain,Weight Loss
126080,110122,Nexplanon,Birth Control,"""Hello, Ive had Nexplanon for four years (just...",8,2016-04-19,9,"Hello, Ive had Nexplanon for four years (just ...",0.306739,0.8,...,0,0,0,0,1,0,0,0,0,0
123803,6499,Wellbutrin XL,Depression,"""I started taking Wellbutrin XL August of 2016...",10,2017-09-15,16,I started taking Wellbutrin XL August of 2016....,0.387062,1.0,...,0,0,0,0,0,1,0,0,0,0
35971,39194,Contrave,Weight Loss,"""I tried Contrave for 5 days. I was just takin...",5,2017-04-19,8,I tried Contrave for 5 days. I was just taking...,0.290296,0.5,...,0,0,0,0,0,0,0,0,0,1
38384,137414,Isotretinoin,Acne,"""Accutane is an isotretinoin- the most powerfu...",10,2010-12-13,13,Accutane is an isotretinoin- the most powerful...,0.358074,1.0,...,0,1,0,0,0,0,0,0,0,0
89258,211662,Lamotrigine,Bipolar Disorde,"""I started on Lamictal after having manic epis...",10,2017-07-10,39,I started on Lamictal after having manic episo...,0.511444,1.0,...,0,0,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25083,104070,Ethinyl estradiol / levonorgestrel,Birth Control,"""My favorite birth control by far! I have had ...",10,2013-11-13,22,My favorite birth control by far! I have had n...,0.431519,1.0,...,0,0,0,0,1,0,0,0,0,0
129693,210966,Mirtazapine,Anxiety,"""At 19 I went on Sertraline as I&#039;d strugg...",9,2015-11-29,65,At 19 I went on Sertraline as I'd struggled wi...,0.582757,0.9,...,0,0,1,0,0,0,0,0,0,0
62647,220149,Amitriptyline,Pain,"""I&#039;m 20 years old and have both fibromyal...",10,2015-08-17,100,I'm 20 years old and have both fibromyalgia an...,0.642895,1.0,...,0,0,0,0,0,0,0,0,1,0
147595,45327,Fluoxetine,Depression,"""I was prescribed prozac for depression about ...",8,2015-08-10,13,I was prescribed prozac for depression about 3...,0.358074,0.8,...,0,0,0,0,0,1,0,0,0,0


#### Develop BERT Model

In [4]:
##### Create pytorch dataset
nonTextCols = ['ratingNormalized', 'ageScore', 'ADHD', 'Acne', 'Anxiety', 'Bipolar Disorde',
                'Birth Control', 'Depression', 'Insomnia', 'Obesity', 'Pain', 'Weight Loss']
targetCol = 'usefulScore'

trainset = ReviewDataset(train, 'roberta-base', nonTextCols, targetCol)
valset = ReviewDataset(val, 'roberta-base', nonTextCols, targetCol)
train_loader = DataLoader(dataset=trainset, batch_size=8, shuffle=True)
val_loader = DataLoader(dataset=valset, batch_size=8, shuffle=False)

#### Train Models (Frozen Transformer Weights)

#### Text-only Model

In [5]:
##### Text-only Transformer Model
encoder = AutoModel.from_pretrained('roberta-base', return_dict=True)

# Freeze encoder parameters to avoid CUDA out of memory.
for param in encoder.parameters():
    param.requires_grad = False

model = UsefulScoreRegressorTextOnly(encoder)
model = model.cuda()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters())

train_text_model(num_epochs=3, model=model, optimizer=optimizer,
                 train_loader=train_loader, val_loader=val_loader,
                 criterion=criterion, save_path='../models/RoBERTa_Frozen_TextOnly_Clip.pt', clip=1.0)

Epoch 0, val loss: inf -> 0.00274, train loss: 0.00334
Epoch 1, val loss: 0.00274 -> 0.00273, train loss: 0.00307
Epoch 2, val loss: 0.00293, train loss: 0.00305


#### Text + Metadata model

In [6]:
##### Text + metadata Transformer Model
encoder = AutoModel.from_pretrained('roberta-base', return_dict=True)

# Freeze encoder parameters to avoid CUDA out of memory.
for param in encoder.parameters():
    param.requires_grad = False

model = UsefulScoreRegressorAllFeat(encoder, num_meta_feats=len(nonTextCols))
model = model.cuda()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters())

train_text_meta_model(num_epochs=3, model=model, optimizer=optimizer,
                      train_loader=train_loader, val_loader=val_loader,
                      criterion=criterion, save_path='../models/RoBERTa_Frozen_TextMeta_Clip.pt', clip=1.0)

Epoch 0, val loss: inf -> 0.00220, train loss: 0.00268
Epoch 1, val loss: 0.00220 -> 0.00214, train loss: 0.00242
Epoch 2, val loss: 0.00214 -> 0.00213, train loss: 0.00236


In [7]:
#### Check how the model is performing across each metadata feature group
#### Especially important to look at performance by age of review, to see if performance is good for young reviews
#### In practice, the newly posted reviews would be the ones that the model would help prioritize.

#### Train Models (Free Transformer Weights)

#### Text-only Model

In [8]:
##### Text-only Transformer Model
encoder = AutoModel.from_pretrained('roberta-base', return_dict=True)

model = UsefulScoreRegressorTextOnly(encoder)
model = model.cuda()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters())

train_text_model(num_epochs=3, model=model, optimizer=optimizer,
                 train_loader=train_loader, val_loader=val_loader,
                 criterion=criterion, save_path='../models/RoBERTa_Free_TextOnly_Clip.pt', clip=1.0)

RuntimeError: CUDA out of memory. Tried to allocate 12.00 MiB (GPU 0; 11.00 GiB total capacity; 9.52 GiB already allocated; 8.50 MiB free; 9.61 GiB reserved in total by PyTorch)

#### Text + Metadata Model

In [None]:
##### Text + metadata Transformer Model
encoder = AutoModel.from_pretrained('roberta-base', return_dict=True)

model = UsefulScoreRegressorAllFeat(encoder, num_meta_feats=len(nonTextCols))
model = model.cuda()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters())

train_text_meta_model(num_epochs=3, model=model, optimizer=optimizer,
                      train_loader=train_loader, val_loader=val_loader,
                      criterion=criterion, save_path='../models/RoBERTa_Free_TextMeta_Clip.pt', clip=1.0)

#### Train Meta-only Model

#### With Gradient Clipping

In [9]:
model = UsefulScoreRegressorMetaOnly(num_meta_feats=len(nonTextCols))
model = model.cuda()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters())

train_meta_model(num_epochs=3, model=model, optimizer=optimizer,
                 train_loader=train_loader, val_loader=val_loader,
                 criterion=criterion, save_path='../models/RoBERTa_MetaOnly_Clip.pt', clip=1.0)

Epoch 0, val loss: inf -> 0.00221, train loss: 0.00271
Epoch 1, val loss: 0.00221 -> 0.00218, train loss: 0.00231
Epoch 2, val loss: 0.00218 -> 0.00218, train loss: 0.00227


#### Without Gradient Clipping

In [10]:
model = UsefulScoreRegressorMetaOnly(num_meta_feats=len(nonTextCols))
model = model.cuda()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters())

train_meta_model(num_epochs=3, model=model, optimizer=optimizer,
                 train_loader=train_loader, val_loader=val_loader,
                 criterion=criterion, save_path='../models/RoBERTa_MetaOnly_NoClip.pt', clip=1000.0)

Epoch 0, val loss: inf -> 0.00224, train loss: 0.00244
Epoch 1, val loss: 0.00224 -> 0.00220, train loss: 0.00230
Epoch 2, val loss: 0.00220, train loss: 0.00228
