In [1]:
import torch.nn as nn
import torch.nn.functional as F
import torch
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import pandas as pd
data = pd.read_csv('US_nn_plus.csv')
#Combine the last three columns into one simple string column
data['text'] = data['movie'] + ' ' + data['synopsis'] + ' ' + data['taglines']
data.drop(['movie', 'synopsis', 'taglines'], axis=1, inplace=True)

In [6]:
from transformers import BertTokenizer, BertModel
tokenizer = BertTokenizer.from_pretrained('F:\Bert')
text_input = data['text']
text_input = text_input.fillna('').apply(lambda x: tokenizer.encode(x, add_special_tokens=False))
#Make sure all sequences have the same length 512
text_input = [i[:512] for i in text_input]
attention_mask = [[1] * len(i) + [0] * (512 - len(i)) for i in text_input]
text_input = [i + [0] * (512 - len(i)) for i in text_input]
bert = BertModel.from_pretrained('F:\Bert').to('cuda')
text_input = torch.tensor(text_input).to('cuda')
attention_mask = torch.tensor(attention_mask).to('cuda')
bert(text_input[0].unsqueeze(0))
output = torch.zeros((len(text_input), 512, 1024)).to('cuda')
bert.eval()
with torch.no_grad():
    for i in range(len(text_input) // 64 + 1):
        torch.cuda.empty_cache()
        output[i:i+64]  = bert(text_input[i:i+64], attention_mask=attention_mask[i:i+64]).last_hidden_state
        print('Current progress:', i / (len(text_input) // 64 + 1))
del bert
del text_input
del attention_mask
output.to('cpu')
text_input = output
del output
torch.cuda.empty_cache()
torch.save(text_input, 'text_input.pt')




In [1]:
import torch.nn as nn
import torch.nn.functional as F
import torch
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import pandas as pd
data = pd.read_csv('US_nn_plus.csv')
#Combine the last three columns into one simple string column
data['text'] = data['movie'] + ' ' + data['synopsis'] + ' ' + data['taglines']
data.drop(['movie', 'synopsis', 'taglines'], axis=1, inplace=True)
text_input = torch.load('text_input.pt')
profit_mean = data['profit'].mean()
profit_std = data['profit'].std()
def encode_profit(x):
    return (x - profit_mean) / profit_std
def decode_profit(x):
    return x * profit_std + profit_mean
data['profit'] = data['profit'].apply(encode_profit)
text_input = text_input.float()
class CustomDataset(Dataset):
    def __init__(self, data, text_input):
        self.input = torch.tensor(data[data.columns[4:-1]].values, dtype=torch.float32)
        self.text_input = text_input[:len(self.input)]
        self.output = torch.tensor(data[data.columns[0:4]].values, dtype=torch.float32)
    def __getitem__(self, index):
        x = self.input[index]
        x_text = self.text_input[index]
        y_profits = self.output[index, 0]
        y_recom = self.output[index, 1:]
        return x, x_text, y_profits, y_recom
    def __len__(self):
        return len(self.input)
dataset = CustomDataset(data,text_input)
train, val = torch.utils.data.random_split(dataset, [0.8,0.2])
train_loader = DataLoader(train, batch_size=32, shuffle=True)
val_loader = DataLoader(val, batch_size=len(val), shuffle=True)
class Net(nn.Module):
    def __init__(self,drop=0.3):
        super(Net, self).__init__()
        self.attention = nn.MultiheadAttention(1024, 1,dropout=drop)
        self.fc_for_text = nn.Linear(1024*512, 16)
        self.fc = nn.Linear(13, 16)
        self.bn1 = nn.BatchNorm1d(16)
        self.dropout = nn.Dropout(drop)
        self.fc2 = nn.Linear(32, 16)
        self.bn2 = nn.BatchNorm1d(16)
        self.dropout2 = nn.Dropout(drop)
        self.fc3 = nn.Linear(16, 4)
    def forward (self, x, x_text):
        x_text = self.attention(x_text, x_text, x_text)[0]
        x_text = self.fc_for_text(x_text.view(x_text.shape[0], -1))
        x = self.fc(x)
        x = self.bn1(x)
        x = self.dropout(x)
        x = F.relu(self.fc2(torch.cat([x, x_text], axis=1)))
        x = self.bn2(x)
        x = self.dropout2(x)
        x = self.fc3(x)
        return x
def loss_fn(x, y_profits, y_recom):
    profits = x[:,0]
    recom = F.softmax(x[:,1:], dim=1)
    return F.binary_cross_entropy(recom, y_recom)
model = Net(0.5).to('cuda')

In [2]:
optimizer = optim.Adam(model.parameters(), lr=0.001,weight_decay=0.05)
#Use simple scheduler
sceduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
def train_one_epoch(model, optimizer, train_loader):
    model.train()
    for x, x_text, y_profits, y_recom in train_loader:
        x = x.to('cuda')
        x_text = x_text.to('cuda')
        y_profits = y_profits.to('cuda')
        y_recom = y_recom.to('cuda')
        optimizer.zero_grad()
        output = model(x, x_text)
        loss = loss_fn(output, y_profits, y_recom)
        loss.backward()
        optimizer.step()
        torch.cuda.empty_cache()
    return loss.item()/len(train_loader)
def val_loss(model, val):
    model.eval()
    with torch.no_grad():
        for x, x_text, y_profits, y_recom in val:
            x = x.to('cuda')
            x_text = x_text.to('cuda')
            y_profits = y_profits.to('cuda')
            y_recom = y_recom.to('cuda')
            output = model(x, x_text)
            loss = loss_fn(output, y_profits, y_recom)
        return loss.item()

In [3]:
for i in range(100):
    print('Epoch:', i)
    print('Train loss:', train_one_epoch(model, optimizer, train_loader))
    print('Validation loss:', val_loss(model, val_loader))

Epoch: 0
Train loss: 0.05221553643544515
Validation loss: 48.779212951660156
Epoch: 1
Train loss: 0.04620980421702067
Validation loss: 33.96809005737305
Epoch: 2
Train loss: 0.04406023820241292
Validation loss: 7.5883049964904785
Epoch: 3
Train loss: 0.043439420064290364
Validation loss: 33.95083236694336
Epoch: 4
Train loss: 0.04233328501383463
Validation loss: 36.65122604370117
Epoch: 5
Train loss: 0.044561461607615156
Validation loss: 36.9450798034668
Epoch: 6
Train loss: 0.04455483357111613
Validation loss: 37.90485382080078
Epoch: 7
Train loss: 0.03754165569941203
Validation loss: 40.11179733276367
Epoch: 8
Train loss: 0.03922600746154785
Validation loss: 45.906349182128906
Epoch: 9
Train loss: 0.03644063472747803
Validation loss: 51.04633331298828
Epoch: 10
Train loss: 0.03459502061208089
Validation loss: 56.226009368896484
Epoch: 11
Train loss: 0.03842264413833618
Validation loss: 26.189849853515625
Epoch: 12
Train loss: 0.03296025196711223
Validation loss: 19.115825653076172
Ep

In [4]:
torch.save({"model":model.to('cpu'), "parameters":model.to('cpu').state_dict()}, 'model.pth')

In [5]:
# model.load_state_dict(torch.load('model.pth')['parameters'])

In [6]:
def recom_accuracy(x,x_text,y_recom):
    recom = F.softmax(model(x, x_text)[:,1:], dim=1)
    recom = torch.argmax(recom, dim=1)
    y_recom = torch.argmax(y_recom, dim=1)
    return torch.sum(recom == y_recom).item() / len(y_recom)

In [7]:
torch.cuda.empty_cache()

In [10]:
#Caclulate the accuracy of all datasets
model.eval()
model.to('cuda')
x_all = torch.tensor(data[data.columns[4:-1]].values, dtype=torch.float32).to('cuda')
x_text_all = text_input.to('cuda')
y_recommend_all = torch.tensor(data[data.columns[1:4]].values, dtype=torch.float32).to('cuda')
acc_sum = 0
for i in range(len(x_all)//32 + 1):
    torch.cuda.empty_cache()
    x = x_all[i*32:(i+1)*32]
    x_text = x_text_all[i*32:(i+1)*32]
    y_recommend = y_recommend_all[i*32:(i+1)*32]
    acc_sum += recom_accuracy(x, x_text, y_recommend)*x.shape[0]
print('Recommendation accuracy:', acc_sum/len(x_all))
#Calulate Val accuracy
torch.cuda.empty_cache()
for x_val, x_text_val, y_profits_val, y_recommend_val in val_loader:
    x_val = x_val.to('cuda')
    x_text_val = x_text_val.to('cuda')
    y_recommend_val = y_recommend_val.to('cuda')
    y_profits_val = y_profits_val.to('cuda')
acc_sum_val = 0
for i in range(len(x_val)//32 + 1):
    torch.cuda.empty_cache()
    x = x_val[i*32:(i+1)*32]
    x_text = x_text_val[i*32:(i+1)*32]
    y_recommend = y_recommend_val[i*32:(i+1)*32]
    acc_sum_val += recom_accuracy(x, x_text, y_recommend)*x.shape[0]
print('Val Recommendation accuracy:', acc_sum_val/len(x_val))

Recommendation accuracy: 0.7566666666666667
Val Recommendation accuracy: 0.7


In [9]:
data['is_poor'].sum() / len(data)

0.7566666666666667