## This notebook contains demo for our fine-tuned Analyst Tone model. We fine-tuned FinBERT model on 10,000 manually annotated analyst statements. You can use this script and infer sentiment on your customerized dataset.

In [2]:
# download pre-trained and fine-tuned weights, unzip to the working directory
# https://gohkust-my.sharepoint.com/:u:/g/personal/imyiyang_ust_hk/EQJGiEOkhIlBqlW63TbKA3gBCYgDDcHlBCB7VTXIUMmyiA

In [1]:
import sys 
print(sys.version)

3.7.3 (default, Mar 27 2019, 16:54:48) 
[Clang 4.0.1 (tags/RELEASE_401/final)]


In [2]:
import os
import copy
import numpy as np
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from pytorch_pretrained_bert import BertTokenizer, BertModel, BertForMaskedLM, BertConfig
from bertModel import BertClassification, dense_opt
from datasets import text_dataset, financialPhraseBankDataset
import argparse
from sklearn.metrics import f1_score

In [24]:
labels = {0:'neutral', 1:'positive',2:'negative'}
num_labels= len(labels)
vocab ="finance-uncased"
vocab_path = '/Users/svetlana/Downloads/analyst_tone-2/vocab' 
pretrained_weights_path ='/Users/svetlana/Downloads/analyst_tone-2/pretrained_weights'
fine_tuned_weight_path = '/Users/svetlana/Downloads/analyst_tone-2/fine_tuned.pth'   
max_seq_length=256
device=torch.device("cpu")

In [25]:
model = BertClassification(weight_path = pretrained_weights_path, 
                           num_labels=num_labels, vocab=vocab)

In [44]:
print("Model's state_dict:")
for param_tensor in model.state_dict():
    print(param_tensor, "\t", model.state_dict()[param_tensor].size())

Model's state_dict:
bert.embeddings.word_embeddings.weight 	 torch.Size([30873, 768])
bert.embeddings.position_embeddings.weight 	 torch.Size([512, 768])
bert.embeddings.token_type_embeddings.weight 	 torch.Size([2, 768])
bert.embeddings.LayerNorm.weight 	 torch.Size([768])
bert.embeddings.LayerNorm.bias 	 torch.Size([768])
bert.encoder.layer.0.attention.self.query.weight 	 torch.Size([768, 768])
bert.encoder.layer.0.attention.self.query.bias 	 torch.Size([768])
bert.encoder.layer.0.attention.self.key.weight 	 torch.Size([768, 768])
bert.encoder.layer.0.attention.self.key.bias 	 torch.Size([768])
bert.encoder.layer.0.attention.self.value.weight 	 torch.Size([768, 768])
bert.encoder.layer.0.attention.self.value.bias 	 torch.Size([768])
bert.encoder.layer.0.attention.output.dense.weight 	 torch.Size([768, 768])
bert.encoder.layer.0.attention.output.dense.bias 	 torch.Size([768])
bert.encoder.layer.0.attention.output.LayerNorm.weight 	 torch.Size([768])
bert.encoder.layer.0.attention.outp

bert.encoder.layer.11.intermediate.dense.weight 	 torch.Size([3072, 768])
bert.encoder.layer.11.intermediate.dense.bias 	 torch.Size([3072])
bert.encoder.layer.11.output.dense.weight 	 torch.Size([768, 3072])
bert.encoder.layer.11.output.dense.bias 	 torch.Size([768])
bert.encoder.layer.11.output.LayerNorm.weight 	 torch.Size([768])
bert.encoder.layer.11.output.LayerNorm.bias 	 torch.Size([768])
bert.pooler.dense.weight 	 torch.Size([768, 768])
bert.pooler.dense.bias 	 torch.Size([768])
classifier.weight 	 torch.Size([3, 768])
classifier.bias 	 torch.Size([3])


In [47]:
model.load_state_dict(torch.load(fine_tuned_weight_path, map_location=device))
model.to(device)

BertClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30873, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): BertLayerNorm()
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): BertLayerNorm()
              (dropout): Dropout(p=0.1, inplace=False)
            )


# 0 is neutral, 1 is positive, and 2 is negative 

In [99]:
import glob
text_path = '/Users/svetlana/Desktop/Creation/*'
files = glob.glob(text_path)

In [101]:
for file in files:
    with open(file) as f:
        sentences = f.readlines()

In [102]:
sentences

['\ufeffHitachi and SBI, the largest state-owned commercial bank in India,\n',
 'enter into Joint Venture to accelerate Digital Payments in India\n',
 'Partnership to establish a state-of-the-art card acceptance\n',
 'and future ready digital payments platform\n',
 'MUMBAI, October 29, 2018 --- Hitachi, Ltd. (TSE: 6501, "Hitachi") today announced that Hitachi Payment Services Pvt. Ltd. ("Hitachi Payments"), a wholly-owned subsidiary based in India of Hitachi, and State Bank of India ("SBI") have signed a definitive agreement to enter into a joint venture for the establishment of a state-of-the-art card acceptance and future ready digital payments platform for India.\n',
 'It is planned that Hitachi Payments will invest [26%] to SBI Payment Services Pvt. Ltd. ("SBI Payment"), a wholly-owned subsidiary of SBI, and through this investment, SBI Payment will be a joint venture between both parties. Both parties will proceed to apply for regulatory approvals. \n',
 'Financial services market

In [103]:
tokenizer = BertTokenizer(vocab_file = vocab_path, do_lower_case = True, do_basic_tokenize = True)

In [104]:
model.eval()
for sent in sentences: 
    tokenized_sent = tokenizer.tokenize(sent)
    if len(tokenized_sent) > max_seq_length:
        tokenized_sent = tokenized_sent[:max_seq_length]
    
    ids_review  = tokenizer.convert_tokens_to_ids(tokenized_sent)
    mask_input = [1]*len(ids_review)        
    padding = [0] * (max_seq_length - len(ids_review))
    ids_review += padding
    mask_input += padding
    input_type = [0]*max_seq_length
    
    input_ids = torch.tensor(ids_review).to(device).reshape(-1, 256)
    attention_mask =  torch.tensor(mask_input).to(device).reshape(-1, 256)
    token_type_ids = torch.tensor(input_type).to(device).reshape(-1, 256)
    
    with torch.set_grad_enabled(False):
        outputs = model(input_ids, token_type_ids, attention_mask)
        outputs = F.softmax(outputs,dim=1)
        print(sent, '\nFinBERT predicted sentiment: ', labels[torch.argmax(outputs).item()], '\n')
    

﻿Hitachi and SBI, the largest state-owned commercial bank in India,
 
FinBERT predicted sentiment:  neutral 

enter into Joint Venture to accelerate Digital Payments in India
 
FinBERT predicted sentiment:  neutral 

Partnership to establish a state-of-the-art card acceptance
 
FinBERT predicted sentiment:  neutral 

and future ready digital payments platform
 
FinBERT predicted sentiment:  positive 

MUMBAI, October 29, 2018 --- Hitachi, Ltd. (TSE: 6501, "Hitachi") today announced that Hitachi Payment Services Pvt. Ltd. ("Hitachi Payments"), a wholly-owned subsidiary based in India of Hitachi, and State Bank of India ("SBI") have signed a definitive agreement to enter into a joint venture for the establishment of a state-of-the-art card acceptance and future ready digital payments platform for India.
 
FinBERT predicted sentiment:  neutral 

It is planned that Hitachi Payments will invest [26%] to SBI Payment Services Pvt. Ltd. ("SBI Payment"), a wholly-owned subsidiary of SBI, and th