### Test Loading the TorchScript Model

In [5]:
import torch
import pandas as pd

In [20]:
model_path = "sentiment_rnn.pt"
model = torch.jit.load(model_path, map_location=torch.device('cpu'))
model.eval()

RecursiveScriptModule(
  original_name=SentimentRNN
  (embedding): RecursiveScriptModule(original_name=Embedding)
  (lstm): RecursiveScriptModule(original_name=LSTM)
  (dropout): RecursiveScriptModule(original_name=Dropout)
  (fc): RecursiveScriptModule(original_name=Linear)
  (sig): RecursiveScriptModule(original_name=Sigmoid)
)

In [7]:
data = pd.read_csv("data.csv")
data.head()

Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,positive
1,A wonderful little production. <br /><br />The...,positive
2,I thought this was a wonderful way to spend ti...,positive
3,Basically there's a family where a little boy ...,negative
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive


In [12]:
from string import punctuation
import re
from sklearn.model_selection import train_test_split
from collections import Counter
import nltk
nltk.download('stopwords')

def remove_punc(text):
    text = text.lower()
    return ("".join(i for i in text if i not in punctuation))

data["review"] = data["review"].apply(remove_punc)
print(data.head())
X = data["review"].values
y = data["sentiment"].values
X_train,X_test,y_train,y_test = train_test_split(X,y,stratify=y)
print(X_train.shape)
print(X_test.shape)

def process(string):
    string = re.sub(r"[^\w\s]", '', string)
    string = re.sub(r"\d", '', string)
    string = re.sub(r"\s+", '', string)
    return string

def tokenize(X_train,y_train,X_test,y_test):
    words = []
    stop_words = set(stopwords.words('english')) 
    for x in X_train:
        for word in x.split():
            word = process(word)
            if word not in stop_words and word != '':
                words.append(word)
                
    counts = Counter(words)
    vocab = sorted(counts, key=counts.get, reverse=True)[:1000]
    vocab_to_int = {word: ii for ii, word in enumerate(vocab,1)}
    new_X_train = []
    new_X_test = []
    for s in X_train:
            new_X_train.append([vocab_to_int[process(word)] for word in s.split() 
                                     if process(word) in vocab_to_int.keys()])
    for s in X_test:
            new_X_test.append([vocab_to_int[process(word)] for word in s.split() 
                                    if process(word) in vocab_to_int.keys()])
            
    new_y_train = [1 if label =='positive' else 0 for label in y_train]  
    new_y_test = [1 if label =='positive' else 0 for label in y_test]
    return new_X_train, new_y_train,new_X_test, new_y_test, vocab_to_int

X_train,y_train,X_test,y_test,vocab_to_int = tokenize(X_train,y_train,X_test,y_test)

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/edbertwidjaja/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


                                              review sentiment
0  one of the other reviewers has mentioned that ...  positive
1  a wonderful little production br br the filmin...  positive
2  i thought this was a wonderful way to spend ti...  positive
3  basically theres a family where a little boy j...  negative
4  petter matteis love in the time of money is a ...  positive
(37500,)
(12500,)


In [13]:
def padding(sentence, seqLength):
    #determine shape
    features = np.zeros((len(sentence), seqLength), dtype=int)
    for i, row in enumerate(sentence):
        if len(row) != 0:
            features[i, -len(row):] = np.array(row)[:seqLength]
    return features

In [32]:
import numpy as np

def init_hidden(batch_size, device='cpu'):
    hidden_dim = 256 
    num_layers = 2    
    
    return (
        torch.zeros(num_layers, batch_size, hidden_dim).to(device),
        torch.zeros(num_layers, batch_size, hidden_dim).to(device)
    )
    
def tokenize_review(test_review):
    test_review = test_review.lower()
    test_text = ''.join([i for i in test_review if i not in punctuation])
    test_words = test_text.split()
    test_ints = []
    test_ints.append([vocab_to_int.get(word, 0) for word in test_words])
    return test_ints

def predict(net, test_review, sequence_length=500):
    model.eval()
    test_ints = tokenize_review(test_review)
    seq_length=sequence_length
    features = padding(test_ints, seq_length)
    feature_tensor = torch.from_numpy(features)
    batch_size = feature_tensor.size(0)
    h = init_hidden(batch_size)
    with torch.no_grad():
        output, h= model(feature_tensor,h)
    #print('Prediction value: {:.6f}'.format(output.item()))
    if(output.item() > 0.5):
        print("Positive market sentiment detected! With probability of:",output.item())
    else:
        print("Negative market sentiment detected! With probability of:", (1 - output.item()))
        
test_review = 'The worst movie I have seen; acting was terrible and I want my money back. This movie had bad acting and the dialogue was slow.'
predict(model, test_review, 500) 

Negative market sentiment detected! With probability of: 0.674791008234024


In [29]:
import finnhub
from dotenv import load_dotenv
import os
from datetime import datetime
from dateutil.relativedelta import relativedelta

load_dotenv()

current_date = datetime.today()
start_date = current_date - relativedelta(months=1)
finnhub_client = finnhub.Client(api_key=os.getenv("FINNHUB_API_KEY"))
ALL_NEWS = finnhub_client.company_news('NVDA', _from=start_date.strftime('%Y-%m-%d'), to=current_date.strftime('%Y-%m-%d'))

In [31]:
all_text = []
for news in ALL_NEWS:
    text = news['headline'] + " " + news['summary']
    all_text.append(text)
    
for text in all_text:
    print("Text:", text)
    predict(model, text, 500)
    print("\n")

Text: What Nvidia's Jensen Huang and OpenAI's Sam Altman Had to Say This Week About AI and Jobs Nvidia CEO Jensen Huang said this week he expects “everybody’s jobs will be changed" by artificial intelligence.
Prediction value: 0.717717
Positive review detected! With probability of: 0.7177172303199768


Text: Intel: Even A Giant Leap Of Faith May Not Be Enough Intel's turnaround under CEO Tan faces major structural, technological, and competitive hurdles. Read more on the INTC stock and if there are any prospects for a turnaround.
Prediction value: 0.717692
Positive review detected! With probability of: 0.7176918983459473


Text: Should You Forget Nvidia and Buy These 2 Artificial Intelligence (AI) Stocks Instead? Nvidia is the king of AI accelerator chips, but some smaller AI players may have more upside.  AMD has a big opportunity as the AI inference market grows.  Broadcom has big opportunities in networking, custom AI chips, and virtualization.
Prediction value: 0.717600
Positive re