In [81]:
from LSTM_bidi import *                      # the LSTM_bidi file contains functions for processing a text, performing 
                                             # sentiment analysis with deep learning and explaining the result with LRP
    
from heatmap import html_heatmap             # the heatmap file contains the functions for converting the relevances
                                             # obtained by a LRP to readable heatmaps

import codecs                                # codecs is a package used to code or decodes text to bytes
import numpy as np                           # NumPy is the package for array computing 
from IPython.display import display, HTML    # IPython.display is a public API for display tools in IPython

In [82]:
import pandas as pd
import numpy as np
# Load the dataset into a pandas DataFrame. 'Apple.csv' is a relative path, so it
# must be reachable from the notebook's working directory. Later cells read the
# 'News' column of this frame.
df = pd.read_csv('Apple.csv')    
df.head(5)                       # preview the first five rows as the cell output

Unnamed: 0,News,ds
0,The reputation of big tech faltered as scandal...,2019-01-03
1,Investors seek safe haven as analysts warn of ...,2019-01-04
2,Investors seek safe haven as analysts warn of ...,2019-01-05
3,Investors seek safe haven as analysts warn of ...,2019-01-06
4,Company flags fewer people upgrading iPhones i...,2019-01-07


In [83]:
# Function to process the text in the 'News' column
def process_text(news):
    """Split a headline into lower-case words.

    Commas and periods are turned into spaces before splitting on
    whitespace, so they act as word separators and never appear in the
    returned tokens. Other punctuation is left attached to its word.

    Parameters
    ----------
    news : str
        Raw headline text.

    Returns
    -------
    list of str
        The lower-cased tokens, in their original order.
    """
    separators_to_spaces = str.maketrans({",": " ", ".": " "})
    return news.lower().translate(separators_to_spaces).split()

# Apply the function to the 'News' column.
# NOTE: this overwrites 'News' in place (strings become lists of words), so the
# cell cannot be re-run without reloading the CSV: process_text calls .lower(),
# which a list does not have.
df['News'] = df['News'].apply(process_text)

# Print the processed text
print(df['News'])

0       [the, reputation, of, big, tech, faltered, as,...
1       [investors, seek, safe, haven, as, analysts, w...
2       [investors, seek, safe, haven, as, analysts, w...
3       [investors, seek, safe, haven, as, analysts, w...
4       [company, flags, fewer, people, upgrading, iph...
                              ...                        
1820    [there, is, a, noticeable, gap, between, tech,...
1821    [contract, manufacturer, taking, over, iphone,...
1822    [contract, manufacturer, taking, over, iphone,...
1823    [contract, manufacturer, taking, over, iphone,...
1824    [landmark, eu, regulation, and, crucial, us, c...
Name: News, Length: 1825, dtype: object


In [84]:
def remove_invalid_words(text, net=None):
    """Return the words of ``text`` that are present in the model vocabulary.

    Words are kept in their original order and duplicates are preserved;
    ``text`` itself is not modified.

    Parameters
    ----------
    text : iterable of str
        Tokenised sentence.
    net : object with a ``voc`` attribute, optional
        Model whose vocabulary is used for filtering. When omitted, a new
        ``LSTM_bidi()`` is created for this call (the original behaviour).
        Pass an existing instance to avoid rebuilding the model on every call.

    Returns
    -------
    list of str
        The words of ``text`` that occur in ``net.voc``.
    """
    if net is None:
        net = LSTM_bidi()          # load in the trained neural network
    # A set gives constant-time membership checks, and a single filtering pass
    # replaces the former copy-and-remove loop, which was quadratic in len(text).
    vocabulary = set(net.voc)
    return [w for w in text if w in vocabulary]

In [85]:
# Apply the remove_invalid_words function to the 'News' column.
# The filtered token lists go into a new column, 'News_', so the unfiltered
# tokens in 'News' stay available.
df['News_'] = df['News'].apply(remove_invalid_words)

# Print the processed text
print(df)

                                                   News          ds  \
0     [the, reputation, of, big, tech, faltered, as,...  2019-01-03   
1     [investors, seek, safe, haven, as, analysts, w...  2019-01-04   
2     [investors, seek, safe, haven, as, analysts, w...  2019-01-05   
3     [investors, seek, safe, haven, as, analysts, w...  2019-01-06   
4     [company, flags, fewer, people, upgrading, iph...  2019-01-07   
...                                                 ...         ...   
1820  [there, is, a, noticeable, gap, between, tech,...  2023-12-28   
1821  [contract, manufacturer, taking, over, iphone,...  2023-12-29   
1822  [contract, manufacturer, taking, over, iphone,...  2023-12-30   
1823  [contract, manufacturer, taking, over, iphone,...  2023-12-31   
1824  [landmark, eu, regulation, and, crucial, us, c...  2024-01-01   

                                                  News_  
0     [the, reputation, of, big, as, scandals, emerg...  
1            [seek, safe, as, w

The classes into which the text can be classified are defined here. The sentiment classes are encoded as follows: 
<br> **0 = Very negative, 1 = Negative, 2 = Neutral, 3 = Positive, 4 = Very positive**

A list called `sentiment_coding` is created; it holds the five class labels ("Very negative", "Negative", etc.) ordered so that each label's list index (0-4) equals its class code.

In [86]:
# Human-readable label for each class code; the list index is the class code.
sentiment_coding = [
    "Very negative",   # 0
    "Negative",        # 1
    "Neutral",         # 2
    "Positive",        # 3
    "Very positive",   # 4
]

In [87]:
def predict(words, net=None):
    """Return the classifier's predicted class for a tokenised sentence.

    Parameters
    ----------
    words : iterable of str
        Tokens of the sentence. Every token must be present in ``net.voc``;
        otherwise the ``net.voc.index`` lookup fails.
    net : optional
        Model providing ``voc``, ``set_input`` and ``forward``. When omitted,
        a new ``LSTM_bidi()`` is created for this call (the original
        behaviour). Pass an existing instance to avoid rebuilding the model
        on every call.

    Returns
    -------
    The index of the highest prediction score, as returned by ``np.argmax``.
    """
    if net is None:
        net = LSTM_bidi()                               # load trained LSTM model
    w_indices = [net.voc.index(w) for w in words]       # convert input sentence to word IDs
    net.set_input(w_indices)                            # set LSTM input sequence
    scores = net.forward()                              # classification prediction scores
    return np.argmax(scores)

In [88]:
# Apply the predict function to the 'News_' column (the vocabulary-filtered tokens)
# and store the result in the 'Predict' column
df['Predict'] = df['News_'].apply(predict)
print(df)

                                                   News          ds  \
0     [the, reputation, of, big, tech, faltered, as,...  2019-01-03   
1     [investors, seek, safe, haven, as, analysts, w...  2019-01-04   
2     [investors, seek, safe, haven, as, analysts, w...  2019-01-05   
3     [investors, seek, safe, haven, as, analysts, w...  2019-01-06   
4     [company, flags, fewer, people, upgrading, iph...  2019-01-07   
...                                                 ...         ...   
1820  [there, is, a, noticeable, gap, between, tech,...  2023-12-28   
1821  [contract, manufacturer, taking, over, iphone,...  2023-12-29   
1822  [contract, manufacturer, taking, over, iphone,...  2023-12-30   
1823  [contract, manufacturer, taking, over, iphone,...  2023-12-31   
1824  [landmark, eu, regulation, and, crucial, us, c...  2024-01-01   

                                                  News_  Predict  
0     [the, reputation, of, big, as, scandals, emerg...        2  
1            

In [89]:
# Translate every numeric class code in 'Predict' into its text label and
# store the labels in a new 'Sentiment' column
df['Sentiment'] = [sentiment_coding[class_code] for class_code in df['Predict']]

# Show the DataFrame including the freshly added 'Sentiment' column
print(df)

                                                   News          ds  \
0     [the, reputation, of, big, tech, faltered, as,...  2019-01-03   
1     [investors, seek, safe, haven, as, analysts, w...  2019-01-04   
2     [investors, seek, safe, haven, as, analysts, w...  2019-01-05   
3     [investors, seek, safe, haven, as, analysts, w...  2019-01-06   
4     [company, flags, fewer, people, upgrading, iph...  2019-01-07   
...                                                 ...         ...   
1820  [there, is, a, noticeable, gap, between, tech,...  2023-12-28   
1821  [contract, manufacturer, taking, over, iphone,...  2023-12-29   
1822  [contract, manufacturer, taking, over, iphone,...  2023-12-30   
1823  [contract, manufacturer, taking, over, iphone,...  2023-12-31   
1824  [landmark, eu, regulation, and, crucial, us, c...  2024-01-01   

                                                  News_  Predict Sentiment  
0     [the, reputation, of, big, as, scandals, emerg...        2   Neu

In [90]:
# Hyperparameters handed to net.lrp when relevances are computed
eps = 1e-3           # small positive number
bias_factor = 0.0    # recommended value

In [91]:
# Module-level model instance; perform_lrp reads this global `net`.
net  = LSTM_bidi()         

In [92]:
# Function to perform LRP and compute LRP relevances on each row
def perform_lrp(row):
    """Run LRP for one DataFrame row and collect the results.

    Reads the module-level ``net``, ``eps`` and ``bias_factor``.

    Parameters
    ----------
    row : mapping with 'News_' and 'Predict' entries
        'News_' holds the tokens (all must be in ``net.voc``) and 'Predict'
        the class for which relevances are computed.

    Returns
    -------
    tuple
        ``(Rx, Rx_rev, R_rest, R_words, scores)``, where the first three are
        the values returned by ``net.lrp``, ``R_words`` is ``Rx + Rx_rev``
        summed over axis 1, and ``scores`` is a copy of ``net.s`` taken after
        the LRP call.
    """
    tokens = row['News_']
    target_class = row['Predict']

    # map each token to its position in the model vocabulary
    w_indices = list(map(net.voc.index, tokens))

    # relevance propagation for the chosen class
    Rx, Rx_rev, R_rest = net.lrp(w_indices, target_class, eps, bias_factor)

    # one relevance value per word: add both relevance arrays, then sum over axis 1
    combined = Rx + Rx_rev
    R_words = np.sum(combined, axis=1)

    # net.s is read only after net.lrp has run; copy so the row keeps its own scores
    scores = net.s.copy()
    return Rx, Rx_rev, R_rest, R_words, scores

# Apply the perform_lrp function to the DataFrame and store results in new columns.
# axis=1 passes one row at a time; result_type='expand' spreads the returned
# 5-tuple over the five target columns in the order listed.
df[['Rx', 'Rx_rev', 'R_rest', 'R_words', 'Scores']] = df.apply(perform_lrp, axis=1, result_type='expand')
df

Unnamed: 0,News,ds,News_,Predict,Sentiment,Rx,Rx_rev,R_rest,R_words,Scores
0,"[the, reputation, of, big, tech, faltered, as,...",2019-01-03,"[the, reputation, of, big, as, scandals, emerg...",2,Neutral,"[[0.0007449967513893371, -0.000830202481497705...","[[-0.0018061026835473604, -0.00277085657246500...",0.0,"[0.4203628357521281, 0.13390970641769306, -0.0...","[-1.9761565412117736, 0.5497123486108692, 1.75..."
1,"[investors, seek, safe, haven, as, analysts, w...",2019-01-04,"[seek, safe, as, warn, of, more, on, fear]",1,Negative,"[[0.005489463116452319, 0.0004154020816229863,...","[[-0.001910417449816583, 6.028454618998357e-05...",0.0,"[0.019281313147067717, -0.43518703696217154, 0...","[0.6563181000887129, 2.3293149010110383, 1.030..."
2,"[investors, seek, safe, haven, as, analysts, w...",2019-01-05,"[seek, safe, as, warn, of, more, on, fear]",1,Negative,"[[0.005489463116452319, 0.0004154020816229863,...","[[-0.001910417449816583, 6.028454618998357e-05...",0.0,"[0.019281313147067717, -0.43518703696217154, 0...","[0.6563181000887129, 2.3293149010110383, 1.030..."
3,"[investors, seek, safe, haven, as, analysts, w...",2019-01-06,"[seek, safe, as, warn, of, more, on, fear]",1,Negative,"[[0.005489463116452319, 0.0004154020816229863,...","[[-0.001910417449816583, 6.028454618998357e-05...",0.0,"[0.019281313147067717, -0.43518703696217154, 0...","[0.6563181000887129, 2.3293149010110383, 1.030..."
4,"[company, flags, fewer, people, upgrading, iph...",2019-01-07,"[company, flags, fewer, people, in, rare, warn...",1,Negative,"[[-0.0026273033587368455, 0.000139176705481672...","[[-1.210268378144136e-05, -0.00045703706428114...",0.0,"[0.019592250531083814, -0.11293834189188923, 0...","[-0.2562151215377475, 1.2819844409837318, 0.84..."
...,...,...,...,...,...,...,...,...,...,...
1820,"[there, is, a, noticeable, gap, between, tech,...",2023-12-28,"[there, is, a, noticeable, gap, between, marke...",1,Negative,"[[0.00013890295532375686, 0.000850587365521831...","[[-0.0007620596036001973, 0.002276177649121771...",0.0,"[0.011444394308333971, -0.0528168194384385, -0...","[0.5342927884436623, 2.015608378167208, 0.8196..."
1821,"[contract, manufacturer, taking, over, iphone,...",2023-12-29,"[contract, taking, over, assembly, factory, fr...",1,Negative,"[[-0.002193527942096943, -0.000265054996608077...","[[-0.0027652654871889817, -0.00141971304456276...",0.0,"[0.1056789146432329, -0.30117662303504095, 0.0...","[0.06995588084992715, 1.758464286339057, 0.935..."
1822,"[contract, manufacturer, taking, over, iphone,...",2023-12-30,"[contract, taking, over, assembly, factory, fr...",1,Negative,"[[-0.002193527942096943, -0.000265054996608077...","[[-0.0027652654871889817, -0.00141971304456276...",0.0,"[0.1056789146432329, -0.30117662303504095, 0.0...","[0.06995588084992715, 1.758464286339057, 0.935..."
1823,"[contract, manufacturer, taking, over, iphone,...",2023-12-31,"[contract, taking, over, assembly, factory, fr...",1,Negative,"[[-0.002193527942096943, -0.000265054996608077...","[[-0.0027652654871889817, -0.00141971304456276...",0.0,"[0.1056789146432329, -0.30117662303504095, 0.0...","[0.06995588084992715, 1.758464286339057, 0.935..."


In [104]:
def mapp(row, i):
    """Display the word-relevance heatmap for one entry.

    Parameters
    ----------
    row : DataFrame-like
        Despite its name, this is the whole table (the notebook passes ``df``),
        not a single row. It must provide the 'News_', 'R_words' and 'Scores'
        columns. The table is now taken from this argument rather than from
        the global ``df``, which the previous version read regardless of what
        was passed in.
    i : index label
        Label of the entry to visualise.

    Returns
    -------
    None
        The heatmap is rendered through ``display`` as a side effect.
    """
    words = row['News_'][i]
    relevances = row['R_words'][i]
    scores = row['Scores'][i]
    # The largest and smallest prediction score are forwarded as the third and
    # fourth arguments of html_heatmap, exactly as before.
    display(HTML(html_heatmap(words, relevances, max(scores), min(scores))))
# Blocks until the user types an index label; a non-integer entry raises ValueError.
# NOTE(review): interactive input means this cell cannot run unattended.
i=int(input())
mapp(df,i)

1377


In [101]:
import pickle

# Persist the annotated DataFrame. The context manager closes the file handle,
# which the previous bare open() call never did.
with open('df.pkl', 'wb') as pickle_file:
    pickle.dump(df, pickle_file)

# Reload the DataFrame from disk.
# NOTE: pickle.load can execute arbitrary code; only load files you created yourself.
with open('df.pkl', 'rb') as pickle_file:
    df = pickle.load(pickle_file)
