# Data Preparation

In [49]:
import pandas
import matplotlib.pyplot as plt
from collections import Counter
import string
import numpy as np

from tensorflow.keras import layers
from tensorflow.keras import losses

wf = pandas.read_csv("./EMPOLITICON.csv")
wf

Unnamed: 0,Country,Date,Speaker,Headline,Text_of_Speech,Designation,Running President/PM,Speech Link,Emotion,Context
0,Russia,16/07/2021,Vladimir Putin,Meeting of APEC Economic Leaders,"Madam Chair,\n\nColleagues,\n\nFirst of all, ...",President,Vladimir Putin,http://en.kremlin.ru/events/president/transcri...,OPTIMISM,DEVELOPMENT
1,Russia,2021-09-05 00:00:00,Vladimir Putin,Victory Parade on Red Square,"Citizens of Russia,\n\nDear veterans,\n\nComra...",President,Vladimir Putin,http://en.kremlin.ru/events/president/transcri...,JOY,NATIONALISM
2,Russia,2021-08-04 00:00:00,Vladimir Putin,Meeting on the results of implementing Preside...,"Good afternoon, colleagues.\n\nLet’s start.\n\...",President,Vladimir Putin,http://en.kremlin.ru/events/president/transcri...,NEUTRAL,DEVELOPMENT
3,Russia,21-11-2020,Vladimir Putin,G20 Summit,"Colleagues,\n\nThe scope of problems humanity ...",President,Vladimir Putin,http://en.kremlin.ru/events/president/transcri...,NEUTRAL,DEVELOPMENT
4,Russia,20-11-2020,Vladimir Putin,Address to participants in Nuremberg Lessons f...,"Colleagues, friends,\n\nFirst of all, I would ...",President,Vladimir Putin,http://en.kremlin.ru/events/president/transcri...,UPSET,EXTREMISM
...,...,...,...,...,...,...,...,...,...,...
2005,USA,05/02/2013,Barack Obama,\nRemarks by the President,"Good afternoon, everybody. \n\nI wanted to sa...",President,Barack Obama,https://obamawhitehouse.archives.gov/the-press...,NEUTRAL,DEVELOPMENT
2006,USA,04/02/2013,Barack Obama,\nRemarks by the President on Preventing Gun V...,"Hello, everybody. Please have a seat. Have a...",President,Barack Obama,https://obamawhitehouse.archives.gov/the-press...,NEUTRAL,OTHERS
2007,USA,02/02/2013,Barack Obama,\nWeekly Address: A Balanced Approach to Growi...,"Hi, everybody. \n\nIn the coming weeks, we fa...",President,Barack Obama,https://obamawhitehouse.archives.gov/the-press...,OPTIMISM,DEVELOPMENT
2008,USA,28/01/2013,Barack Obama,\nRemarks by the President Before Meeting with...,"Well, Vice President Biden and I just want to...",President,Barack Obama,https://obamawhitehouse.archives.gov/the-press...,OPTIMISM,OTHERS


In [50]:
# shuffing the data
wf = wf.sample(frac=1).reset_index(drop=True)
wf

Unnamed: 0,Country,Date,Speaker,Headline,Text_of_Speech,Designation,Running President/PM,Speech Link,Emotion,Context
0,USA,18/09/2013,Barack Obama,\nRemarks by the President at the Business Rou...,"Thank you, everybody. (Applause.) Well, Jim,...",President,Barack Obama,https://obamawhitehouse.archives.gov/the-press...,NEUTRAL,DEVELOPMENT
1,China,2019-04-27 00:00:00,H.E. Xi Jinping,Remarks by H.E. Xi Jinping President of the Pe...,"Ladies and Gentlemen,\nFriends from the Media,...",President,Xi Jinping,https://www.fmprc.gov.cn/mfa_eng/wjdt_665385/z...,NEUTRAL,DEVELOPMENT
2,USA,2021-10-14 00:00:00,Joe Biden,Remarks by President Biden on the COVID-⁠19 Re...,THE PRESIDENT: Good afternoon. I’ve just been ...,President,Joe Biden,https://www.whitehouse.gov/briefing-room/speec...,JOY,DEVELOPMENT
3,USA,09/05/2013,Barack Obama,\nRemarks by the President at Applied Material...,"Hello, Austin! How you doing? (Applause.) We...",President,Barack Obama,https://obamawhitehouse.archives.gov/the-press...,JOY,DEVELOPMENT
4,USA,30/04/2016,Barack Obama,\nWeekly Address: It’s Time for the Senate To ...,"Hi, everybody. It’s now been 45 days since I...",President,Barack Obama,https://obamawhitehouse.archives.gov/the-press...,OPTIMISM,NATIONALISM
...,...,...,...,...,...,...,...,...,...,...
2005,USA,2017-04-25 00:00:00,Donald Trump,Remarks by President Trump at United States Ho...,THE PRESIDENT: Thank you very much. Thank yo...,President,Donald Trump,https://trumpwhitehouse.archives.gov/briefings...,JOY,NATIONALISM
2006,USA,14/11/2015,Barack Obama,\nWeekly Address: Giving Veterans their Chance,"Hi, everybody. This week, America came toget...",President,Barack Obama,https://obamawhitehouse.archives.gov/the-press...,JOY,OTHERS
2007,China,2015-09-25 00:00:00,Xi Jinping,Remarks by H.E. Xi Jinping President of the Pe...,"Mr. President and Mrs. Obama,\nLadies and Gent...",President,Xi Jinping,https://www.fmprc.gov.cn/mfa_eng/wjdt_665385/z...,OPTIMISM,INTERNATIONAL AFFAIRS
2008,USA,2021-11-04 00:00:00,Joe Biden,Statement of President Joe Biden on Unemployme...,"Today we learned that, for the fifth consecuti...",President,Joe Biden,https://www.whitehouse.gov/briefing-room/state...,NEUTRAL,DEVELOPMENT


In [51]:
def count_unique_word(wf):
    count = Counter()
    for word in wf['Emotion']:
        count[word] += 1
    return count

counter = count_unique_word(wf)

In [52]:
print(counter.most_common())

[('NEUTRAL', 589), ('OPTIMISM', 583), ('JOY', 525), ('UPSET', 313)]


Setting labels for a multi-classification problem

In [53]:
#labeling the emotion column such that the most common emotion is 0, the second most common is 1 and so on
for i in range(len(wf)):
    if wf['Emotion'][i] == counter.most_common()[0][0]:
        wf['Emotion'][i] = 0
    elif wf['Emotion'][i] == counter.most_common()[1][0]:
        wf['Emotion'][i] = 1
    elif wf['Emotion'][i] == counter.most_common()[2][0]:
        wf['Emotion'][i] = 2
    elif wf['Emotion'][i] == counter.most_common()[3][0]:
        wf['Emotion'][i] = 3

#these labels will be used to train the model

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  wf['Emotion'][i] = 0
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because th

In [54]:
wf

Unnamed: 0,Country,Date,Speaker,Headline,Text_of_Speech,Designation,Running President/PM,Speech Link,Emotion,Context
0,USA,18/09/2013,Barack Obama,\nRemarks by the President at the Business Rou...,"Thank you, everybody. (Applause.) Well, Jim,...",President,Barack Obama,https://obamawhitehouse.archives.gov/the-press...,0,DEVELOPMENT
1,China,2019-04-27 00:00:00,H.E. Xi Jinping,Remarks by H.E. Xi Jinping President of the Pe...,"Ladies and Gentlemen,\nFriends from the Media,...",President,Xi Jinping,https://www.fmprc.gov.cn/mfa_eng/wjdt_665385/z...,0,DEVELOPMENT
2,USA,2021-10-14 00:00:00,Joe Biden,Remarks by President Biden on the COVID-⁠19 Re...,THE PRESIDENT: Good afternoon. I’ve just been ...,President,Joe Biden,https://www.whitehouse.gov/briefing-room/speec...,2,DEVELOPMENT
3,USA,09/05/2013,Barack Obama,\nRemarks by the President at Applied Material...,"Hello, Austin! How you doing? (Applause.) We...",President,Barack Obama,https://obamawhitehouse.archives.gov/the-press...,2,DEVELOPMENT
4,USA,30/04/2016,Barack Obama,\nWeekly Address: It’s Time for the Senate To ...,"Hi, everybody. It’s now been 45 days since I...",President,Barack Obama,https://obamawhitehouse.archives.gov/the-press...,1,NATIONALISM
...,...,...,...,...,...,...,...,...,...,...
2005,USA,2017-04-25 00:00:00,Donald Trump,Remarks by President Trump at United States Ho...,THE PRESIDENT: Thank you very much. Thank yo...,President,Donald Trump,https://trumpwhitehouse.archives.gov/briefings...,2,NATIONALISM
2006,USA,14/11/2015,Barack Obama,\nWeekly Address: Giving Veterans their Chance,"Hi, everybody. This week, America came toget...",President,Barack Obama,https://obamawhitehouse.archives.gov/the-press...,2,OTHERS
2007,China,2015-09-25 00:00:00,Xi Jinping,Remarks by H.E. Xi Jinping President of the Pe...,"Mr. President and Mrs. Obama,\nLadies and Gent...",President,Xi Jinping,https://www.fmprc.gov.cn/mfa_eng/wjdt_665385/z...,1,INTERNATIONAL AFFAIRS
2008,USA,2021-11-04 00:00:00,Joe Biden,Statement of President Joe Biden on Unemployme...,"Today we learned that, for the fifth consecuti...",President,Joe Biden,https://www.whitehouse.gov/briefing-room/state...,0,DEVELOPMENT


In [55]:
# wf = wf.head(739)

In [56]:
wf.iloc[76]['Text_of_Speech']

'We must realise the full complexity and importance of this moment in our centuries-old country’s historical development.\n\nYou know, practically from the moment a person is born, disease-inducing bacteria and health-threatening viruses enter his body. But if he grows up strong and healthy, his immune system suppresses these disease-causing germs and viruses. The minute his health weakens, however, they are all let loose and provoke an onslaught of life-threatening illnesses. That is what has happened here – the country, the state, became weakened and so we find ourselves now facing this onslaught. There is no sense in us now heaping particular blame on those who deliberately provoke this situation, I will speak about this separately, but these harmful elements are present inside each person’s body and within each state. What we need to do is improve the way the power system works and the country is managed. We need to create an effective economy. We need to restore the health of the 

In [57]:
## one thing we can do is to use the model from the presidential classifier and then, if a given speech is classified
## as angry or some other negative mood, and it includes the word "US, NATO", or something related to the west, we classify it as adversarial

In [58]:
#removing the punctuation

## Yet to apply this to the data

def remove_punctuation(text):
    translator = str.maketrans("", "", string.punctuation)
    return text.translate(translator)

In [59]:
#removing the stop words (commonly used words that do not add much meaning to a sentence)

## Yet to actually apply this function to the data

import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords

stop = set(stopwords.words("english"))

def remove_stopwords(text):
    #removes stop words
    filtered_words = [word.lower() for word in text.split() if word.lower() not in stop]
    return " ".join(filtered_words)


[nltk_data] Downloading package stopwords to /home/yash/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [60]:
#cleaning up the speeches so that common stopwords and puntuation are removed
for i in range(len(wf['Text_of_Speech'])):
    wf['Text_of_Speech'][i] = remove_punctuation(wf['Text_of_Speech'][i])
    wf['Text_of_Speech'][i] = remove_stopwords(wf['Text_of_Speech'][i])

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  wf['Text_of_Speech'][i] = remove_punctuation(wf['Text_of_Speech'][i])
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to up

In [61]:
#drop rows without string in speech
for i in range(len(wf) - 1, -1, -1):
        if not (isinstance(wf.iloc[i]['Text_of_Speech'], str)):
                wf = wf.drop(i)

In [62]:
#counting the number of unique words in each speech


def count_unique_word(wf_training):
    count = Counter()
    for i in range(len(wf_training)):
        for word in wf_training.iloc[i]['Text_of_Speech'].split():
            count[word] += 1
    return count

counter = count_unique_word(wf)

In [63]:
counter

Counter({'people': 9977,
         '—': 8222,
         'new': 6587,
         'world': 6530,
         'work': 6325,
         'us': 6261,
         'china': 6246,
         'countries': 6156,
         'development': 6030,
         'one': 5367,
         'also': 5297,
         'country': 5190,
         'cooperation': 5139,
         'today': 4997,
         'need': 4856,
         'make': 4751,
         'thank': 4400,
         'years': 4319,
         'like': 4236,
         'must': 4210,
         'going': 4136,
         'want': 4067,
         'know': 4061,
         'would': 3998,
         'it’s': 3995,
         'time': 3944,
         'international': 3876,
         'economic': 3761,
         'many': 3709,
         'applause': 3698,
         'first': 3525,
         'we’re': 3520,
         '–': 3500,
         'year': 3467,
         'security': 3461,
         'united': 3394,
         'economy': 3278,
         'states': 3250,
         'that’s': 3248,
         'every': 3222,
         'great': 3150,
  

In [64]:
num_unique_words = len(counter)

In [65]:
print(len(wf))

2010


In [66]:
#80% training 20% testing split
wf_training = wf[:int(len(wf) * 0.8)]
wf_testing = wf[int(len(wf) * 0.8):]

In [67]:
wf_testing.reset_index(inplace=True)
wf_testing.drop('index', axis=1, inplace=True)
wf_testing

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wf_testing.drop('index', axis=1, inplace=True)


Unnamed: 0,Country,Date,Speaker,Headline,Text_of_Speech,Designation,Running President/PM,Speech Link,Emotion,Context
0,USA,25/07/2013,Barack Obama,\nRemarks by the President at Iftar Dinner,thank good evening everybody please seat let b...,President,Barack Obama,https://obamawhitehouse.archives.gov/the-press...,2,OTHERS
1,Russia,30-10-2015,Vladimir Putin,Security Council meeting,good afternoon colleagues subject agenda today...,President,Vladimir Putin,http://en.kremlin.ru/events/president/transcri...,0,DEVELOPMENT
2,USA,2021-08-09 00:00:00,Joe Biden,Statement by President Joe Biden on COVID-⁠19 ...,strongly support secretary austin’s message fo...,President,Joe Biden,https://www.whitehouse.gov/briefing-room/state...,2,DEVELOPMENT
3,USA,19/08/2016,Barack Obama,\nWeekly Address: Celebrating the 100th Annive...,hi everybody earlier summer michelle malia sas...,President,Barack Obama,https://obamawhitehouse.archives.gov/the-press...,2,DEVELOPMENT
4,Russia,26-04-2010,Dmitry Medvedev,Ceremony Awarding Russian Federation 65th Anni...,majesty ladies gentlemen dear norwegian friend...,President,Dmitry Medvedev,http://en.kremlin.ru/events/president/transcri...,2,NATIONALISM
...,...,...,...,...,...,...,...,...,...,...
397,USA,2017-04-25 00:00:00,Donald Trump,Remarks by President Trump at United States Ho...,president thank much thank friends members con...,President,Donald Trump,https://trumpwhitehouse.archives.gov/briefings...,2,NATIONALISM
398,USA,14/11/2015,Barack Obama,\nWeekly Address: Giving Veterans their Chance,hi everybody week america came together salute...,President,Barack Obama,https://obamawhitehouse.archives.gov/the-press...,2,OTHERS
399,China,2015-09-25 00:00:00,Xi Jinping,Remarks by H.E. Xi Jinping President of the Pe...,mr president mrs obama ladies gentlemen dear f...,President,Xi Jinping,https://www.fmprc.gov.cn/mfa_eng/wjdt_665385/z...,1,INTERNATIONAL AFFAIRS
400,USA,2021-11-04 00:00:00,Joe Biden,Statement of President Joe Biden on Unemployme...,today learned fifth consecutive week initial u...,President,Joe Biden,https://www.whitehouse.gov/briefing-room/state...,0,DEVELOPMENT


In [68]:
wf_training.reset_index(inplace=True)
wf_training.drop('index', axis=1, inplace=True)
wf_training

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wf_training.drop('index', axis=1, inplace=True)


Unnamed: 0,Country,Date,Speaker,Headline,Text_of_Speech,Designation,Running President/PM,Speech Link,Emotion,Context
0,USA,18/09/2013,Barack Obama,\nRemarks by the President at the Business Rou...,thank everybody applause well jim thank introd...,President,Barack Obama,https://obamawhitehouse.archives.gov/the-press...,0,DEVELOPMENT
1,China,2019-04-27 00:00:00,H.E. Xi Jinping,Remarks by H.E. Xi Jinping President of the Pe...,ladies gentlemen friends media good afternoon ...,President,Xi Jinping,https://www.fmprc.gov.cn/mfa_eng/wjdt_665385/z...,0,DEVELOPMENT
2,USA,2021-10-14 00:00:00,Joe Biden,Remarks by President Biden on the COVID-⁠19 Re...,president good afternoon i’ve briefed covid19 ...,President,Joe Biden,https://www.whitehouse.gov/briefing-room/speec...,2,DEVELOPMENT
3,USA,09/05/2013,Barack Obama,\nRemarks by the President at Applied Material...,hello austin applause well wonderful see today...,President,Barack Obama,https://obamawhitehouse.archives.gov/the-press...,2,DEVELOPMENT
4,USA,30/04/2016,Barack Obama,\nWeekly Address: It’s Time for the Senate To ...,hi everybody it’s 45 days since nominated judg...,President,Barack Obama,https://obamawhitehouse.archives.gov/the-press...,1,NATIONALISM
...,...,...,...,...,...,...,...,...,...,...
1603,Russia,24-03-2006,Vladimir Putin,Opening Remarks at the State Council Meeting o...,good afternoon dear colleagues today examining...,President,Vladimir Putin,http://en.kremlin.ru/events/president/transcri...,0,DEVELOPMENT
1604,Russia,26-11-2002,Vladimir Putin,Speech at a Meeting of the Armed Forces Command,good afternoon keeping tradition today reviewi...,President,Vladimir Putin,http://en.kremlin.ru/events/president/transcri...,0,DEVELOPMENT
1605,Russia,17-09-2009,Dmitry Medvedev,Dmitry Medvedev's statement in response to the...,“in russia duly noted president obamas stateme...,President,Dmitry Medvedev,http://en.kremlin.ru/events/president/transcri...,2,INTERNATIONAL AFFAIRS
1606,Russia,27-06-2016,Vladimir Putin,United Russia party congress,colleagues friends welcome delegates guests un...,President,Vladimir Putin,http://en.kremlin.ru/events/president/transcri...,0,OTHERS


In [69]:
wf_training

Unnamed: 0,Country,Date,Speaker,Headline,Text_of_Speech,Designation,Running President/PM,Speech Link,Emotion,Context
0,USA,18/09/2013,Barack Obama,\nRemarks by the President at the Business Rou...,thank everybody applause well jim thank introd...,President,Barack Obama,https://obamawhitehouse.archives.gov/the-press...,0,DEVELOPMENT
1,China,2019-04-27 00:00:00,H.E. Xi Jinping,Remarks by H.E. Xi Jinping President of the Pe...,ladies gentlemen friends media good afternoon ...,President,Xi Jinping,https://www.fmprc.gov.cn/mfa_eng/wjdt_665385/z...,0,DEVELOPMENT
2,USA,2021-10-14 00:00:00,Joe Biden,Remarks by President Biden on the COVID-⁠19 Re...,president good afternoon i’ve briefed covid19 ...,President,Joe Biden,https://www.whitehouse.gov/briefing-room/speec...,2,DEVELOPMENT
3,USA,09/05/2013,Barack Obama,\nRemarks by the President at Applied Material...,hello austin applause well wonderful see today...,President,Barack Obama,https://obamawhitehouse.archives.gov/the-press...,2,DEVELOPMENT
4,USA,30/04/2016,Barack Obama,\nWeekly Address: It’s Time for the Senate To ...,hi everybody it’s 45 days since nominated judg...,President,Barack Obama,https://obamawhitehouse.archives.gov/the-press...,1,NATIONALISM
...,...,...,...,...,...,...,...,...,...,...
1603,Russia,24-03-2006,Vladimir Putin,Opening Remarks at the State Council Meeting o...,good afternoon dear colleagues today examining...,President,Vladimir Putin,http://en.kremlin.ru/events/president/transcri...,0,DEVELOPMENT
1604,Russia,26-11-2002,Vladimir Putin,Speech at a Meeting of the Armed Forces Command,good afternoon keeping tradition today reviewi...,President,Vladimir Putin,http://en.kremlin.ru/events/president/transcri...,0,DEVELOPMENT
1605,Russia,17-09-2009,Dmitry Medvedev,Dmitry Medvedev's statement in response to the...,“in russia duly noted president obamas stateme...,President,Dmitry Medvedev,http://en.kremlin.ru/events/president/transcri...,2,INTERNATIONAL AFFAIRS
1606,Russia,27-06-2016,Vladimir Putin,United Russia party congress,colleagues friends welcome delegates guests un...,President,Vladimir Putin,http://en.kremlin.ru/events/president/transcri...,0,OTHERS


In [70]:
training_sentences = wf_training.Text_of_Speech.to_numpy()
#training labels
training_labels = wf_training.Emotion.to_numpy()

validation_sentences = wf_testing.Text_of_Speech.to_numpy()
#training labels
validation_labels = wf_testing.Emotion.to_numpy()

In [71]:
training_sentences.shape, validation_sentences.shape

((1608,), (402,))

In [72]:
from tensorflow.keras.preprocessing.text import Tokenizer

tokenizer = Tokenizer(num_words=num_unique_words)
tokenizer.fit_on_texts(training_sentences)

In [73]:
word_index = tokenizer.word_index

In [74]:
training_sequences = tokenizer.texts_to_sequences(training_sentences)
validation_sequences = tokenizer.texts_to_sequences(validation_sentences)

In [75]:
#padding sequences to have the same length
from tensorflow.keras.preprocessing.sequence import pad_sequences

#max number of words per sequence
max_length = 1000

training_padded = pad_sequences(training_sequences, maxlen=max_length, padding="post", truncating="post")
validation_padded = pad_sequences(validation_sequences, maxlen=max_length, padding="post", truncating="post")
training_padded.shape, validation_padded.shape

((1608, 1000), (402, 1000))

In [76]:
#LSTM Model
import tensorflow.keras as keras


model = keras.models.Sequential()

#input layer
#embedding layer converts the words into vectors of fixed size
model.add(layers.Embedding(num_unique_words, 32))
#dropout rate drops out a certain percentage of input data so that the
#model learns more robust ways of learning and does not overfit
model.add(layers.Bidirectional(layers.LSTM(64, dropout=0.2, recurrent_dropout=0.2)))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(5, activation='softmax'))
model.summary()

I0000 00:00:1747960461.474952    1160 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1747960461.772289    1160 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1747960461.772383    1160 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1747960461.779127    1160 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1747960461.779178    1160 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:0

In [77]:
loss = 'sparse_categorical_crossentropy'
optim = keras.optimizers.Adam(learning_rate=0.001) #from 0.001
metrics = ["accuracy"]

model.compile(loss=loss, optimizer=optim, metrics=metrics)

In [78]:
print(training_padded.dtype, training_padded.shape)

int32 (1608, 1000)


In [79]:

training_labels = np.array(training_labels).astype(np.int64)
validation_labels = np.array(validation_labels).astype(np.int64)

In [80]:
print(training_padded.dtype, training_padded.shape)
print(training_labels.dtype, training_labels.shape)

int32 (1608, 1000)
int64 (1608,)


In [81]:
print(training_padded)

[[   17   186    30 ...  4837   437  1622]
 [  520   508   136 ...     0     0     0]
 [   60    46   662 ...     0     0     0]
 ...
 [ 6425    64 10009 ...     0     0     0]
 [  234   136   440 ...     0     0     0]
 [   46    87   338 ...     0     0     0]]


In [82]:
##making sure that overrepresentation of a class does not affect the model
from sklearn.utils import class_weight

class_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=np.unique(training_labels),
    y=training_labels
)
class_weights = dict(enumerate(class_weights))

In [None]:
model.fit(training_padded, training_labels, epochs=20, validation_data=(validation_padded, validation_labels), class_weight=class_weights, verbose=2)

# Changing to a Binary Classification Model

In [92]:
wf = pandas.read_csv("./EMPOLITICON.csv")
wf = wf.sample(frac=1).reset_index(drop=True)
wf

Unnamed: 0,Country,Date,Speaker,Headline,Text_of_Speech,Designation,Running President/PM,Speech Link,Emotion,Context
0,Russia,14-11-2003,Vladimir Putin,Opening Address at the Congress of the Russian...,"Good afternoon,\n\nYour forum has brought toge...",President,Vladimir Putin,http://en.kremlin.ru/events/president/transcri...,OPTIMISM,DEVELOPMENT
1,USA,24/05/2014,Barack Obama,\nWeekly Address: Paying Tribute to our Fallen...,"Hi, everybody. It’s Memorial Day weekend – a...",President,Barack Obama,https://obamawhitehouse.archives.gov/the-press...,UPSET,NATIONALISM
2,Russia,2005-03-08 00:00:00,Vladimir Putin,Introductory Remarks at a Meeting with Represe...,"Good afternoon, dear colleagues.\n\nI asked yo...",President,Vladimir Putin,http://en.kremlin.ru/events/president/transcri...,NEUTRAL,OTHERS
3,United Kingdom,24 December 2019,Boris Johnson,Prime Minister Boris Johnson's Christmas messa...,Watch the Prime Minister’s Christmas message h...,Prime Minister,Boris Johnson,https://www.gov.uk/government/speeches/prime-m...,JOY,OTHERS
4,Russia,2020-10-03 00:00:00,Vladimir Putin,Speech at State Duma plenary session,"Mr Volodin, State Duma deputies,\n\nMr Speaker...",President,Vladimir Putin,http://en.kremlin.ru/events/president/transcri...,OPTIMISM,OTHERS
...,...,...,...,...,...,...,...,...,...,...
2005,USA,09/01/2015,Barack Obama,\nRemarks by the President on America's Colleg...,"Hello, everybody! (Applause.) Hey! Thank yo...",President,Barack Obama,https://obamawhitehouse.archives.gov/the-press...,NEUTRAL,DEVELOPMENT
2006,Russia,2005-11-10 00:00:00,Vladimir Putin,Opening Address at the Session of the State Co...,Good afternoon dear colleagues!\n\nIncreasing ...,President,Vladimir Putin,http://en.kremlin.ru/events/president/transcri...,NEUTRAL,DEVELOPMENT
2007,USA,2017-06-12 00:00:00,Donald Trump,Remarks by President Trump in Welcoming the 20...,"THE PRESIDENT: Hello, everybody. Mr. Governo...",President,Donald Trump,https://trumpwhitehouse.archives.gov/briefings...,JOY,DEVELOPMENT
2008,United Kingdom,19 November 2020,Boris Johnson,PM statement to the House on the Integrated Re...,"Mr Speaker, thank you and with permission, I w...",Prime Minister,Boris Johnson,https://www.gov.uk/government/speeches/pm-stat...,NEUTRAL,DEVELOPMENT


In [93]:
def count_unique_word(wf):
    count = Counter()
    for word in wf['Emotion']:
        count[word] += 1
    return count

counter = count_unique_word(wf)

In [94]:
print(counter.most_common())

[('NEUTRAL', 589), ('OPTIMISM', 583), ('JOY', 525), ('UPSET', 313)]


In [95]:
#made a change so that NEUTRAL and UPSET are 0, OPTIMISM and JOY are 1
for i in range(len(wf)):
    if wf['Emotion'][i] == counter.most_common()[0][0]:
        wf['Emotion'][i] = 0
    elif wf['Emotion'][i] == counter.most_common()[1][0]:
        wf['Emotion'][i] = 1
    elif wf['Emotion'][i] == counter.most_common()[2][0]:
        wf['Emotion'][i] = 1
    elif wf['Emotion'][i] == counter.most_common()[3][0]:
        wf['Emotion'][i] = 0

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  wf['Emotion'][i] = 1
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because th

Same Process as the previous up to the LSTM Model

In [96]:
for i in range(len(wf['Text_of_Speech'])):
    wf['Text_of_Speech'][i] = remove_punctuation(wf['Text_of_Speech'][i])
    wf['Text_of_Speech'][i] = remove_stopwords(wf['Text_of_Speech'][i])

for i in range(len(wf) - 1, -1, -1):
        if not (isinstance(wf.iloc[i]['Text_of_Speech'], str)):
                wf = wf.drop(i)


counter = count_unique_word(wf)


num_unique_words = len(counter)



wf_training = wf[:int(len(wf) * 0.8)]
wf_testing = wf[int(len(wf) * 0.8):]



wf_testing.reset_index(inplace=True)
wf_testing.drop('index', axis=1, inplace=True)
wf_testing



wf_training.reset_index(inplace=True)
wf_training.drop('index', axis=1, inplace=True)
wf_training


training_sentences = wf_training.Text_of_Speech.to_numpy()
#training labels
training_labels = wf_training.Emotion.to_numpy()
validation_sentences = wf_testing.Text_of_Speech.to_numpy()
#training labels
validation_labels = wf_testing.Emotion.to_numpy()




tokenizer = Tokenizer(num_words=num_unique_words)
tokenizer.fit_on_texts(training_sentences)


word_index = tokenizer.word_index




training_sequences = tokenizer.texts_to_sequences(training_sentences)
validation_sequences = tokenizer.texts_to_sequences(validation_sentences)


max_length = 1000
training_padded = pad_sequences(training_sequences, maxlen=max_length, padding="post", truncating="post")
validation_padded = pad_sequences(validation_sequences, maxlen=max_length, padding="post", truncating="post")
training_padded.shape, validation_padded.shape


You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  wf['Text_of_Speech'][i] = remove_punctuation(wf['Text_of_Speech'][i])
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to up

((1608, 1000), (402, 1000))

In [None]:
#Binary Classification Neural Network Model

model = keras.models.Sequential()

#input layer
#embedding layer converts the words into vectors of fixed size
model.add(layers.Embedding(num_unique_words, 32))
#dropout rate drops out a certain percentage of input data so that the
#model learns more robust ways of learning and does not overfit
model.add(layers.Bidirectional(layers.LSTM(64, dropout=0.2, recurrent_dropout=0.2)))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))
model.summary()

In [None]:
loss = keras.losses.BinaryCrossentropy(from_logits=False)
optim = keras.optimizers.Adam(learning_rate=0.004)
metrics = ["accuracy"]

model.compile(loss=loss, optimizer=optim, metrics=metrics)

In [99]:
training_labels = np.array(training_labels).astype(np.int64)
validation_labels = np.array(validation_labels).astype(np.int64)

In [100]:

class_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=np.unique(training_labels),
    y=training_labels
)
class_weights = dict(enumerate(class_weights))

In [102]:
print(class_weights)

{0: 1.1059147180192572, 1: 0.9125993189557321}


In [None]:
model.fit(training_padded, training_labels, epochs=30, validation_data=(validation_padded, validation_labels), class_weight=class_weights, verbose=2)

Epoch 1/8
51/51 - 331s - 6s/step - accuracy: 0.5354 - loss: 0.6937 - val_accuracy: 0.4353 - val_loss: 0.6942
Epoch 2/8
51/51 - 324s - 6s/step - accuracy: 0.4658 - loss: 0.6933 - val_accuracy: 0.4701 - val_loss: 0.6939
Epoch 3/8
51/51 - 325s - 6s/step - accuracy: 0.4608 - loss: 0.6933 - val_accuracy: 0.4925 - val_loss: 0.6934
Epoch 4/8
51/51 - 322s - 6s/step - accuracy: 0.4882 - loss: 0.6935 - val_accuracy: 0.5373 - val_loss: 0.6929
Epoch 5/8
51/51 - 323s - 6s/step - accuracy: 0.5193 - loss: 0.6934 - val_accuracy: 0.4353 - val_loss: 0.6940
Epoch 6/8


KeyboardInterrupt: 

# Using the BERT Tokenizer