In [None]:
%tensorflow_version 2.x
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, MaxPooling1D, Conv1D, GlobalMaxPooling1D, Dropout, LSTM, GRU
from tensorflow.keras import utils
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras import utils
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

Colab only includes TensorFlow 2.x; %tensorflow_version has no effect.


In [None]:
# Download the zipped training CSV (twitter_training.csv.zip) from Google Drive by file id.
!gdown --id 1DCN9d2SSKokvYqjowCllXtyBdZksm6W7

Downloading...
From: https://drive.google.com/uc?id=1DCN9d2SSKokvYqjowCllXtyBdZksm6W7
To: /content/twitter_training.csv.zip
100% 2.02M/2.02M [00:00<00:00, 192MB/s]


In [None]:
# Download twitter_validation.csv from Google Drive by file id.
!gdown --id 1q9325AXgdrZYf_CjpFkyVHl02YL_n2nh

Downloading...
From: https://drive.google.com/uc?id=1q9325AXgdrZYf_CjpFkyVHl02YL_n2nh
To: /content/twitter_validation.csv
100% 164k/164k [00:00<00:00, 114MB/s]


In [None]:
!unzip -q twitter_training.csv.zip

replace twitter_training.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: y


In [None]:
# Download classes.txt from Google Drive by file id.
# NOTE(review): no visible cell reads classes.txt afterwards - confirm it is still needed.
!gdown --id 1rOcuH9ovXUzelE-fnRuJASB58_Wh_iM8

Downloading...
From: https://drive.google.com/uc?id=1rOcuH9ovXUzelE-fnRuJASB58_Wh_iM8
To: /content/classes.txt
100% 39.0/39.0 [00:00<00:00, 195kB/s]


In [None]:
# Peek at the raw file: no header row; the fields appear to be id, entity, sentiment, tweet text.
!head twitter_validation.csv

3364,Facebook,Irrelevant,"I mentioned on Facebook that I was struggling for motivation to go for a run the other day, which has been translated by Tom’s great auntie as ‘Hayley can’t get out of bed’ and told to his grandma, who now thinks I’m a lazy, terrible person 🤣"
352,Amazon,Neutral,BBC News - Amazon boss Jeff Bezos rejects claims company acted like a 'drug dealer' bbc.co.uk/news/av/busine…
8312,Microsoft,Negative,@Microsoft Why do I pay for WORD when it functions so poorly on my @SamsungUS Chromebook? 🙄
4371,CS-GO,Negative,"CSGO matchmaking is so full of closet hacking, it's a truly awful game."
4433,Google,Neutral,Now the President is slapping Americans in the face that he really did commit an unlawful act after his  acquittal! From Discover on Google vanityfair.com/news/2020/02/t…
6273,FIFA,Negative,"Hi @EAHelp I’ve had Madeleine McCann in my cellar for the past 13 years and the little sneaky thing just escaped whilst I was loading up some fifa points, she took my card and

In [None]:
# Count physical lines in the file.
# NOTE(review): quoted tweets can span several lines, so this is presumably larger than the number of records.
!wc -l twitter_validation.csv

1758 twitter_validation.csv


In [None]:
# Load the tweets. The file has four header-less columns (id, entity, sentiment,
# text). Previously only three names were supplied, which made pandas silently
# treat the first column as the index; naming it and setting index_col makes that
# explicit while keeping the same three data columns.
# NOTE(review): this reads the *validation* file into `train`, while the
# downloaded twitter_training.csv is never loaded - confirm this is intended.
train = pd.read_csv(
    'twitter_validation.csv',
    header=None,
    names=['id', 'title', 'class', 'text'],
    index_col='id',
)

In [None]:
# Put the target column first, followed by the entity name and the tweet text.
column_order = ['class', 'title', 'text']
train = train.loc[:, column_order]

In [None]:
# NOTE(review): repeats the column selection already done in the previous cell;
# running it again leaves `train` unchanged, so this cell looks removable.
train = train[['class', 'title', 'text']]

In [None]:
# Show the loaded frame as the cell output for a quick visual check.
train

Unnamed: 0,class,title,text
3364,1,Facebook,I mentioned on Facebook that I was struggling ...
352,4,Amazon,BBC News - Amazon boss Jeff Bezos rejects clai...
8312,2,Microsoft,@Microsoft Why do I pay for WORD when it funct...
4371,2,CS-GO,"CSGO matchmaking is so full of closet hacking,..."
4433,4,Google,Now the President is slapping Americans in the...
...,...,...,...
4891,1,GrandTheftAuto(GTA),⭐️ Toronto is the arts and culture capital of ...
4359,1,CS-GO,tHIS IS ACTUALLY A GOOD MOVE TOT BRING MORE VI...
2652,3,Borderlands,Today sucked so it’s time to drink wine n play...
8069,3,Microsoft,Bought a fraction of Microsoft today. Small wins.


In [None]:
# Tweet texts as plain Python strings, ready for the tokenizer.
# NOTE(review): astype(str) turns any missing text into the literal 'nan' - confirm
# the file has no empty tweets.
text_column = train.loc[:, 'text']
news = text_column.astype(str)
news.head(5)

3364    I mentioned on Facebook that I was struggling ...
352     BBC News - Amazon boss Jeff Bezos rejects clai...
8312    @Microsoft Why do I pay for WORD when it funct...
4371    CSGO matchmaking is so full of closet hacking,...
4433    Now the President is slapping Americans in the...
Name: text, dtype: object

In [None]:
# Preprocessing / model hyper-parameters shared by the cells below.
num_words = 100_000   # vocabulary size passed to the Tokenizer and the Embedding layer
max_news_len = 170    # every sequence is padded/truncated to this many tokens
nb_classes = 4        # number of output classes (width of the softmax layer)

In [None]:
# One-hot encode the targets.
# The raw file stores the sentiment as text ('Irrelevant', 'Negative', ...), so the
# former `train['class'] - 1` arithmetic raises TypeError on a fresh run.
# pd.factorize(sort=True) numbers the distinct values in sorted order: for integer
# classes 1..4 this yields the same 0..3 codes as subtracting 1, and for text
# labels it yields alphabetical codes (the ordering LabelEncoder also uses).
class_codes, _ = pd.factorize(train['class'], sort=True)
y_train = utils.to_categorical(class_codes, nb_classes)

In [None]:
from sklearn.preprocessing import LabelEncoder

# Turn the class column into integer codes, then into one-hot rows of width nb_classes.
label_encoder = LabelEncoder()
label_encoder.fit(train['class'])
encoded_labels = label_encoder.transform(train['class'])
y_train = utils.to_categorical(encoded_labels, num_classes=nb_classes)

In [None]:
# Inspect the one-hot target matrix (one row per tweet, nb_classes columns).
y_train

array([[1., 0., 0., 0.],
       [0., 0., 0., 1.],
       [0., 1., 0., 0.],
       ...,
       [0., 0., 1., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]], dtype=float32)

In [None]:
# Build the word index from the tweet texts; `num_words` caps the vocabulary
# used when texts are later converted to integer sequences.
tokenizer = Tokenizer(
    num_words=num_words,
)
tokenizer.fit_on_texts(texts=news)

In [None]:
# Replace every tweet with the list of integer ids of its words.
sequences = tokenizer.texts_to_sequences(texts=news)


In [None]:
# Bring every sequence to exactly max_news_len tokens. The pad_sequences defaults
# are spelled out: zeros are added at the front and over-long sequences lose
# their leading tokens.
x_train = pad_sequences(
    sequences,
    maxlen=max_news_len,
    padding='pre',
    truncating='pre',
)
x_train[:5]


array([[   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    5, 1879,   13,   83,   19,    5,   35, 1138,    9,
        1880,    2,   66,    9,    4,  420,    1,  300,  128,  280,   44,
          68, 1881,   51, 1882,  115, 

In [None]:
# 1-D convolutional text classifier:
# embedding -> conv (window of 5 tokens) -> global max pool -> dense -> dropout -> softmax.
model = Sequential([
    Embedding(num_words, 64, input_length=max_news_len),
    Conv1D(128, 5, padding='valid', activation='relu'),
    GlobalMaxPooling1D(),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(nb_classes, activation='softmax'),
])

In [None]:
# Configure training for one-hot targets and track accuracy.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Print the layer-by-layer architecture and parameter counts.
model.summary()

# First training pass: 10 epochs, with 10% of the samples held out for validation.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=128,
    epochs=10,
    validation_split=0.1,
)

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_2 (Embedding)     (None, 170, 64)           6400000   
                                                                 
 conv1d_2 (Conv1D)           (None, 166, 128)          41088     
                                                                 
 global_max_pooling1d_2 (Glo  (None, 128)              0         
 balMaxPooling1D)                                                
                                                                 
 dense_4 (Dense)             (None, 64)                8256      
                                                                 
 dropout_2 (Dropout)         (None, 64)                0         
                                                                 
 dense_5 (Dense)             (None, 4)                 260       
                                                      

<keras.callbacks.History at 0x7ff72ae1bac0>

In [None]:
# Checkpoint file for the CNN; it is only overwritten when validation accuracy improves.
model_cnn_save_path = 'modelposneg.h5'
checkpoint_callback_cnn = ModelCheckpoint(
    filepath=model_cnn_save_path,
    monitor='val_accuracy',
    verbose=1,
    save_best_only=True,
)

In [None]:
# Train the (already fitted) model for 5 more epochs, this time checkpointing the
# best validation accuracy and keeping the History object for later inspection.
history_cnn = model.fit(
    x=x_train,
    y=y_train,
    batch_size=128,
    epochs=5,
    validation_split=0.1,
    callbacks=[checkpoint_callback_cnn],
)

Epoch 1/5
Epoch 1: val_accuracy improved from -inf to 0.56000, saving model to modelposneg.h5
Epoch 2/5
Epoch 2: val_accuracy did not improve from 0.56000
Epoch 3/5
Epoch 3: val_accuracy did not improve from 0.56000
Epoch 4/5
Epoch 4: val_accuracy did not improve from 0.56000
Epoch 5/5
Epoch 5: val_accuracy improved from 0.56000 to 0.57000, saving model to modelposneg.h5


In [None]:
# Restore the weights written by the checkpoint callback into the in-memory model.
model.load_weights(filepath=model_cnn_save_path)

In [None]:
from tensorflow import keras

# Reload the complete checkpointed model from disk. Uses model_cnn_save_path
# rather than repeating the file name, so the checkpoint callback and this load
# can never point at different files.
model = keras.models.load_model(model_cnn_save_path)

In [None]:
# Sample text to classify.
news_article = "A 38-year-old woman in the US, who was apprehended twice for allegedly trying to jump the White House fence last week, has been arrested for scaling a fence at the Treasury Building next to the White House."

# Encode it with the fitted tokenizer and pad it to the training sequence length.
news_article_sequence = pad_sequences(
    tokenizer.texts_to_sequences([news_article]),
    maxlen=max_news_len,
)

# One row of class probabilities for the single input.
predictions = model.predict(news_article_sequence)
print(predictions)

[[0.33581215 0.17303495 0.2681341  0.22301883]]


In [None]:
# Translate the winning output index back into a class label.
# y_train was built from LabelEncoder codes, and LabelEncoder numbers classes in
# sorted order (alphabetical for text labels). The previous hand-written list
# ['Irrelevant', 'Negative', 'Positive', 'Neutral'] is not in that order, so
# 'Positive' and 'Neutral' predictions were reported swapped. Reading the labels
# from the fitted encoder keeps names and indices in step.
class_labels = [str(label) for label in label_encoder.classes_]

predictions = model.predict(news_article_sequence)

# predictions has one row per input text; take the arg-max of that single row
# instead of the arg-max over the flattened 2-D array.
predicted_class_index = int(np.argmax(predictions[0]))
predicted_class_label = class_labels[predicted_class_index]
print("Predicted Class:", predicted_class_label)


Predicted Class: Irrelevant


In [None]:
import pickle

# Persist the fitted tokenizer so inference code can rebuild identical sequences.
tokenizer_bytes = pickle.dumps(tokenizer, protocol=pickle.HIGHEST_PROTOCOL)
with open('tokenizerposneg.pickle', 'wb') as tokenizer_file:
    tokenizer_file.write(tokenizer_bytes)

In [None]:
import os

from google.colab import drive

# Mount Google Drive so the artefacts outlive the Colab session.
drive.mount('/content/drive')

# Single definition of the export folder; create it up front so neither the
# model save nor the pickle write fails on a Drive where it does not exist yet.
export_dir = '/content/drive/MyDrive/POSNEGMODEL'
os.makedirs(export_dir, exist_ok=True)

# Save the trained model and the fitted tokenizer side by side.
model.save(os.path.join(export_dir, 'modelposneg.h5'))
with open(os.path.join(export_dir, 'tokenizerposneg.pickle'), 'wb') as file:
    pickle.dump(tokenizer, file)

Mounted at /content/drive
