In [1]:
# Load general libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Load preprocessing libraries
import preprocessor as p
import unicodedata as ud
from sklearn.model_selection import train_test_split

# Load classifier libraries
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Embedding, Dense, Dropout
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from sklearn.model_selection import GridSearchCV

# Load evaluation libraries
from sklearn.metrics import confusion_matrix, f1_score, accuracy_score, precision_score, recall_score, precision_recall_fscore_support, classification_report

In [2]:
# Retrieve 2015 dataset from Github (!!!!!should probably change link at some point)
url = "https://raw.githubusercontent.com/dimosbele/sentiment_analysis_greek/master/modeling/data/df_final.pkl"
dataset = pd.read_pickle(url)

X = np.array(dataset["Text"])
y = np.array(dataset["Sentiment"])

In [3]:
# Load imbalance libraries
from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler

def resampling(X_train, y_train, strategy='auto'):
  #Oversample minority classes(positive, negative) to numbers of majority class(neutral)
  if strategy == 'auto':
    ros = RandomOverSampler(sampling_strategy=strategy, random_state=42)
    X_res, y_res = ros.fit_resample(X_train, y_train)
  #Oversample minority class(positive) to numbers of majority class(neutral), then undersample positive and neutral to numbers of negative class
  elif strategy == 'combination':
    ros = RandomOverSampler(sampling_strategy='minority', random_state=42)
    X_ros, y_ros = ros.fit_resample(X_train, y_train)
    rus = RandomUnderSampler(sampling_strategy='auto', random_state=42)
    X_res, y_res = rus.fit_resample(X_ros, y_ros)

  return X_res, y_res

In [4]:
# Preprocess tweets
def preprocess_tweets(X):
    X = clean_text(X)
    return X

def clean_text(X):
    X = [tweet.lower() for tweet in X]
    
    # Remove URLS, mentions
    p.set_options(p.OPT.URL, p.OPT.MENTION)
    X = [p.clean(tweet) for tweet in X]

    # Remove Greek accents
    d = {ord('\N{COMBINING ACUTE ACCENT}'):None}
    X = [ud.normalize('NFD',word).translate(d) for word in X]

    return X

X = preprocess_tweets(X)

In [5]:
# Change labels from [-1, 0, 1] to [0, 1, 2]
for i in range(len(y)):
    y[i] += 1

# Split dataset to train-val-test (70-10-20)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=42, stratify=y)

X_train, X_val , y_train, y_val = train_test_split(X_train, y_train, test_size = 0.125, random_state=42, stratify=y_train)

print("Training samples:", len(y_train))
print("Validation samples:", len(y_val))
print("Test samples:", len(y_test))

Training samples: 1148
Validation samples: 164
Test samples: 328


In [173]:
#X_train = np.array([[sentence] for sentence in X_train])


In [174]:
#X_train, y_train = resampling(X_train, y_train, strategy='auto')

In [6]:
from nltk.tokenize import word_tokenize

# Combine all the tweets into a single string
all_tweets = ' '.join(X_train)

# Tokenize the string
words = word_tokenize(all_tweets)

# Get the number of unique words
unique_words = set(words)
num_unique_words = len(unique_words)

print("Number of unique words:", num_unique_words)

Number of unique words: 3717


In [7]:
# Tokenization
tokenizer = Tokenizer(num_words=num_unique_words)
tokenizer.fit_on_texts(X_train)

# Convert text to sequences
sequences_train = tokenizer.texts_to_sequences(X_train)
sequences_val = tokenizer.texts_to_sequences(X_val)
sequences_test = tokenizer.texts_to_sequences(X_test)

# Pad sequences to the same length
max_length = max([len(seq) for seq in sequences_train])

padded_sequences_train = tf.keras.utils.pad_sequences(sequences_train, maxlen=max_length)
padded_sequences_val = tf.keras.utils.pad_sequences(sequences_val, maxlen=max_length)
padded_sequences_test = tf.keras.utils.pad_sequences(sequences_test, maxlen=max_length)

# Print the preprocessed and tokenized tweets
for i, tweet in enumerate(X_train):
    print(f"Tweet {i+1}: {tweet}")
    print(f"Padded Sequence train {i+1}: {padded_sequences_train[i]}")
    
print("\n")
for j, tweet in enumerate(X_test):
    print(f"Tweet {j+1}: {tweet}")
    print(f"Padded Sequence test {j+1}: {padded_sequences_test[j]}")

Tweet 1: ποιος εχει προβαδισμα; - νεα δημοσκοπηση, βγαζει μπροστα τον συριζα
Padded Sequence train 1: [   0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0  284   49   54   34   45 1219   80   19    1]
Tweet 2: αυτο ειναι το ψηφοδελτιο επικρατειας του συριζα
Padded Sequence train 2: [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 51 12  3
 30 32  2  1]
Tweet 3: εκλογες 2015: κυριακος μητσοτακης: «η ν.δ. θα σχηματισει κυβερνηση με φιλοευρωπαϊκα κομματα»
Padded Sequence train 3: [   0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0   31   43  672  673  210   58   55   11  674  134    7 1220 1221]
Tweet 4: ο γιος του γιωργου παπαδακη στο σποτ του συριζα
Padded Sequence train 4: [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   4 454   2 241 675  18  38   2   1]
Tweet 5: το ψηφοδελτιο επικρατειας του συριζα
Padded Sequence train 5: [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  

Padded Sequence train 280: [   0    0    0    0    0    0    0    0    0    0    0    0  245  881
    3  415  882    7  269   25  578    7  570    1    5 1775 1776 1777]
Tweet 281: 3,3 % και 2,7 % η διαφορα συριζα-νδ σε 2 gallop. τσιμα-τσιμα στη βουλη το πασοκ #ekloges2015
Padded Sequence train 281: [   0    0    0    0    0    0    0    0    0   72   72    5  133  218
    8  222    1   10   20  133 1778  883  883   24  198    3   21   47]
Tweet 282: μηνυμα υποψηφιου βουλευτη αρτας με την νδ χρηστου παπαζογλου
Padded Sequence train 282: [   0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0  255  267  110  884    7   14   10  885 1779]
Tweet 283: τα ψηφοδελτια της στη θεσσαλονικη παρουσιασε η νδ
Padded Sequence train 283: [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0  15  41   6  24 579 207   8  10]
Tweet 284: newsbeast.gr | πολιτικη : ο λαζοπουλος στο στοχαστρο της νδ
Padded Sequence train 284: [   0    0   

In [8]:
print(np.count_nonzero(y_train == 2))
print(np.count_nonzero(y_train == 0))
padded_sequences_train, y_train = resampling(padded_sequences_train, y_train, strategy='auto')
print(np.count_nonzero(y_train == 2))
print(np.count_nonzero(y_train == 0))

55
408
685
685


## Model 1

In [9]:
#one-hot encoding for the labels
from tensorflow.keras.utils import to_categorical  

y_train_encoded = to_categorical(y_train, num_classes=3)  # Ειναι απαραίτητο για να μπορέσω να εκπαίδευσω, αλλιώς χτυπούσε error
y_val_encoded = to_categorical(y_val, num_classes=3)
y_test_encoded = to_categorical(y_test,num_classes=3)

print("max_length: ",max_length)
print("tokenizer.word_index: ",len(tokenizer.word_index))

max_length:  28
tokenizer.word_index:  3709


In [10]:
model = Sequential()
model.add(Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=100, input_length=max_length))
model.add(LSTM(units=100, return_sequences=True))
model.add(LSTM(units=50))
model.add(Dropout(0.2))
model.add(Dense(units=64, activation='relu'))
model.add(Dense(units=32, activation='relu'))
model.add(Dense(units=3, activation='softmax'))

# Compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model with validation data
model.fit(padded_sequences_train, y_train_encoded, epochs=8, batch_size=64, validation_data=(padded_sequences_val, y_val_encoded))

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<keras.callbacks.History at 0x18bdc4a6f10>

In [11]:
predictions = model.predict(padded_sequences_test)
classes = np.argmax(predictions, axis=1)
predictions_model_1 = np.array(classes)


# Calculate accuracy
print("Model 1",'\n')
accuracy = accuracy_score(y_test, predictions_model_1)
print(f"Accuracy: {accuracy:.4f}")

report = classification_report(y_test, predictions_model_1)
print("Classification Report:")
print(report)


# Calculate confusion matrix
confusion_mat = confusion_matrix(y_test, predictions_model_1)
print("Confusion Matrix:")
print(confusion_mat)

Model 1 

Accuracy: 0.8354
Classification Report:
              precision    recall  f1-score   support

           0       0.79      0.81      0.80       116
           1       0.88      0.88      0.88       196
           2       0.62      0.50      0.55        16

    accuracy                           0.84       328
   macro avg       0.76      0.73      0.74       328
weighted avg       0.83      0.84      0.83       328

Confusion Matrix:
[[ 94  20   2]
 [ 21 172   3]
 [  4   4   8]]


## Model 2

In [12]:
model_2 = Sequential()
model_2.add(Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=100, input_length=max_length))
model_2.add(LSTM(units=100))
model_2.add(Dense(units=32, activation='relu'))
model_2.add(Dense(units=3, activation='softmax'))

# Compile the model
model_2.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model with validation data
model_2.fit(padded_sequences_train, y_train_encoded, epochs=18, batch_size=64, validation_data=(padded_sequences_val, y_val_encoded))

Epoch 1/18
Epoch 2/18
Epoch 3/18
Epoch 4/18
Epoch 5/18
Epoch 6/18
Epoch 7/18
Epoch 8/18
Epoch 9/18
Epoch 10/18
Epoch 11/18
Epoch 12/18
Epoch 13/18
Epoch 14/18
Epoch 15/18
Epoch 16/18
Epoch 17/18
Epoch 18/18


<keras.callbacks.History at 0x18be00e7be0>

In [13]:
predictions_2 = model_2.predict(padded_sequences_test)
classes = np.argmax(predictions_2, axis=1)
predictions_model_2 = np.array(classes)

print("Model 2",'\n')
# Calculate accuracy
accuracy = accuracy_score(y_test, predictions_model_2)
print(f"Accuracy: {accuracy:.4f}")

report = classification_report(y_test, predictions_model_2)
print("Classification Report:")
print(report)

# Calculate confusion matrix
confusion_mat = confusion_matrix(y_test, predictions_model_2)
print("Confusion Matrix:")
print(confusion_mat)

Model 2 

Accuracy: 0.8079
Classification Report:
              precision    recall  f1-score   support

           0       0.81      0.68      0.74       116
           1       0.83      0.91      0.87       196
           2       0.50      0.44      0.47        16

    accuracy                           0.81       328
   macro avg       0.71      0.68      0.69       328
weighted avg       0.80      0.81      0.80       328

Confusion Matrix:
[[ 79  33   4]
 [ 14 179   3]
 [  5   4   7]]


## Δοκιμή για διάφορες τιμές του output_dim στο layers των embendings

In [33]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

for dimension in [10, 1000, 10000, 100000]:
    print("Model 2 with output_dim:", dimension)

    model_2 = Sequential()
    model_2.add(Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=dimension, input_length=max_length))
    model_2.add(LSTM(units=100))
    model_2.add(Dense(units=32, activation='relu'))
    model_2.add(Dense(units=3, activation='softmax'))

    # Compile the model
    model_2.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Train the model with validation data
    model_2.fit(padded_sequences_train, y_train_encoded, epochs=12, batch_size=64, validation_data=(padded_sequences_val, y_val_encoded))

    predictions_2 = model_2.predict(padded_sequences_test)
    classes = np.argmax(predictions_2, axis=1)
    predictions_model_2 = np.array(classes)

    # Calculate accuracy
    accuracy = accuracy_score(y_test, predictions_model_2)
    print(f"Accuracy: {accuracy:.4f}")

    report = classification_report(y_test, predictions_model_2, zero_division=1)
    print("Classification Report:")
    print(report)

    # Calculate confusion matrix
    confusion_mat = confusion_matrix(y_test, predictions_model_2)
    print("Confusion Matrix:")
    print(confusion_mat)
    print('\n')


Model 2 with output_dim: 10
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
Accuracy: 0.7470
Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.59      0.69       116
           1       0.87      0.86      0.86       196
           2       0.14      0.44      0.21        16

    accuracy                           0.75       328
   macro avg       0.61      0.63      0.59       328
weighted avg       0.82      0.75      0.77       328

Confusion Matrix:
[[ 69  22  25]
 [  9 169  18]
 [  5   4   7]]


Model 2 with output_dim: 1000
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
Accuracy: 0.7988
Classification Report:
              precision    recall  f1-score   support

           0       0.80      0.67      0.73       116
           1       0.82      0.91   

## Model 3

In [14]:
model_3 = Sequential()
model_3.add(Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=100, input_length=max_length))
model_3.add(LSTM(units=100))
model_3.add(Dense(units=3, activation='softmax'))

# Compile the model
model_3.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model with validation data
model_3.fit(padded_sequences_train, y_train_encoded, epochs=18, batch_size=64, validation_data=(padded_sequences_val, y_val_encoded))

Epoch 1/18
Epoch 2/18
Epoch 3/18
Epoch 4/18
Epoch 5/18
Epoch 6/18
Epoch 7/18
Epoch 8/18
Epoch 9/18
Epoch 10/18
Epoch 11/18
Epoch 12/18
Epoch 13/18
Epoch 14/18
Epoch 15/18
Epoch 16/18
Epoch 17/18
Epoch 18/18


<keras.callbacks.History at 0x18be34ab9a0>

In [15]:
predictions_3 = model_3.predict(padded_sequences_test)
classes = np.argmax(predictions_3, axis=1)
predictions_model_3 = np.array(classes)

print("Model 3",'\n')
# Calculate accuracy
accuracy = accuracy_score(y_test, predictions_model_3)
print(f"Accuracy: {accuracy:.4f}")

report = classification_report(y_test, predictions_model_3)
print("Classification Report:")
print(report)

# Calculate confusion matrix
confusion_mat = confusion_matrix(y_test, predictions_model_3)
print("Confusion Matrix:")
print(confusion_mat)

Model 3 

Accuracy: 0.8018
Classification Report:
              precision    recall  f1-score   support

           0       0.79      0.65      0.71       116
           1       0.81      0.92      0.86       196
           2       0.70      0.44      0.54        16

    accuracy                           0.80       328
   macro avg       0.77      0.67      0.70       328
weighted avg       0.80      0.80      0.79       328

Confusion Matrix:
[[ 75  38   3]
 [ 15 181   0]
 [  5   4   7]]


## Model 4

In [16]:
model_4 = Sequential()
model_4.add(Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=200, input_length=max_length))
model_4.add(LSTM(units=200, return_sequences=True))
model_4.add(LSTM(units=50))
model_4.add(Dense(units=64, activation='relu'))
model_4.add(Dense(units=32, activation='relu'))
model_4.add(Dense(units=3, activation='softmax'))

# Compile the model
model_4.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model with validation data
model_4.fit(padded_sequences_train, y_train_encoded, epochs=9, batch_size=64, validation_data=(padded_sequences_val, y_val_encoded))

Epoch 1/9
Epoch 2/9
Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9


<keras.callbacks.History at 0x18be8df25b0>

In [17]:
predictions_4 = model_4.predict(padded_sequences_test)
classes = np.argmax(predictions_4, axis=1)
predictions_model_4 = np.array(classes)

print("Model 4", "\n")
# Calculate accuracy
accuracy = accuracy_score(y_test, predictions_model_4)
print(f"Accuracy: {accuracy:.4f}")

report = classification_report(y_test, predictions_model_4)
print("Classification Report:")
print(report)

# Calculate confusion matrix
confusion_mat = confusion_matrix(y_test, predictions_model_4)
print("Confusion Matrix:")
print(confusion_mat)

Model 4 

Accuracy: 0.8140
Classification Report:
              precision    recall  f1-score   support

           0       0.82      0.68      0.75       116
           1       0.83      0.91      0.87       196
           2       0.56      0.56      0.56        16

    accuracy                           0.81       328
   macro avg       0.74      0.72      0.73       328
weighted avg       0.81      0.81      0.81       328

Confusion Matrix:
[[ 79  34   3]
 [ 13 179   4]
 [  4   3   9]]


In [36]:
predictions_4 = model_4.predict(padded_sequences_test)
classes = np.argmax(predictions_4, axis=1)
predictions_model_4 = np.array(classes)

report = classification_report(y_test, predictions_model_4)
print("Classification Report:")
print(report)

Classification Report:
              precision    recall  f1-score   support

           0       0.82      0.68      0.75       116
           1       0.83      0.91      0.87       196
           2       0.56      0.56      0.56        16

    accuracy                           0.81       328
   macro avg       0.74      0.72      0.73       328
weighted avg       0.81      0.81      0.81       328



## Model 5

In [18]:
model_5 = Sequential()
model_5.add(Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=100, input_length=max_length))
model_5.add(LSTM(units=50))
model_5.add(Dense(units=3, activation='softmax'))

# Compile the model
model_5.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model with validation data
model_5.fit(padded_sequences_train, y_train_encoded, epochs=10, batch_size=64, validation_data=(padded_sequences_val, y_val_encoded))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x18bede32f40>

In [19]:
predictions_5 = model_5.predict(padded_sequences_test)
classes = np.argmax(predictions_5, axis=1)
predictions_model_5 = np.array(classes)

print("Model 5", "\n")
# Calculate accuracy
accuracy = accuracy_score(y_test, predictions_model_5)
print(f"Accuracy: {accuracy:.4f}")

report = classification_report(y_test, predictions_model_5)
print("Classification Report:")
print(report)

# Calculate confusion matrix
confusion_mat = confusion_matrix(y_test, predictions_model_5)
print("Confusion Matrix:")
print(confusion_mat)

Model 5 

Accuracy: 0.8140
Classification Report:
              precision    recall  f1-score   support

           0       0.88      0.62      0.73       116
           1       0.80      0.96      0.87       196
           2       0.70      0.44      0.54        16

    accuracy                           0.81       328
   macro avg       0.79      0.67      0.71       328
weighted avg       0.82      0.81      0.80       328

Confusion Matrix:
[[ 72  42   2]
 [  7 188   1]
 [  3   6   7]]


In [20]:
model_2 = Sequential()
model_2.add(Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=10000, input_length=max_length))
model_2.add(LSTM(units=100))
model_2.add(Dense(units=32, activation='relu'))
model_2.add(Dense(units=3, activation='softmax'))

# Compile the model
model_2.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model with validation data
model_2.fit(padded_sequences_train, y_train_encoded, epochs=12, batch_size=64, validation_data=(padded_sequences_val, y_val_encoded))

Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


<keras.callbacks.History at 0x18b800b04c0>

In [21]:
    predictions_2 = model_2.predict(padded_sequences_test)
    classes = np.argmax(predictions_2, axis=1)
    predictions_model_2 = np.array(classes)

    # Calculate accuracy
    accuracy = accuracy_score(y_test, predictions_model_2)
    print(f"Accuracy: {accuracy:.4f}")

    report = classification_report(y_test, predictions_model_2, zero_division=1)
    print("Classification Report:")
    print(report)

    # Calculate confusion matrix
    confusion_mat = confusion_matrix(y_test, predictions_model_2)
    print("Confusion Matrix:")
    print(confusion_mat)
    print('\n')

Accuracy: 0.7927
Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.59      0.69       116
           1       0.78      0.94      0.86       196
           2       0.67      0.38      0.48        16

    accuracy                           0.79       328
   macro avg       0.76      0.64      0.68       328
weighted avg       0.79      0.79      0.78       328

Confusion Matrix:
[[ 69  46   1]
 [  9 185   2]
 [  5   5   6]]




## Prediction in new Batch

In [10]:
def count_the_results(predictions):
    negative = 0
    neutral = 0
    positive = 0
    
    for i in range(len(predictions)):
        if predictions[i]==0:
            negative +=1
        if predictions[i]==1:
            neutral +=1
        if predictions[i]==2:
            positive +=1
    return(negative, neutral, positive)

### Ekloges

In [37]:
Ekloge_2023= pd.read_csv("Εκλογές_2023_final.csv")
Ekloge_2023 = Ekloge_2023.to_numpy().ravel()

In [38]:
X = preprocess_tweets(Ekloge_2023)
sequences_Ekloges = tokenizer.texts_to_sequences(X)
padded_sequences_Ekloges = tf.keras.utils.pad_sequences(sequences_Ekloges, maxlen=max_length)

In [39]:
predictions_4 = model_4.predict(padded_sequences_Ekloges)
classes = np.argmax(predictions_4, axis=1)
predictions_model_4 = np.array(classes)



In [40]:
results_ekloges = count_the_results(predictions_model_4)
results_ekloges

(1228, 1099, 70)

### Μητσοτακης1

In [42]:
mits = pd.read_csv("Μητσοτακης_final.csv")
mits = mits.to_numpy().ravel()
mits.shape

(1117,)

In [43]:
X = preprocess_tweets(mits)
sequences_mits = tokenizer.texts_to_sequences(X)
padded_sequences_mits = tf.keras.utils.pad_sequences(sequences_mits, maxlen=max_length)

In [44]:
predictions_4 = model_4.predict(padded_sequences_mits)
classes = np.argmax(predictions_4, axis=1)
predictions_model_4 = np.array(classes)
results_mitsotakis = count_the_results(predictions_model_4)



In [45]:
results_mitsotakis

(585, 502, 30)

### ΝΔ1

In [47]:
ND1 = pd.read_csv("ΝΔ_final.csv")
ND1 = ND1.to_numpy().ravel()
ND1.shape

(945,)

In [48]:
X = preprocess_tweets(ND1)
sequences_ND1 = tokenizer.texts_to_sequences(X)
padded_sequences_ND1 = tf.keras.utils.pad_sequences(sequences_ND1, maxlen=max_length)

In [49]:
predictions_4 = model_4.predict(padded_sequences_ND1)
classes = np.argmax(predictions_4, axis=1)
predictions_model_4 = np.array(classes)
results_ND1 = count_the_results(predictions_model_4)
results_ND1



(536, 385, 24)

### ΣΥΡΙΖΑ1

In [51]:
Suriza = pd.read_csv("ΣΥΡΙΖΑ_final.csv")
Suriza = Suriza.to_numpy().ravel()
Suriza.shape

(1237,)

In [52]:
X = preprocess_tweets(Suriza)
sequences_Suriza = tokenizer.texts_to_sequences(X)
padded_sequences_Suriza = tf.keras.utils.pad_sequences(sequences_Suriza, maxlen=max_length)

In [53]:
predictions_4 = model_4.predict(padded_sequences_Suriza)
classes = np.argmax(predictions_4, axis=1)
predictions_model_4 = np.array(classes)
results_Suriza = count_the_results(predictions_model_4)
results_Suriza



(649, 537, 51)

### Τσιπρας1

In [28]:
tsipras = pd.read_csv("Τσιπρας_final.csv")
tsipras = tsipras.to_numpy().ravel()
tsipras.shape

(837,)

In [29]:
X = preprocess_tweets(tsipras)
sequences_tsipras= tokenizer.texts_to_sequences(X)
padded_sequences_tsipras = tf.keras.utils.pad_sequences(sequences_tsipras, maxlen=max_length)

In [30]:
predictions_4 = model_4.predict(padded_sequences_tsipras)
classes = np.argmax(predictions_4, axis=1)
predictions_model_4 = np.array(classes)
results_tsipras = count_the_results(predictions_model_4)
results_tsipras



(483, 342, 12)

In [31]:
from tabulate import tabulate


table = [
    ["Hashtag", "Negatives", "Neutrals", "Positives"],
    ["Ekloges", results_ekloges[0], results_ekloges[1], results_ekloges[2]],
    ["Mitsotakis", results_mitsotakis[0], results_mitsotakis[1], results_mitsotakis[2]],
    ["ND1", results_ND1[0], results_ND1[1], results_ND1[2]],
    ["Suriza", results_Suriza[0], results_Suriza[1], results_Suriza[2]],
    ["Tsipras", results_tsipras[0], results_tsipras[1], results_tsipras[2]]
]

print(tabulate(table, headers="firstrow"))

Hashtag       Negatives    Neutrals    Positives
----------  -----------  ----------  -----------
Ekloges            1228        1099           70
Mitsotakis          585         502           30
ND1                 536         385           24
Suriza              649         537           51
Tsipras             483         342           12


### Saving the model

In [54]:
np.save('ΣΥΡΙΖΑ_final.npy', predictions_model_4)

In [109]:
#model_4.save("model_4.h5")

In [14]:
model_4 = tf.keras.models.load_model("model_4.h5")
