References:

[1] Ukraine conflict twitter dataset: https://www.kaggle.com/datasets/bwandowando/ukraine-russian-crisis-twitter-dataset-1-2-m-rows/code <br>
[2] Tweets on Ukraine crisis: https://www.kaggle.com/code/stpeteishii/tweets-on-ukraine-crisis/notebook <br>
[3] Generate wordcloud from English tweets: https://www.kaggle.com/code/bwandowando/generate-wordcloud-from-english-tweets <br>
[4] Sentiment analysis for tweets on the Ukraine conflict: https://github.com/DoubleGremlin181/Analyzing-Tweets-2022-Russian-Invasion-of-Ukraine/blob/master/Tweets%20-%20Hashtag%20Analysis.ipynb <br>
[5] Digit recognition: https://malchiodi.di.unimi.it/archive/deep-learning/deep-learning-tutorial.ipynb <br>
[6] Text classification from scratch: https://keras.io/examples/nlp/text_classification_from_scratch/

In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O
pd.options.mode.chained_assignment = None  # default='warn'
from tabulate import tabulate
import pprint
import csv

import gc
from pathlib import Path 

import os


# Setup

In [2]:
!pip install -q kaggle

os.environ["KAGGLE_USERNAME"] = 'emmacosta10'
os.environ["KAGGLE_KEY"] = '7f106f53792aa445c5372df5a7d011a2' 

print(os.environ['KAGGLE_USERNAME'])
print(os.environ['KAGGLE_KEY'])


emmacosta10
7f106f53792aa445c5372df5a7d011a2


In [3]:
# check kaggle API
# !kaggle datasets list 

In [4]:
# [1]
!kaggle datasets list --user 'BwandoWando'

ref                                                            title                                               size  lastUpdated          downloadCount  voteCount  usabilityRating  
-------------------------------------------------------------  -------------------------------------------------  -----  -------------------  -------------  ---------  ---------------  
bwandowando/cloud-vendors-and-related-tweets-dataset           Cloud Vendors and Related Tweets Dataset (2.14M)   371MB  2022-07-03 10:20:52            111         24  1.0              
bwandowando/reddit-rjokes-dataset                              Reddit r/Jokes Dataset (9.2K thrds, 98.4K cmnts)    10MB  2022-06-25 11:32:22            106         21  1.0              
bwandowando/robb-elementary-school-shooting-twitter-dataset    Robb Elementary shooting Twitter Dataset (2.74M)   401MB  2022-07-03 03:21:07             67         15  1.0              
bwandowando/scientist-rebellion-and-related-tweets-dataset     Scienti

In [5]:
# Printing dataset files
# !kaggle datasets files 'bwandowando/ukraine-russian-crisis-twitter-dataset-1-2-m-rows'

In [6]:
!kaggle datasets download 'bwandowando/ukraine-russian-crisis-twitter-dataset-1-2-m-rows'   -p '/kaggle/working'   

Downloading ukraine-russian-crisis-twitter-dataset-1-2-m-rows.zip to /kaggle/working
100% 9.88G/9.89G [01:57<00:00, 120MB/s] 
100% 9.89G/9.89G [01:57<00:00, 90.5MB/s]


In [7]:
print(os.listdir('/kaggle/working'))
print(os.listdir('.'))

['ukraine-russian-crisis-twitter-dataset-1-2-m-rows.zip']
['.config', 'sample_data']


# Reading the data

In [8]:
# Extracting tweets within May
import zipfile
single_file = '_UkraineCombinedTweetsDeduped.csv.gzip'
zip_file = '/kaggle/working/ukraine-russian-crisis-twitter-dataset-1-2-m-rows.zip'

with zipfile.ZipFile(zip_file,"r") as zip_ref:
    zip_ref.extract('0505_to_0507_UkraineCombinedTweetsDeduped.csv.gzip', '/kaggle/working')
  
        
for i in range(501, 532):
   with zipfile.ZipFile(zip_file,"r") as zip_ref:
     try:
        zip_ref.extract('0' + str(i) + single_file, '/kaggle/working')
     except:
        continue

In [9]:
# Removing compressed file
os.remove("/kaggle/working/ukraine-russian-crisis-twitter-dataset-1-2-m-rows.zip")

In [10]:
csvs = []
for dirname, _, filenames in os.walk('/kaggle/working'):
    filenames.sort()
    for filename in filenames:
        print(filename)
        fullpath = os.path.join(dirname, filename)
        csvs.append(fullpath)

0501_UkraineCombinedTweetsDeduped.csv.gzip
0502_UkraineCombinedTweetsDeduped.csv.gzip
0503_UkraineCombinedTweetsDeduped.csv.gzip
0504_UkraineCombinedTweetsDeduped.csv.gzip
0505_to_0507_UkraineCombinedTweetsDeduped.csv.gzip
0508_UkraineCombinedTweetsDeduped.csv.gzip
0509_UkraineCombinedTweetsDeduped.csv.gzip
0510_UkraineCombinedTweetsDeduped.csv.gzip
0511_UkraineCombinedTweetsDeduped.csv.gzip
0512_UkraineCombinedTweetsDeduped.csv.gzip
0513_UkraineCombinedTweetsDeduped.csv.gzip
0514_UkraineCombinedTweetsDeduped.csv.gzip
0515_UkraineCombinedTweetsDeduped.csv.gzip
0516_UkraineCombinedTweetsDeduped.csv.gzip
0517_UkraineCombinedTweetsDeduped.csv.gzip
0518_UkraineCombinedTweetsDeduped.csv.gzip
0519_UkraineCombinedTweetsDeduped.csv.gzip
0520_UkraineCombinedTweetsDeduped.csv.gzip
0521_UkraineCombinedTweetsDeduped.csv.gzip
0522_UkraineCombinedTweetsDeduped.csv.gzip
0523_UkraineCombinedTweetsDeduped.csv.gzip
0524_UkraineCombinedTweetsDeduped.csv.gzip
0525_UkraineCombinedTweetsDeduped.csv.gzip
052

# Creating, filtering and transforming the dataframe

In [11]:
# Keep only relevant columns
# Consolidate the data in a single dataframe

columns = ["tweetid","hashtags","text","language","is_retweet"]

csvs.sort()
dfs = pd.read_csv(csvs[0], engine='python', compression='gzip',encoding='utf-8', quoting=csv.QUOTE_ALL)
dfs = dfs[columns] 
     
for csvfile in csvs[1:]:
    df = pd.read_csv(csvfile, engine='python', compression='gzip',encoding='utf-8', quoting=csv.QUOTE_ALL)
    df = df[columns]
     
    dfs=pd.concat([dfs,df] ,ignore_index=True)
    print(Path(csvfile).name + " " + str(len(df)) + " rows")
    print("Accumulated "  + str(len(dfs)) + " rows")  


df = None
del df
gc.collect()



0502_UkraineCombinedTweetsDeduped.csv.gzip 329516 rows
Accumulated 681403 rows
0503_UkraineCombinedTweetsDeduped.csv.gzip 307057 rows
Accumulated 988460 rows
0504_UkraineCombinedTweetsDeduped.csv.gzip 384278 rows
Accumulated 1372738 rows
0505_to_0507_UkraineCombinedTweetsDeduped.csv.gzip 1046236 rows
Accumulated 2418974 rows
0508_UkraineCombinedTweetsDeduped.csv.gzip 371726 rows
Accumulated 2790700 rows
0509_UkraineCombinedTweetsDeduped.csv.gzip 399145 rows
Accumulated 3189845 rows
0510_UkraineCombinedTweetsDeduped.csv.gzip 352564 rows
Accumulated 3542409 rows
0511_UkraineCombinedTweetsDeduped.csv.gzip 310075 rows
Accumulated 3852484 rows
0512_UkraineCombinedTweetsDeduped.csv.gzip 302490 rows
Accumulated 4154974 rows
0513_UkraineCombinedTweetsDeduped.csv.gzip 280219 rows
Accumulated 4435193 rows
0514_UkraineCombinedTweetsDeduped.csv.gzip 326229 rows
Accumulated 4761422 rows
0515_UkraineCombinedTweetsDeduped.csv.gzip 420237 rows
Accumulated 5181659 rows
0516_UkraineCombinedTweetsDeduped

33

In [12]:
dfs.head()

Unnamed: 0,tweetid,hashtags,text,language,is_retweet
0,1520553587276795905,"[{'text': 'StopRussianOil', 'indices': [245, 2...",Remember this 👇 image next time you fix a meal...,en,False
1,1520553587490926594,[],🇺🇦 Збройні Сили України продовжують повертати ...,uk,True
2,1520553587763515392,"[{'text': 'Politics', 'indices': [51, 60]}, {'...",Everything Is [Not] Fine: Half-.. - via @pensi...,en,False
3,1520553587939676160,"[{'text': 'Tigray', 'indices': [28, 35]}]","People #Tigray lives in condition of No water,...",en,True
4,1520553588086525952,"[{'text': 'Russia', 'indices': [0, 7]}, {'text...",#Russia’s forces have stolen “several hundred ...,en,False


In [13]:
print(dfs.columns) 

Index(['tweetid', 'hashtags', 'text', 'language', 'is_retweet'], dtype='object')


In [14]:
# Filtering to find only tweets in English

df_en=dfs[dfs['language']=='en']

str(len(df_en))

'6133758'

In [15]:
# Filtering to drop tweets without hashtag

df_ht=df_en[df_en['hashtags']!='[]']

str(len(df_ht))

'5222720'

In [16]:
# Filtering to leave out retweets

df_nort=df_ht[df_ht['is_retweet']==False]


str(len(df_nort))

'1885176'

In [17]:
pd.options.display.max_colwidth = 170
df_nort[0:5].hashtags

0                                                                         [{'text': 'StopRussianOil', 'indices': [245, 260]}, {'text': 'StopRussiaNow', 'indices': [262, 276]}]
2       [{'text': 'Politics', 'indices': [51, 60]}, {'text': 'Art', 'indices': [61, 65]}, {'text': 'Trump', 'indices': [66, 72]}, {'text': 'JerryNelson', 'indices': [73, 85]}]
4                                                                                             [{'text': 'Russia', 'indices': [0, 7]}, {'text': 'Ukraine', 'indices': [86, 94]}]
18    [{'text': 'Russian', 'indices': [0, 8]}, {'text': 'StandWithUkraine', 'indices': [160, 177]}, {'text': 'UkraineWar', 'indices': [178, 189]}, {'text': 'Russia', 'indic...
20    [{'text': 'Ukraine', 'indices': [185, 193]}, {'text': 'Kyiv', 'indices': [194, 199]}, {'text': 'Shelter', 'indices': [200, 208]}, {'text': 'Dogs', 'indices': [209, 21...
Name: hashtags, dtype: object

# Getting first hashtag for each tweet

In [18]:
import nltk
nltk.download('punkt')
from nltk.tokenize import word_tokenize


def calculate_first_ht(row):
    return word_tokenize(row['hashtags'])[5][1:]


df_nort['first_ht']=df_nort.apply(calculate_first_ht, axis=1)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [19]:
#df_og = df_nort
#df_nort[0:10]

In [20]:
columns = ["tweetid", "text", "first_ht"]
df_col = df_nort[columns]
df_col.head()

Unnamed: 0,tweetid,text,first_ht
0,1520553587276795905,"Remember this 👇 image next time you fix a meal for your family.\n\nEvery day, Russia sells 700M euro worth of oil and gas to the world. It's your tax money, too.\n\nW...",StopRussianOil
2,1520553587763515392,Everything Is [Not] Fine: Half-.. - via @pensignal #Politics #Art #Trump #JerryNelson https://t.co/bZodwHyd3W,Politics
4,1520553588086525952,"#Russia’s forces have stolen “several hundred thousand tons” of grain in the areas of #Ukraine they occupy, Ukraine’s deputy agriculture minister says.\n\nhttps://t.c...",Russia
18,1520553593430024193,"#Russian ""killer squad has been linked to vile war crimes against civilians in Bucha—and their own severely injured brothers-in-arms""\n\nhttps://t.co/JQwaX0DBk2\n\n#S...",Russian
20,1520553593950068736,"Cain is a prominent guy. In all his habits, facial expressions.\nOur shelter needs your help!\nRaising funds food for animals.\nPayPal: dogandcat.helper@gmail.com\nht...",Ukraine


# Keeping only tweets with top 20 hashtags as first

In [21]:
%%capture
!pip install swifter
import swifter

# Removing non ASCII characters 

def strip_non_ascii(string):
    ''' Returns the string without non ASCII characters'''
    stripped = (c for c in string if 0 < ord(c) < 127)
    return ''.join(stripped)

df_col.reset_index(drop=True)

df_col['first_ht'] = df_col['first_ht'].str.lower()
df_col['first_ht'] = df_col['first_ht'].swifter.apply(strip_non_ascii)

In [22]:
# Filtering tweets to keep only those with 20 most popular ones as the first
ht_list = (df_col.loc[:,'first_ht'].value_counts()[0:20].keys())

df = df_col[df_col['first_ht'].isin(ht_list)] 
print("Most popular hashtags: " + str(ht_list.values))
print("\nDataframe length: " + str(len(df)))

Most popular hashtags: ['ukraine' 'news' 'russia' 'business' 'standwithukraine' 'putin' 'usa'
 'scotus' 'biden' 'russian' 'freeshipping' 'nato' 'slavaukraini'
 'eurovision' 'ukrainerussiawar' 'ukrainewar' 'ukrainian' 'mariupol'
 'azovstal' 'hatinc']

Dataframe length: 784546


In [23]:
# Create dictonary for result check
#print(tabulate(df[0:2], headers='keys', tablefmt='psql'))
df_dict =  df.set_index('tweetid').T.to_dict('list')
print(list(df_dict.items())[0])

  This is separate from the ipykernel package so we can avoid doing imports until


(1520553588086525952, ['#Russia’s forces have stolen “several hundred thousand tons” of grain in the areas of #Ukraine they occupy, Ukraine’s deputy agriculture minister says.\n\nhttps://t.co/9bqEnlF8L2', 'russia'])


In [24]:
# checking if needed
#print(df_dict[1520553588086525952][0])
#print(df_dict[1520553588086525952][1])

# Removing non significant data from the tweets

In [25]:
# Defining functions to remove urls, emojis, etc. See ref. [3]
import re
from bs4 import BeautifulSoup
from html import unescape

def remove_urls(x):
    cleaned_string = re.sub(r'(https|http)?:\/\/(\w|\.|\/|\?|\=|\&|\%)*\b', '', str(x), flags=re.MULTILINE)
    return cleaned_string

def unescape_stuff(x):
    soup = BeautifulSoup(unescape(x), 'lxml')
    return soup.text

def deEmojify(x):
    regrex_pattern = re.compile(pattern = "["
        u"\U0001F600-\U0001F64F"  # emoticons
        u"\U0001F300-\U0001F5FF"  # symbols & pictographs
        u"\U0001F680-\U0001F6FF"  # transport & map symbols
        u"\U0001F1E0-\U0001F1FF" 
        u"\U00002500-\U00002BEF"  # chinese char
        u"\U00002702-\U000027B0"
        u"\U00002702-\U000027B0"
        u"\U000024C2-\U0001F251"
        u"\U0001f926-\U0001f937"
        u"\U00010000-\U0010ffff"
        u"\u2640-\u2642"
        u"\u2600-\u2B55"
        u"\u200d"
        u"\u23cf"
        u"\u23e9"
        u"\u231a"
        u"\ufe0f"  # dingbats
        u"\u3030" # flags (iOS)
        "]+", flags = re.UNICODE)
    return regrex_pattern.sub(r'', x)

In [26]:
df['text'] = df['text'].str.lower()
df[0:2]

Unnamed: 0,tweetid,text,first_ht
4,1520553588086525952,"#russia’s forces have stolen “several hundred thousand tons” of grain in the areas of #ukraine they occupy, ukraine’s deputy agriculture minister says.\n\nhttps://t.c...",russia
18,1520553593430024193,"#russian ""killer squad has been linked to vile war crimes against civilians in bucha—and their own severely injured brothers-in-arms""\n\nhttps://t.co/jqwax0dbk2\n\n#s...",russian


In [27]:
# Removing urls

df['text'] = df['text'].swifter.apply(remove_urls)

Pandas Apply:   0%|          | 0/784546 [00:00<?, ?it/s]

In [28]:
# Applying filters on text

df['text'] = df['text'].swifter.apply(unescape_stuff)

Pandas Apply:   0%|          | 0/784546 [00:00<?, ?it/s]

In [29]:
df.reset_index(drop=True)

# Removing emojis

df['text'] = df['text'].swifter.apply(deEmojify)

Pandas Apply:   0%|          | 0/784546 [00:00<?, ?it/s]

In [30]:
# Removing hashtags and tags

def removeht(tweet):

  tweet1 = re.sub("@[A-Za-z0-9_]+","", tweet)
  tweet2 = re.sub("#[A-Za-z0-9_]+","", tweet1)

  return (tweet2) 

df['text'] = df['text'].swifter.apply(removeht)
print(df['text'][:5])

Pandas Apply:   0%|          | 0/784546 [00:00<?, ?it/s]

4                                  ’s forces have stolen “several hundred thousand tons” of grain in the areas of  they occupy, ukraine’s deputy agriculture minister says.\n\n
18                                     "killer squad has been linked to vile war crimes against civilians in bucha—and their own severely injured brothers-in-arms"\n\n\n\n    
20    cain is a prominent guy. in all his habits, facial expressions.\nour shelter needs your help!\nraising funds food for animals.\npaypal: dogandcat.helper.com\n\n\n    ...
21                                                                                                         |  soldiers in trench with  made panzerfaust-3 anti-tank rocket.\n\n
22    the ‘ukrainisation’ of opinion is causing problems to the german coalition. partnership with  has been a part of the german policy for decades. they need to create th...
Name: text, dtype: object


In [31]:
# Function for removing stop words

import spacy
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
sp = spacy.load('en_core_web_sm')

cachedStopWords = sp.Defaults.stop_words
cachedStopWords = [x.lower() for x in cachedStopWords]

def remove_stopwords(x):
    
    meaningful_words = []
    my_list = x
    
    tokenized_my_list = word_tokenize(my_list) 
    meaningful_words = [w for w in tokenized_my_list if not w in cachedStopWords]
        
    return " ".join(meaningful_words)

In [32]:
# Removing stop words

df['text'] = df['text'].swifter.apply(remove_stopwords)

Pandas Apply:   0%|          | 0/784546 [00:00<?, ?it/s]

In [33]:
# Removing words with len < 3

df['text'] = df['text'].str.findall('\w{2,}').str.join(' ')

In [34]:
print(df['text'][:5])

4                                                            forces stolen thousand tons grain areas occupy ukraine deputy agriculture minister says
18                                                         killer squad linked vile war crimes civilians bucha and severely injured brothers in arms
20                            cain prominent guy habits facial expressions shelter needs help raising funds food animals paypal dogandcat helper com
21                                                                                                      soldiers trench panzerfaust anti tank rocket
22    ukrainisation opinion causing problems german coalition partnership german policy decades need create nss quickly announce steps cogently says
Name: text, dtype: object


# Preparing the data for the model

In [35]:
# Partitioning the dataset

def get_dataset_partitions_pd(df, train_split=0.8, val_split=0.1, test_split=0.1):
    df = df.sample(frac=1).reset_index(drop=True)
    assert (train_split + test_split + val_split) == 1
    
    assert val_split == test_split 

    df_sample = df.sample(frac=1, random_state=12)
    indices_or_sections = [int(train_split * len(df)), int((1 - test_split) * len(df))]
    
    train_ds, val_ds, test_ds = np.split(df_sample, indices_or_sections)
    
    return train_ds, val_ds, test_ds

(tr,va,ts) = get_dataset_partitions_pd(df)

print ("Dataset length: " + str(len(df)))
print ("Training set length: " + str(len(tr)))
print ("Validation set length: " + str(len(va)))
print ("Test set length: " + str(len(ts)))

Dataset length: 784546
Training set length: 627636
Validation set length: 78455
Test set length: 78455


In [36]:
# Creating variables for training and validating data and labels

x_train = tr["text"]
x_val = va["text"]  

y_train = tr["first_ht"]
y_val = va["first_ht"]  

In [37]:
# Creating list of the 20 most popular hashtags

list_hashtags = np.sort(np.array(list(set(y_train))))
print(list_hashtags)

['azovstal' 'biden' 'business' 'eurovision' 'freeshipping' 'hatinc'
 'mariupol' 'nato' 'news' 'putin' 'russia' 'russian' 'scotus'
 'slavaukraini' 'standwithukraine' 'ukraine' 'ukrainerussiawar'
 'ukrainewar' 'ukrainian' 'usa']


In [38]:
# Encoding the hashtags as I can't have them as strings

y_train_ohe = pd.get_dummies(y_train)
y_val_ohe = pd.get_dummies(y_val)
#print(y_val_ohe)

# The model

In [39]:
from tensorflow.keras.layers import TextVectorization
import string
import re

# Model constants

max_features = 20000
embedding_dim = 128
sequence_length = 60

# Using this layer to normalize, split, and map strings to integers

vectorize_layer = TextVectorization(
    standardize=None,
    max_tokens=max_features,
    output_mode="int",
    output_sequence_length=sequence_length,
)

In [40]:
vectorize_layer.adapt(x_train)

In [41]:
import tensorflow as tf 
from tensorflow.keras import layers

text_input = tf.keras.Input(shape=(1,), dtype=tf.string, name='text')
x = vectorize_layer(text_input)

x = layers.Embedding(max_features, embedding_dim)(x)

x = layers.Conv1D(128, 7, padding="valid", activation="relu", strides=2)(x)

x = layers.Conv1D(128, 7, padding="valid", activation="relu", strides=2)(x)

x = layers.GlobalMaxPooling1D()(x)

x = layers.Dropout(0.6)(x)

x = layers.Dense(128, activation="relu")(x)

x = layers.Dropout(0.6)(x)

predictions = layers.Dense(20, activation="sigmoid", name="predictions")(x)

model = tf.keras.Model(text_input, predictions)

model.compile(loss="binary_crossentropy", optimizer=tf.keras.optimizers.Adam(learning_rate=0.00003) ,metrics=["accuracy"])

model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 text (InputLayer)           [(None, 1)]               0         
                                                                 
 text_vectorization (TextVec  (None, 60)               0         
 torization)                                                     
                                                                 
 embedding (Embedding)       (None, 60, 128)           2560000   
                                                                 
 conv1d (Conv1D)             (None, 27, 128)           114816    
                                                                 
 conv1d_1 (Conv1D)           (None, 11, 128)           114816    
                                                                 
 global_max_pooling1d (Globa  (None, 128)              0         
 lMaxPooling1D)                                              

# Training and validation

In [42]:
epochs = 15

# Fit the model using the train and test datasets.
model.fit(x_train, y_train_ohe, validation_data=(x_val,y_val_ohe), epochs=epochs)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x7fbe7f252590>

# Testing

In [43]:
x_test = ts["text"]  
y_test = ts["first_ht"]
y_test_ohe = pd.get_dummies(y_test)

model.evaluate(x_test, y_test_ohe)



[0.10994822531938553, 0.5567268133163452]

# Applying the model and seeing the results

Randomly taking 10 tweets from the test set and printing out the top 3 predicted hashtags for each one

In [79]:
np.set_printoptions(formatter={'float': lambda x: "{0:0.3f}".format(x)})

ts_res = ts.sample(n = 10).sort_values(by=['tweetid'])
tweetid_list = ts_res["tweetid"].values.tolist()
#print(ts_res)
x_ts_res = ts_res["text"]
y_ts_res = ts_res["first_ht"]

n = 3

y_preds = model.predict(x_ts_res)
#print(y_preds)

ord_preds = np.argsort(-y_preds, axis=1)[:,0:n]
#print(ord_preds)

ts_res_dict =  ts_res.set_index('tweetid').T.to_dict('list')
#print(ts_res_dict)

j=0
for y in y_preds:
  print('\033[1m' + "Original tweet: " + '\033[0m' + str(df_dict[tweetid_list[j]][0])) 
  print('\n')
  print('\033[1m' + "First hashtag: " + '\033[0m' + str(df_dict[tweetid_list[j]][1])) 
  print('\n')
  print('\033[1m' + "Predicted hashtags: " + '\033[0m' + list_hashtags[int(ord_preds[j][0])] + ", " + list_hashtags[int(ord_preds[j][1])] + ", " + list_hashtags[int(ord_preds[j][2])]  )  
  print()
  print("_______________________________")
  print()
  j = j+1

[1mOriginal tweet: [0m@redformans31 Love it.  This guy is making Putin look like the horrible tiny monster he is.  #SlavaUkraini


[1mFirst hashtag: [0mslavaukraini


[1mPredicted hashtags: [0mukraine, slavaukraini, standwithukraine

_______________________________

[1mOriginal tweet: [0m@on_scotland the real nazis in Ukraine are the Russian invaders.... #StandWIthUkraine


[1mFirst hashtag: [0mstandwithukraine


[1mPredicted hashtags: [0mstandwithukraine, slavaukraini, ukraine

_______________________________

[1mOriginal tweet: [0mA good read to understand what we are in for from #SCOTUS
#WomensRightsAreHumanRights https://t.co/VxjlfiHNxM


[1mFirst hashtag: [0mscotus


[1mPredicted hashtags: [0mukraine, standwithukraine, russia

_______________________________

[1mOriginal tweet: [0m@Roman_Baber @JustinTrudeau Here is a 'justification' in a pic sourced recently from #Russia's MOD. Genocide is an international crime, Mr. President, @POTUS. Genocide by genetic weap