<a href="https://colab.research.google.com/github/fjadidi2001/fake_news_detection/blob/main/DansE_Mar31.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Data

# Mount Drive in Colab

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Load the Dataset



In [2]:
import pandas as pd
import numpy as np

# Replace with your actual file path
file_path = '/content/drive/MyDrive/Projects/Hayat/facebook-fact-check.csv'


df = pd.read_csv(file_path, encoding='latin-1')

# Initial Data Inspection

In [3]:
print(df.head(2))  # See first 2 rows
print("\nMissing values:\n", df.isnull().sum())

     account_id       post_id    Category               Page  \
0  1.840000e+14  1.040000e+15  mainstream  ABC News Politics   
1  1.840000e+14  1.040000e+15  mainstream  ABC News Politics   

                                            Post URL Date Published Post Type  \
0  https://www.facebook.com/ABCNewsPolitics/posts...      9/19/2016     video   
1  https://www.facebook.com/ABCNewsPolitics/posts...      9/19/2016      link   

               Rating Debate  share_count  reaction_count  comment_count  \
0  no factual content    NaN          NaN           146.0           15.0   
1         mostly true    NaN          1.0            33.0           34.0   

                                        Context Post  
0  WATCH: &quot;JEB EXCLAMATION POINT!&quot; - Je...  
1  Can either candidate move the needle in the de...  

Missing values:
 account_id           0
post_id              0
Category             0
Page                 0
Post URL             0
Date Published       0
Post Type    

# Handle Missing Values

In [4]:
# Strategy 1: Fill categorical columns
df['Rating'] = df['Rating'].fillna('Unknown')
df['Debate'] = df['Debate'].fillna('Not Specified')

# Strategy 2: Fill numerical columns with median
numeric_cols = ['share_count', 'reaction_count', 'comment_count']
df[numeric_cols] = df[numeric_cols].fillna(df[numeric_cols].median())

# Alternative: Drop rows with critical missing values
# df = df.dropna(subset=['important_column'])

# Additional Preprocessing

In [5]:
# Convert date to datetime format
df['Date Published'] = pd.to_datetime(df['Date Published'], format='%m/%d/%Y')

# Clean text columns
df['Context Post'] = df['Context Post'].str.replace('"', '')

In [6]:
df['account_id'] = df['account_id'].astype(str)
df['post_id'] = df['post_id'].astype(str)

In [7]:
categorical_cols = ['Category', 'Page', 'Post Type']
df[categorical_cols] = df[categorical_cols].fillna('Unknown')

In [8]:
print(df.info())
print(df.isnull().sum())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2282 entries, 0 to 2281
Data columns (total 13 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   account_id      2282 non-null   object        
 1   post_id         2282 non-null   object        
 2   Category        2282 non-null   object        
 3   Page            2282 non-null   object        
 4   Post URL        2282 non-null   object        
 5   Date Published  2282 non-null   datetime64[ns]
 6   Post Type       2282 non-null   object        
 7   Rating          2282 non-null   object        
 8   Debate          2282 non-null   object        
 9   share_count     2282 non-null   float64       
 10  reaction_count  2282 non-null   float64       
 11  comment_count   2282 non-null   float64       
 12  Context Post    2282 non-null   object        
dtypes: datetime64[ns](1), float64(3), object(9)
memory usage: 231.9+ KB
None
account_id        0
post_id        

# Main source

## wordembeddings

In [None]:
# Step 1: Clean up the environment
!pip uninstall -y numpy mittens gensim scipy smart-open wrapt tensorflow tensorflow-datasets dm-tree numba

# Step 2: Install compatible versions
!pip install numpy==1.26.4 mittens==0.2 gensim==4.3.3 scipy==1.13.1 smart-open==7.1.0 wrapt==1.17.2

# Step 3: Restart runtime (run this once, then comment out)
import os
os.kill(os.getpid(), 9)

# Step 4: After restart, run the code
import numpy as np
print("NumPy version:", np.__version__)

from gensim.models import Word2Vec, FastText
import gensim
print("Gensim version:", gensim.__version__)

from mittens import Mittens, GloVe
import mittens
print("Mittens version:", mittens.__version__)

Found existing installation: numpy 2.0.2
Uninstalling numpy-2.0.2:
  Successfully uninstalled numpy-2.0.2
[0mFound existing installation: scipy 1.14.1
Uninstalling scipy-1.14.1:
  Successfully uninstalled scipy-1.14.1
Found existing installation: smart-open 7.1.0
Uninstalling smart-open-7.1.0:
  Successfully uninstalled smart-open-7.1.0
Found existing installation: wrapt 1.17.2
Uninstalling wrapt-1.17.2:
  Successfully uninstalled wrapt-1.17.2
Found existing installation: tensorflow 2.18.0
Uninstalling tensorflow-2.18.0:
  Successfully uninstalled tensorflow-2.18.0
Found existing installation: tensorflow-datasets 4.9.8
Uninstalling tensorflow-datasets-4.9.8:
  Successfully uninstalled tensorflow-datasets-4.9.8
Found existing installation: dm-tree 0.1.9
Uninstalling dm-tree-0.1.9:
  Successfully uninstalled dm-tree-0.1.9
Found existing installation: numba 0.60.0
Uninstalling numba-0.60.0:
  Successfully uninstalled numba-0.60.0
Collecting numpy==1.26.4
  Downloading numpy-1.26.4-cp311-

In [1]:
# Verify versions after restart
import numpy as np
print("NumPy version:", np.__version__)

from gensim.models import Word2Vec, FastText
import gensim
print("Gensim version:", gensim.__version__)

from mittens import GloVe
import mittens
print("Mittens version (GloVe only):", mittens.__version__)

class WordEmbeddings:

    def __init__(self, corpus, normalize_tfidf=False):
        self.corpus = corpus
        self.normalize_tfidf = normalize_tfidf
        self.documents = []
        self.sentences = []
        self.word2id = {}
        self.no_words = 0
        self.max_size = 0
        self.no_docs = len(self.corpus)

    def prepareDocuments(self):
        word_id = 1
        for document in self.corpus:
            doc = []
            for sentence in document:
                self.sentences.append(sentence)
                for word in sentence:
                    if self.word2id.get(word) is None:
                        self.word2id[word] = word_id
                        word_id += 1
                    doc.append(self.word2id[word])
            if self.max_size < len(doc):
                self.max_size = len(doc)
            self.documents.append(doc)

        self.no_words = len(self.word2id) + 1
        return self.documents

    def word2vecEmbedding(self, window_size=10, no_components=128, epochs=10, workers=os.cpu_count(), sg=0, learning_rate=0.05):
        self.word2vec = np.empty(shape=(self.no_words, no_components))
        model = Word2Vec(self.sentences, vector_size=no_components, window=window_size, min_count=1,
                         workers=workers, sg=sg, alpha=learning_rate, epochs=epochs)
        self.word2vec[0] = np.zeros(no_components)
        for word in self.word2id:
            self.word2vec[self.word2id[word]] = model.wv[word]
        return self.word2vec

    def word2GloVeEmbedding(self, window_size=10, no_components=128, epochs=10, workers=os.cpu_count(), learning_rate=0.05):
        self.word2glove = np.empty(shape=(self.no_words, no_components))
        model = GloVe(n=no_components, learning_rate=learning_rate)

        vocab = list(self.word2id.keys())
        cooc_matrix = np.zeros((len(vocab), len(vocab)))
        for sentence in self.sentences:
            for i, word in enumerate(sentence):
                word_idx = self.word2id[word] - 1
                for j in range(max(0, i - window_size), min(len(sentence), i + window_size + 1)):
                    if i != j:
                        cooc_idx = self.word2id[sentence[j]] - 1
                        cooc_matrix[word_idx, cooc_idx] += 1

        embeddings = model.fit(cooc_matrix)
        self.word2glove[0] = np.zeros(no_components)
        for word, idx in self.word2id.items():
            self.word2glove[idx] = embeddings[idx - 1]
        return self.word2glove

    def word2FastTextEmbeddings(self, window_size=10, no_components=128, epochs=10, workers=os.cpu_count(), sg=0, learning_rate=0.05):
        self.word2fasttext = np.empty(shape=(self.no_words, no_components))
        model = FastText(self.sentences, vector_size=no_components, window=window_size, min_count=1,
                         workers=workers, sg=sg, alpha=learning_rate, epochs=epochs)
        self.word2fasttext[0] = np.zeros(no_components)
        for word in self.word2id:
            self.word2fasttext[self.word2id[word]] = model.wv[word]
        return self.word2fasttext

if __name__ == '__main__':
    corpus = [
        [['Hello', 'this', 'tutorial', 'on', 'how', 'convert', 'word', 'integer', 'format'],
         ['this', 'beautiful', 'day'],
         ['Jack', 'going', 'office']],
        [['Hello', 'this', 'tutorial', 'on', 'how', 'convert', 'word', 'integer', 'format'],
         ['this', 'beautiful', 'day'],
         ['Jack', 'going', 'office']],
        [['Hello', 'this', 'tutorial', 'on', 'how', 'convert', 'word', 'integer', 'format'],
         ['this', 'beautiful', 'day'],
         ['Jack', 'going', 'office']],
    ]

    we = WordEmbeddings(corpus)
    docs = we.prepareDocuments()
    print(np.array(docs, dtype=object).shape)
    print(docs)

    w2v = we.word2vecEmbedding()
    print("Word2Vec:", w2v.shape)
    print(w2v)

    w2f = we.word2FastTextEmbeddings()
    print("FastText:", w2f.shape)
    print(w2f)

    w2g = we.word2GloVeEmbedding()
    print("GloVe:", w2g.shape)
    print(w2g)

    print("\n\nComparison for word ID 1:")
    print("Word2Vec:", w2v[1])
    print("FastText:", w2f[1])
    print("GloVe:", w2g[1])

NumPy version: 1.26.4
Gensim version: 4.3.3
Mittens version (GloVe only): 0.2


NameError: name 'os' is not defined

## tokenization

In [2]:
# Install required packages and download NLTK data
!pip install numpy==1.26.4 gensim==4.3.3 mittens==0.2 spacy==3.7.2 stop-words==2018.7.23 -q
!python -m spacy download en_core_web_sm -q
import nltk
nltk.download('stopwords', quiet=True)

import re
import spacy
from stop_words import get_stop_words
from nltk.corpus import stopwords
import numpy as np
from gensim.models import Word2Vec, FastText
from mittens import GloVe
import os

# Special characters dictionary
specialchar_dic = {
    "’": "'", "„": "\"", "“": "\"", "”": "\"", "«": "<<", "»": ">>",
    "…": "...", "—": "--", "¡": "!", "¿": "?", "©": " ", "–": " "
}

# Stop words function (cached globally)
def stopWordsEN():
    sw_stop_words = get_stop_words('en')
    sw_nltk = stopwords.words('english')
    sw_spacy = list(spacy.lang.en.stop_words.STOP_WORDS)
    sw_mallet = ['a', 'able', 'about', 'above', 'according', 'accordingly', 'across', 'actually', 'after', 'afterwards', 'again', 'against', 'all', 'allow', 'allows', 'almost', 'alone', 'along', 'already', 'also', 'although', 'always', 'am', 'among', 'amongst', 'an', 'and', 'another', 'any', 'anybody', 'anyhow', 'anyone', 'anything', 'anyway', 'anyways', 'anywhere', 'apart', 'appear', 'appreciate', 'appropriate', 'are', 'around', 'as', 'aside', 'ask', 'asking', 'associated', 'at', 'available', 'away', 'awfully', 'b', 'be', 'became', 'because', 'become', 'becomes', 'becoming', 'been', 'before', 'beforehand', 'behind', 'being', 'believe', 'below', 'beside', 'besides', 'best', 'better', 'between', 'beyond', 'both', 'brief', 'but', 'by', 'c', 'came', 'can', 'cannot', 'cant', 'cause', 'causes', 'certain', 'certainly', 'changes', 'clearly', 'co', 'com', 'come', 'comes', 'concerning', 'consequently', 'consider', 'considering', 'contain', 'containing', 'contains', 'corresponding', 'could', 'course', 'currently', 'd', 'definitely', 'described', 'despite', 'did', 'different', 'do', 'does', 'doing', 'done', 'down', 'downwards', 'during', 'e', 'each', 'edu', 'eg', 'eight', 'either', 'else', 'elsewhere', 'enough', 'entirely', 'especially', 'et', 'etc', 'even', 'ever', 'every', 'everybody', 'everyone', 'everything', 'everywhere', 'ex', 'exactly', 'example', 'except', 'f', 'far', 'few', 'fifth', 'first', 'five', 'followed', 'following', 'follows', 'for', 'former', 'formerly', 'forth', 'four', 'from', 'further', 'furthermore', 'g', 'get', 'gets', 'getting', 'given', 'gives', 'go', 'goes', 'going', 'gone', 'got', 'gotten', 'greetings', 'h', 'had', 'happens', 'hardly', 'has', 'have', 'having', 'he', 'hello', 'help', 'hence', 'her', 'here', 'hereafter', 'hereby', 'herein', 'hereupon', 'hers', 'herself', 'hi', 'him', 'himself', 'his', 'hither', 'hopefully', 'how', 'howbeit', 'however', 'i', 'ie', 'if', 'ignored', 'immediate', 'in', 'inasmuch', 'inc', 'indeed', 'indicate', 'indicated', 'indicates', 'inner', 'insofar', 'instead', 'into', 'inward', 'is', 'it', 'its', 'itself', 'j', 'just', 'k', 'keep', 'keeps', 'kept', 'know', 'knows', 'known', 'l', 'last', 'lately', 'later', 'latter', 'latterly', 'least', 'less', 'lest', 'let', 'like', 'liked', 'likely', 'little', 'look', 'looking', 'looks', 'ltd', 'm', 'mainly', 'many', 'may', 'maybe', 'me', 'mean', 'meanwhile', 'merely', 'might', 'more', 'moreover', 'most', 'mostly', 'much', 'must', 'my', 'myself', 'n', 'name', 'namely', 'nd', 'near', 'nearly', 'necessary', 'need', 'needs', 'neither', 'never', 'nevertheless', 'new', 'next', 'nine', 'no', 'nobody', 'non', 'none', 'noone', 'nor', 'normally', 'not', 'nothing', 'novel', 'now', 'nowhere', 'o', 'obviously', 'of', 'off', 'often', 'oh', 'ok', 'okay', 'old', 'on', 'once', 'one', 'ones', 'only', 'onto', 'or', 'other', 'others', 'otherwise', 'ought', 'our', 'ours', 'ourselves', 'out', 'outside', 'over', 'overall', 'own', 'p', 'particular', 'particularly', 'per', 'perhaps', 'placed', 'please', 'plus', 'possible', 'presumably', 'probably', 'provides', 'q', 'que', 'quite', 'qv', 'r', 'rather', 'rd', 're', 'really', 'reasonably', 'regarding', 'regardless', 'regards', 'relatively', 'respectively', 'right', 's', 'said', 'same', 'saw', 'say', 'saying', 'says', 'second', 'secondly', 'see', 'seeing', 'seem', 'seemed', 'seeming', 'seems', 'seen', 'self', 'selves', 'sensible', 'sent', 'serious', 'seriously', 'seven', 'several', 'shall', 'she', 'should', 'since', 'six', 'so', 'some', 'somebody', 'somehow', 'someone', 'something', 'sometime', 'sometimes', 'somewhat', 'somewhere', 'soon', 'sorry', 'specified', 'specify', 'specifying', 'still', 'sub', 'such', 'sup', 'sure', 't', 'take', 'taken', 'tell', 'tends', 'th', 'than', 'thank', 'thanks', 'thanx', 'that', 'thats', 'the', 'their', 'theirs', 'them', 'themselves', 'then', 'thence', 'there', 'thereafter', 'thereby', 'therefore', 'therein', 'theres', 'thereupon', 'these', 'they', 'think', 'third', 'this', 'thorough', 'thoroughly', 'those', 'though', 'three', 'through', 'throughout', 'thru', 'thus', 'to', 'together', 'too', 'took', 'toward', 'towards', 'tried', 'tries', 'truly', 'try', 'trying', 'twice', 'two', 'u', 'un', 'under', 'unfortunately', 'unless', 'unlikely', 'until', 'unto', 'up', 'upon', 'us', 'use', 'used', 'useful', 'uses', 'using', 'usually', 'uucp', 'v', 'value', 'various', 'very', 'via', 'viz', 'vs', 'w', 'want', 'wants', 'was', 'way', 'we', 'welcome', 'well', 'went', 'were', 'what', 'whatever', 'when', 'whence', 'whenever', 'where', 'whereafter', 'whereas', 'whereby', 'wherein', 'whereupon', 'wherever', 'whether', 'which', 'while', 'whither', 'who', 'whoever', 'whole', 'whom', 'whose', 'why', 'will', 'willing', 'wish', 'with', 'within', 'without', 'wonder', 'would', 'x', 'y', 'yes', 'yet', 'you', 'your', 'yours', 'yourself', 'yourselves', 'z', 'zero']
    return list(set(sw_stop_words + sw_nltk + sw_mallet + sw_spacy))

# Precompile regex and load Spacy model
punctuation = '!"#$%&\'()*+,./:;<=>?@[\\]^_`{|}~-'
specialchar_re = re.compile('(%s)' % '|'.join(specialchar_dic.keys()))
nlp = spacy.load("en_core_web_sm")
cachedStopWords_en = stopWordsEN()

class Tokenization:
    def applyFE(self, text):
        """Combine negation with words to reduce bias."""
        final_text = text.replace('cannot', 'can not').replace('can\'t', 'can not')
        final_text = final_text.replace('won\'t', 'will not').replace('n\'t', ' not').replace(' not ', ' not')
        return final_text

    def removeStopWords(self, text):
        return ' '.join([word for word in text.split() if word not in cachedStopWords_en])

    def removePunctuation(self, text, punctuation=punctuation):
        for c in punctuation:
            text = text.replace(c, ' ')
        return text

    def replaceUTF8Char(self, text, specialchars=specialchar_dic):
        def replace(match):
            return specialchars[match.group(0)]
        return specialchar_re.sub(replace, text)

    def createCorpus(self, text, remove_punctuation=True, remove_stopwords=True, apply_FE=True):
        corpus = []
        try:
            text = self.replaceUTF8Char(text).replace("\n", " ")
            doc = nlp(text)
            processed_text = ' '.join([t.lemma_ if t.lemma_ != '-PRON-' else t.text if not t.ent_type_ else t.text for t in doc])
            processed_text = processed_text.replace("\s\s+", ' ')

            doc = nlp(processed_text.lower())
            rawText = not (remove_punctuation or remove_stopwords or apply_FE)

            for sentence in doc.sents:
                sent = str(sentence.text)
                if len(sent) == 0:
                    continue
                if not rawText:
                    if apply_FE:
                        sent = self.applyFE(text=sent)
                    if remove_punctuation:
                        sent = self.removePunctuation(text=sent)
                    if remove_stopwords:
                        sent = self.removeStopWords(text=sent)
                sent = sent.lower().split()
                if sent:
                    corpus.append(sent)
        except Exception as exp:
            print('exception=', str(exp))
            print('text=', text)
        return corpus

    def __del__(self):
        print("Destructor Tokenization")

class WordEmbeddings:
    def __init__(self, corpus, normalize_tfidf=False):
        self.corpus = corpus
        self.normalize_tfidf = normalize_tfidf
        self.documents = []
        self.sentences = []
        self.word2id = {}
        self.no_words = 0
        self.max_size = 0
        self.no_docs = len(self.corpus)

    def prepareDocuments(self):
        word_id = 1
        for document in self.corpus:
            doc = []
            for sentence in document:
                self.sentences.append(sentence)
                for word in sentence:
                    if self.word2id.get(word) is None:
                        self.word2id[word] = word_id
                        word_id += 1
                    doc.append(self.word2id[word])
            if self.max_size < len(doc):
                self.max_size = len(doc)
            self.documents.append(doc)

        self.no_words = len(self.word2id) + 1
        return self.documents

    def word2vecEmbedding(self, window_size=10, no_components=128, epochs=10, workers=os.cpu_count(), sg=0, learning_rate=0.05):
        self.word2vec = np.empty(shape=(self.no_words, no_components))
        model = Word2Vec(self.sentences, vector_size=no_components, window=window_size, min_count=1,
                         workers=workers, sg=sg, alpha=learning_rate, epochs=epochs)
        self.word2vec[0] = np.zeros(no_components)
        for word in self.word2id:
            self.word2vec[self.word2id[word]] = model.wv[word]
        return self.word2vec

    def word2GloVeEmbedding(self, window_size=10, no_components=128, epochs=10, workers=os.cpu_count(), learning_rate=0.05):
        self.word2glove = np.empty(shape=(self.no_words, no_components))
        model = GloVe(n=no_components, learning_rate=learning_rate)

        vocab = list(self.word2id.keys())
        cooc_matrix = np.zeros((len(vocab), len(vocab)))
        for sentence in self.sentences:
            for i, word in enumerate(sentence):
                word_idx = self.word2id[word] - 1
                for j in range(max(0, i - window_size), min(len(sentence), i + window_size + 1)):
                    if i != j:
                        cooc_idx = self.word2id[sentence[j]] - 1
                        cooc_matrix[word_idx, cooc_idx] += 1

        embeddings = model.fit(cooc_matrix)
        self.word2glove[0] = np.zeros(no_components)
        for word, idx in self.word2id.items():
            self.word2glove[idx] = embeddings[idx - 1]
        return self.word2glove

    def word2FastTextEmbeddings(self, window_size=10, no_components=128, epochs=10, workers=os.cpu_count(), sg=0, learning_rate=0.05):
        self.word2fasttext = np.empty(shape=(self.no_words, no_components))
        model = FastText(self.sentences, vector_size=no_components, window=window_size, min_count=1,
                         workers=workers, sg=sg, alpha=learning_rate, epochs=epochs)
        self.word2fasttext[0] = np.zeros(no_components)
        for word in self.word2id:
            self.word2fasttext[self.word2id[word]] = model.wv[word]
        return self.word2fasttext

if __name__ == '__main__':
    # Tokenization example
    tkn = Tokenization()
    text1 = "Apple data-intensive is looking at buying U.K. startup for $1 billion. This is great! The new D.P. model is funcitonal and ready"
    corpus1 = tkn.createCorpus(text1)
    print("Corpus 1:", corpus1)

    # Larger text example
    text2 = """The lion may be known as the king of the jungle, but lions do not live in jungles. They’re the rulers of the African savannahs that are covered in brown grasses and speckled with sparse trees. Lions’ coloring helps them blend in perfectly with the tall grass so they can ambush their prey as best as possible. And lions are ferocious. Although they’re one of the most powerful predators on land, lions are in danger. Hunters and poachers target lions to prove to the world their machismo.\n\nAnd while hunters seek to wipe lions off the face of the earth to bolster their egos, the Kevin Richardson Wildlife Sanctuary hopes to stop them and protect the big African cat at all cost.\n\nRichardson has earned the nickname the “Lion Whisperer” for a reason. He aims to educate the world about lions. And for those lucky enough to volunteer alongside Richardson, he encourages them to learn more about lions and help protect the wild species.\n\n“To raise awareness, Kevin has now set up his YouTube Channel ‘LionWhispererTV’. The channel is all about raising awareness about not only the declining numbers of lions but also how this rapid decrease is happening. By watching these videos, you are directly contributing to our scheme of land acquisition,” he writes in his bio.\n\nAs part of the volunteer program, Richardson hosts a “volunteer enrichment and lion enrichment” walk. As the name suggests, Richardson takes his group of volunteers out into the savannah of South Africa to hang out with two lions. There, the volunteers meet a male lion, Bobcat, and a female lioness, Gabby. Both lions look ferocious, but are truly “affectionate,” at least that’s what Richardson says. And remember, he’s the lion whisperer, so he’s got an advantage with these deadly big cats.\n\nAs Richardson showers the pair of lions with love, the volunteers stay locked in the truck, unwilling to put their lives in danger. And while they are in the vehicle, the lions are just feet from them – and if something goes wrong, they could wind up injured anyway.\n\nRichardson shared the video on his “The Lion Whisperer” YouTube channel. With more than one million hits, this video has proven to be one of his most famous.\n\nThe video describes the moment caught on tape as follows:\n\n“It’s an enrichment walk for both the volunteers and the lions as Kevin shows off his lovely lions as well as giving some amazing lion facts to the volunteers.”\n\nViewers like you are overwhelmed with the magnificent footage. The following are a few comments shared on the video.\n\n“I hope to someday volunteer there with Kevin. I believe in the work and his perspective about conservation. This video makes me want to all the more! Bobcat and Gabby are lovely lions.” “Every time I watch a one of your videos I somehow end up smiling from ear to ear!” “That was so beautiful, wish I could rub my head against a lion.”\n\nTake a moment to watch this video. Would you ever want to volunteer with Kevin Richardson and his lions?"""
    corpus2 = tkn.createCorpus(text2, remove_stopwords=False)
    print("Corpus 2:", corpus2)

    # Generate embeddings from Corpus 2
    we = WordEmbeddings(corpus2)
    docs = we.prepareDocuments()
    print("\nDocuments shape:", np.array(docs, dtype=object).shape)
    print("Documents:", docs)

    w2v = we.word2vecEmbedding()
    print("Word2Vec shape:", w2v.shape)
    print("Word2Vec embeddings:", w2v[:5])  # Print first 5 for brevity

    w2f = we.word2FastTextEmbeddings()
    print("FastText shape:", w2f.shape)
    print("FastText embeddings:", w2f[:5])

    w2g = we.word2GloVeEmbedding()
    print("GloVe shape:", w2g.shape)
    print("GloVe embeddings:", w2g[:5])

    print("\nComparison for word 'lion' (ID varies):")
    lion_id = we.word2id.get('lion', -1)
    if lion_id != -1:
        print("Word2Vec:", w2v[lion_id])
        print("FastText:", w2f[lion_id])
        print("GloVe:", w2g[lion_id])
    else:
        print("'lion' not found in vocabulary")

  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.6/6.6 MB[0m [31m66.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.0/57.0 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m920.2/920.2 kB[0m [31m41.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.0/46.0 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.1/50.1 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.2/10.2 MB[0m [31m51.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.0/45.0 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for stop-words (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━



Corpus 1: [['apple', 'data', 'intensive', 'buy', 'startup', '1', 'billion'], ['great'], ['model', 'funcitonal', 'ready']]
Corpus 2: [['the', 'lion', 'may', 'be', 'know', 'as', 'the', 'king', 'of', 'the', 'jungle', 'but', 'lion', 'do', 'notlive', 'in', 'jungle'], ['they', 'be', 'the', 'ruler', 'of', 'the', 'african', 'savannah', 'that', 'be', 'cover', 'in', 'brown', 'grass', 'and', 'speckle', 'with', 'sparse', 'tree'], ['lion', 'coloring', 'help', 'they', 'blend', 'in', 'perfectly', 'with', 'the', 'tall', 'grass', 'so', 'they', 'can', 'ambush', 'their', 'prey', 'as', 'well', 'as', 'possible'], ['and', 'lion', 'be', 'ferocious'], ['although', 'they', 'be', 'one', 'of', 'the', 'most', 'powerful', 'predator', 'on', 'land', 'lion', 'be', 'in', 'danger'], ['hunter', 'and', 'poacher', 'target', 'lion', 'to', 'prove', 'to', 'the', 'world', 'their', 'machismo'], ['and', 'while', 'hunter', 'seek', 'to', 'wipe', 'lion', 'off', 'the', 'face', 'of', 'the', 'earth', 'to', 'bolster', 'their', 'ego', 

Iteration 100: error 0.1557

FastText shape: (26, 128)
FastText embeddings: [[ 0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
   0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
   0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
   0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
   0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
   0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
   0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
   0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
   0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
   0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
   0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
   0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
   0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
   0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
   0.00000000e+00  0.00000000

## Embedding



In [3]:
# Install dependencies
!pip install numpy==1.26.4 gensim==4.3.3 spacy==3.7.2 stop-words==2018.7.23 pandas scipy -q
!python -m spacy download en_core_web_sm -q
import nltk
nltk.download('stopwords', quiet=True)

import pandas as pd
import numpy as np
from scipy import io as sio
import re
import spacy
from stop_words import get_stop_words
from nltk.corpus import stopwords
from gensim.models import Word2Vec, FastText
from mittens import GloVe
from multiprocessing import cpu_count
from concurrent.futures import ProcessPoolExecutor
from sklearn.preprocessing import StandardScaler
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

# Define stop words (simplified for brevity; use your full list)
def stopWordsEN():
    sw_stop_words = get_stop_words('en')
    sw_nltk = stopwords.words('english')
    sw_spacy = list(spacy.lang.en.stop_words.STOP_WORDS)
    return list(set(sw_stop_words + sw_nltk + sw_spacy))

# Tokenization setup
specialchar_dic = {
    "’": "'", "„": "\"", "“": "\"", "”": "\"", "«": "<<", "»": ">>",
    "…": "...", "—": "--", "¡": "!", "¿": "?", "©": " ", "–": " "
}
punctuation = '!"#$%&\'()*+,./:;<=>?@[\\]^_`{|}~-'
specialchar_re = re.compile('(%s)' % '|'.join(specialchar_dic.keys()))
nlp = spacy.load("en_core_web_sm")
cachedStopWords_en = stopWordsEN()

class Tokenization:
    def applyFE(self, text):
        final_text = text.replace('cannot', 'can not').replace('can\'t', 'can not')
        final_text = final_text.replace('won\'t', 'will not').replace('n\'t', ' not').replace(' not ', ' not')
        return final_text

    def removeStopWords(self, text):
        return ' '.join([word for word in text.split() if word not in cachedStopWords_en])

    def removePunctuation(self, text, punctuation=punctuation):
        for c in punctuation:
            text = text.replace(c, ' ')
        return text

    def replaceUTF8Char(self, text, specialchars=specialchar_dic):
        def replace(match):
            return specialchars[match.group(0)]
        return specialchar_re.sub(replace, text)

    def createCorpus(self, text, remove_punctuation=True, remove_stopwords=True, apply_FE=True):
        if pd.isna(text):
            text = ""
        corpus = []
        try:
            text = self.replaceUTF8Char(text).replace("\n", " ")
            doc = nlp(text)
            processed_text = ' '.join([t.lemma_ if t.lemma_ != '-PRON-' else t.text if not t.ent_type_ else t.text for t in doc])
            processed_text = processed_text.replace("\s\s+", ' ')
            doc = nlp(processed_text.lower())
            rawText = not (remove_punctuation or remove_stopwords or apply_FE)
            for sentence in doc.sents:
                sent = str(sentence.text)
                if len(sent) == 0:
                    continue
                if not rawText:
                    if apply_FE:
                        sent = self.applyFE(text=sent)
                    if remove_punctuation:
                        sent = self.removePunctuation(text=sent)
                    if remove_stopwords:
                        sent = self.removeStopWords(text=sent)
                sent = sent.lower().split()
                if sent:
                    corpus.append(sent)
        except Exception as exp:
            print('exception=', str(exp))
            print('text=', text)
        return corpus

class WordEmbeddings:
    def __init__(self, corpus):
        self.corpus = corpus
        self.documents = []
        self.sentences = []
        self.word2id = {}
        self.no_words = 0
        self.max_size = 0
        self.no_docs = len(self.corpus)

    def prepareDocuments(self):
        word_id = 1
        for document in self.corpus:
            doc = []
            for sentence in document:
                self.sentences.append(sentence)
                for word in sentence:
                    if self.word2id.get(word) is None:
                        self.word2id[word] = word_id
                        word_id += 1
                    doc.append(self.word2id[word])
            if self.max_size < len(doc):
                self.max_size = len(doc)
            self.documents.append(doc)
        self.no_words = len(self.word2id) + 1
        return self.documents

    def word2vecEmbedding(self, window_size=10, no_components=128, epochs=10, workers=cpu_count(), sg=0):
        self.word2vec = np.empty(shape=(self.no_words, no_components))
        model = Word2Vec(self.sentences, vector_size=no_components, window=window_size, min_count=1,
                         workers=workers, sg=sg, epochs=epochs)
        self.word2vec[0] = np.zeros(no_components)
        for word in self.word2id:
            self.word2vec[self.word2id[word]] = model.wv[word]
        return self.word2vec

    def word2GloVeEmbedding(self, window_size=10, no_components=128):
        self.word2glove = np.empty(shape=(self.no_words, no_components))
        model = GloVe(n=no_components)
        vocab = list(self.word2id.keys())
        cooc_matrix = np.zeros((len(vocab), len(vocab)))
        for sentence in self.sentences:
            for i, word in enumerate(sentence):
                word_idx = self.word2id[word] - 1
                for j in range(max(0, i - window_size), min(len(sentence), i + window_size + 1)):
                    if i != j:
                        cooc_idx = self.word2id[sentence[j]] - 1
                        cooc_matrix[word_idx, cooc_idx] += 1
        embeddings = model.fit(cooc_matrix)
        self.word2glove[0] = np.zeros(no_components)
        for word, idx in self.word2id.items():
            self.word2glove[idx] = embeddings[idx - 1]
        return self.word2glove

    def word2FastTextEmbeddings(self, window_size=10, no_components=128, epochs=10, workers=cpu_count(), sg=0):
        self.word2fasttext = np.empty(shape=(self.no_words, no_components))
        model = FastText(self.sentences, vector_size=no_components, window=window_size, min_count=1,
                         workers=workers, sg=sg, epochs=epochs)
        self.word2fasttext[0] = np.zeros(no_components)
        for word in self.word2id:
            self.word2fasttext[self.word2id[word]] = model.wv[word]
        return self.word2fasttext

def processElement(elem):
    idx, text = elem  # Unpack as (index, text)
    tkn = Tokenization()
    text = tkn.createCorpus(text, remove_stopwords=False)
    return idx, text

if __name__ == "__main__":
    # Load dataset
    file_path = '/content/drive/MyDrive/Projects/Hayat/facebook-fact-check.csv'
    df = pd.read_csv(file_path, encoding='latin-1')
    print("Dataset Head:")
    print(df.head())
    print("\nDataset Info:")
    print(df.info())
    print("\nMissing Values:")
    print(df.isnull().sum())

    # Label mapping
    label2id = {
        'mostly true': 0,
        'mixture of true and false': 1,
        'no factual content': 1,
        'mostly false': 1
    }
    df['Rating'] = df['Rating'].map(label2id)
    y = df['Rating'].astype(int).to_numpy()
    sio.savemat('labels.mat', {'y': y})

    # Network features
    network_cols = ['share_count', 'reaction_count', 'comment_count']
    X_network = df[network_cols].fillna(0).to_numpy()
    scaler_std = StandardScaler()
    X_net_std = scaler_std.fit_transform(X_network)
    X_net_std = X_net_std.reshape((X_net_std.shape[0], 1, X_net_std.shape[1]))
    print("\nX_network shape:", X_network.shape)
    print("X_net_std shape:", X_net_std.shape)
    sio.savemat('network.mat', {'X_net_std': X_net_std})

    # Tokenization
    print("\nStart Tokenization")
    # Use row indices (0 to 2281) paired with Context Post
    texts = list(enumerate(df['Context Post'].tolist()))
    corpus = [None] * len(texts)
    no_threads = cpu_count() - 1
    with ProcessPoolExecutor(max_workers=no_threads) as worker:
        for result in worker.map(processElement, texts):
            if result:
                corpus[result[0]] = result[1]

    print("Corpus sample (first 5):")
    for idx, doc in enumerate(corpus[:5]):
        print(idx, doc)

    # Word Embeddings
    print("\nStart Document Tokenization")
    we = WordEmbeddings(corpus)
    documents = we.prepareDocuments()
    vocabulary_size = we.no_words
    max_size = we.max_size
    print("Vocabulary size:", vocabulary_size)
    print("Max Document size:", max_size)

    X_docs = []
    for document in documents:
        doc_size = len(document)
        X_docs.append(document + [0] * (max_size - doc_size))
    X_docs = np.array(X_docs)
    sio.savemat('corpus.mat', {'X': X_docs})

    print("Start W2V CBOW")
    w2v_cbow = we.word2vecEmbedding(sg=0)
    sio.savemat('w2v_cbow.mat', {'w2v_cbow': w2v_cbow})

    print("Start W2V SG")
    w2v_sg = we.word2vecEmbedding(sg=1)
    sio.savemat('w2v_sg.mat', {'w2v_sg': w2v_sg})

    print("Start FT CBOW")
    ft_cbow = we.word2FastTextEmbeddings(sg=0)
    sio.savemat('ft_cbow.mat', {'ft_cbow': ft_cbow})

    print("Start FT SG")
    ft_sg = we.word2FastTextEmbeddings(sg=1)
    sio.savemat('ft_sg.mat', {'ft_sg': ft_sg})

    print("Start GLOVE")
    glove = we.word2GloVeEmbedding()
    sio.savemat('glove.mat', {'glove': glove})

[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Dataset Head:
     account_id       post_id    Category               Page  \
0  1.840000e+14  1.040000e+15  mainstream  ABC News Politics   
1  1.840000e+14  1.040000e+15  mainstream  ABC News Politics   
2  1.840000e+14  1.040000e+15  mainstream  ABC News Politics   
3  1.840000e+14  1.040000e+15  mainstream  ABC News Politics   
4  1.840000e+14  1.040000e+15  mainstream  ABC News Politics   

                                            Post URL Date Published Post Type  \
0  https://www.facebook.com/ABCNewsPolitics/posts...      9/19/2016     video   
1  https://www.facebook.com/ABCNewsPolitics/posts...      9/19/2016      link   
2  https://www.facebook.com/ABCNewsPolitics/posts...      9/19/2016      link   
3  https://www.facebook.com/ABCNew

Iteration 100: error 116.4637

- Dataset Size: 2282 samples.

- Network Features: X_net_std has shape (2282, 1, 3) (from share_count, reaction_count, comment_count).

- Text Data: X_docs will have shape (2282, 117) (max document size is 117).

- Vocabulary Size: 4023 unique words (including padding token 0).

- Embeddings: Successfully generated w2v_cbow, w2v_sg, ft_cbow, ft_sg, and glove, each with 128 dimensions (default no_components).



## Main Script

In [4]:
!pip install tensorflow

Collecting tensorflow
  Downloading tensorflow-2.19.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.1 kB)
Collecting tensorboard~=2.19.0 (from tensorflow)
  Downloading tensorboard-2.19.0-py3-none-any.whl.metadata (1.8 kB)
Collecting ml-dtypes<1.0.0,>=0.5.1 (from tensorflow)
  Downloading ml_dtypes-0.5.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (21 kB)
Downloading tensorflow-2.19.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (644.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m644.9/644.9 MB[0m [31m863.0 kB/s[0m eta [36m0:00:00[0m
[?25hDownloading ml_dtypes-0.5.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.7/4.7 MB[0m [31m53.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading tensorboard-2.19.0-py3-none-any.whl (5.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.5/5.5 MB[0m [31m76.5 MB

In [None]:
import random
import time
import os
import numpy as np
from scipy import io as sio
from sklearn.metrics import accuracy_score, recall_score, precision_score
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, Embedding, LSTM, Bidirectional, Input, Concatenate, Conv1D, Flatten, MaxPooling1D, Reshape
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Hyperparameters
num_classes = 2
batch_size = 256
epochs_n = 5
units = 128
filters = int(units / 2)
no_attributes_lstm = units
kernel_size_lstm = int(no_attributes_lstm / 2)
no_attributes_bilstm = int(units * 2)
kernel_size_bilstm = int(no_attributes_bilstm / 2)

execution = {}
accuracies = {}
precisions = {}
recalls = {}

def evaluate(y_test, y_pred, modelName='LSTM', wordemb='w2v_sg', iters=0):
    y_pred_norm = []
    for elem in y_pred:
        line = [0] * len(elem)
        try:
            elem[np.isnan(elem)] = 0
            line[elem.tolist().index(max(elem.tolist()))] = 1
        except:
            print("Error for getting predicted class")
            print(elem.tolist())
            line[random.randint(0, len(elem)-1)] = 1
        y_pred_norm.append(line)
    y_p = np.argmax(np.array(y_pred_norm), 1)
    y_t = np.argmax(np.array(y_test), 1)
    accuracy = accuracy_score(y_t, y_p)
    accuracies[wordemb][modelName].append(accuracy)
    precision = precision_score(y_t, y_p, average='weighted')
    precisions[wordemb][modelName].append(precision)
    recall = recall_score(y_t, y_p, average='weighted')
    recalls[wordemb][modelName].append(recall)
    print(f"{modelName} {wordemb} Accuracy {accuracy:.4f}")
    print(f"{modelName} {wordemb} Precision {precision:.4f}")
    print(f"{modelName} {wordemb} Recall {recall:.4f}")
    return y_p, y_t

# Model definitions with corrected shapes
def modelContentNetworkLSTM_00CNN(X_train_docs, X_val_docs, X_test_docs, X_train_net, X_val_net, X_test_net, y_train, y_val, y_test, w2v, num_classes, wordemb, idx):
    input_docs = Input(shape=(X_train_docs.shape[1],), name='DOCS_INPUT')  # Tuple (117,)
    model_docs = Embedding(input_dim=4023, output_dim=units, weights=[w2v], trainable=False)(input_docs)
    model_docs = LSTM(units=units, dropout=0.2, recurrent_dropout=0.2, return_sequences=True)(model_docs)
    model_docs = Flatten()(model_docs)
    input_net = Input(shape=(1, 3), name='NETS_INPUT')
    model_net = LSTM(units=units, dropout=0.2, recurrent_dropout=0.2, return_sequences=True)(input_net)
    model_net = Flatten()(model_net)
    combined = Concatenate()([model_docs, model_net])
    output = Dense(units=num_classes, activation='softmax')(combined)
    model = Model(inputs=[input_docs, input_net], outputs=output, name="LSTM-00CNN-ContentNets")
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10)
    start_time = time.time()
    model.fit(x=[X_train_docs, X_train_net], y=y_train, epochs=epochs_n, verbose=1,
              validation_data=([X_val_docs, X_val_net], y_val), batch_size=batch_size, callbacks=[es])
    end_time = time.time()
    y_pred = model.predict([X_test_docs, X_test_net], verbose=0)
    evaluate(y_test, y_pred, modelName=model.name, wordemb=wordemb, iters=idx)
    exc_time = end_time - start_time
    execution[wordemb][model.name].append(exc_time)
    print(f"Time taken to train: {exc_time:.2f} seconds")

def modelContentNetworkLSTM_01CNN(X_train_docs, X_val_docs, X_test_docs, X_train_net, X_val_net, X_test_net, y_train, y_val, y_test, w2v, num_classes, wordemb, idx):
    input_docs = Input(shape=(X_train_docs.shape[1],), name='DOCS_INPUT')
    model_docs = Embedding(input_dim=4023, output_dim=units, weights=[w2v], trainable=False)(input_docs)
    model_docs = LSTM(units=units, dropout=0.2, recurrent_dropout=0.2, return_sequences=True)(model_docs)
    model_docs = Flatten()(model_docs)
    input_net = Input(shape=(1, 3), name='NETS_INPUT')
    model_net = LSTM(units=units, dropout=0.2, recurrent_dropout=0.2, return_sequences=True)(input_net)
    model_net = Reshape((no_attributes_lstm, 1))(model_net)
    model_net = Conv1D(filters=filters, kernel_size=kernel_size_lstm, activation='relu')(model_net)
    model_net = MaxPooling1D()(model_net)
    model_net = Flatten()(model_net)
    combined = Concatenate()([model_docs, model_net])
    output = Dense(units=num_classes, activation='softmax')(combined)
    model = Model(inputs=[input_docs, input_net], outputs=output, name="LSTM-01CNN-ContentNets")
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10)
    start_time = time.time()
    model.fit(x=[X_train_docs, X_train_net], y=y_train, epochs=epochs_n, verbose=1,
              validation_data=([X_val_docs, X_val_net], y_val), batch_size=batch_size, callbacks=[es])
    end_time = time.time()
    y_pred = model.predict([X_test_docs, X_test_net], verbose=0)
    evaluate(y_test, y_pred, modelName=model.name, wordemb=wordemb, iters=idx)
    exc_time = end_time - start_time
    execution[wordemb][model.name].append(exc_time)
    print(f"Time taken to train: {exc_time:.2f} seconds")

def modelContentNetworkLSTM_10CNN(X_train_docs, X_val_docs, X_test_docs, X_train_net, X_val_net, X_test_net, y_train, y_val, y_test, w2v, num_classes, wordemb, idx):
    input_docs = Input(shape=(X_train_docs.shape[1],), name='DOCS_INPUT')
    model_docs = Embedding(input_dim=4023, output_dim=units, weights=[w2v], trainable=False)(input_docs)
    model_docs = LSTM(units=units, dropout=0.2, recurrent_dropout=0.2, return_sequences=True)(model_docs)
    model_docs = Conv1D(filters=int(filters/2), kernel_size=int(kernel_size_lstm/2), activation='relu')(model_docs)
    model_docs = MaxPooling1D()(model_docs)
    model_docs = Flatten()(model_docs)
    input_net = Input(shape=(1, 3), name='NETS_INPUT')
    model_net = LSTM(units=units, dropout=0.2, recurrent_dropout=0.2, return_sequences=True)(input_net)
    model_net = Flatten()(model_net)
    combined = Concatenate()([model_docs, model_net])
    output = Dense(units=num_classes, activation='softmax')(combined)
    model = Model(inputs=[input_docs, input_net], outputs=output, name="LSTM-10CNN-ContentNets")
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10)
    start_time = time.time()
    model.fit(x=[X_train_docs, X_train_net], y=y_train, epochs=epochs_n, verbose=1,
              validation_data=([X_val_docs, X_val_net], y_val), batch_size=batch_size, callbacks=[es])
    end_time = time.time()
    y_pred = model.predict([X_test_docs, X_test_net], verbose=0)
    evaluate(y_test, y_pred, modelName=model.name, wordemb=wordemb, iters=idx)
    exc_time = end_time - start_time
    execution[wordemb][model.name].append(exc_time)
    print(f"Time taken to train: {exc_time:.2f} seconds")

def modelContentNetworkLSTM_11CNN(X_train_docs, X_val_docs, X_test_docs, X_train_net, X_val_net, X_test_net, y_train, y_val, y_test, w2v, num_classes, wordemb, idx):
    input_docs = Input(shape=(X_train_docs.shape[1],), name='DOCS_INPUT')
    model_docs = Embedding(input_dim=4023, output_dim=units, weights=[w2v], trainable=False)(input_docs)
    model_docs = LSTM(units=units, dropout=0.2, recurrent_dropout=0.2, return_sequences=True)(model_docs)
    model_docs = Conv1D(filters=int(filters/2), kernel_size=int(kernel_size_lstm/2), activation='relu')(model_docs)
    model_docs = MaxPooling1D()(model_docs)
    model_docs = Flatten()(model_docs)
    input_net = Input(shape=(1, 3), name='NETS_INPUT')
    model_net = LSTM(units=units, dropout=0.2, recurrent_dropout=0.2, return_sequences=True)(input_net)
    model_net = Reshape((no_attributes_lstm, 1))(model_net)
    model_net = Conv1D(filters=filters, kernel_size=kernel_size_lstm, activation='relu')(model_net)
    model_net = MaxPooling1D()(model_net)
    model_net = Flatten()(model_net)
    combined = Concatenate()([model_docs, model_net])
    output = Dense(units=num_classes, activation='softmax')(combined)
    model = Model(inputs=[input_docs, input_net], outputs=output, name="LSTM-11CNN-ContentNets")
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10)
    start_time = time.time()
    model.fit(x=[X_train_docs, X_train_net], y=y_train, epochs=epochs_n, verbose=1,
              validation_data=([X_val_docs, X_val_net], y_val), batch_size=batch_size, callbacks=[es])
    end_time = time.time()
    y_pred = model.predict([X_test_docs, X_test_net], verbose=0)
    evaluate(y_test, y_pred, modelName=model.name, wordemb=wordemb, iters=idx)
    exc_time = end_time - start_time
    execution[wordemb][model.name].append(exc_time)
    print(f"Time taken to train: {exc_time:.2f} seconds")

def modelContentLSTM(X_train_docs, X_val_docs, X_test_docs, y_train, y_val, y_test, w2v, num_classes, wordemb, idx):
    input_docs = Input(shape=(X_train_docs.shape[1],), name='DOCS_INPUT')
    model_docs = Embedding(input_dim=4023, output_dim=units, weights=[w2v], trainable=False)(input_docs)
    model_docs = LSTM(units=units, dropout=0.2, recurrent_dropout=0.2, return_sequences=True)(model_docs)
    model_docs = Flatten()(model_docs)
    output = Dense(units=num_classes, activation='softmax')(model_docs)
    model = Model(inputs=input_docs, outputs=output, name="LSTM-Content")
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10)
    start_time = time.time()
    model.fit(x=X_train_docs, y=y_train, epochs=epochs_n, verbose=1,
              validation_data=(X_val_docs, y_val), batch_size=batch_size, callbacks=[es])
    end_time = time.time()
    y_pred = model.predict(X_test_docs, verbose=0)
    evaluate(y_test, y_pred, modelName=model.name, wordemb=wordemb, iters=idx)
    exc_time = end_time - start_time
    execution[wordemb][model.name].append(exc_time)
    print(f"Time taken to train: {exc_time:.2f} seconds")

def modelContentLSTMCNN(X_train_docs, X_val_docs, X_test_docs, y_train, y_val, y_test, w2v, num_classes, wordemb, idx):
    input_docs = Input(shape=(X_train_docs.shape[1],), name='DOCS_INPUT')
    model_docs = Embedding(input_dim=4023, output_dim=units, weights=[w2v], trainable=False)(input_docs)
    model_docs = LSTM(units=units, dropout=0.2, recurrent_dropout=0.2, return_sequences=True)(model_docs)
    model_docs = Conv1D(filters=int(filters/2), kernel_size=int(kernel_size_lstm/2), activation='relu')(model_docs)
    model_docs = MaxPooling1D()(model_docs)
    model_docs = Flatten()(model_docs)
    output = Dense(units=num_classes, activation='softmax')(model_docs)
    model = Model(inputs=input_docs, outputs=output, name="LSTM-CNN-Content")
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10)
    start_time = time.time()
    model.fit(x=X_train_docs, y=y_train, epochs=epochs_n, verbose=1,
              validation_data=(X_val_docs, y_val), batch_size=batch_size, callbacks=[es])
    end_time = time.time()
    y_pred = model.predict(X_test_docs, verbose=0)
    evaluate(y_test, y_pred, modelName=model.name, wordemb=wordemb, iters=idx)
    exc_time = end_time - start_time
    execution[wordemb][model.name].append(exc_time)
    print(f"Time taken to train: {exc_time:.2f} seconds")

def modelContentNetworkBiLSTM_00CNN(X_train_docs, X_val_docs, X_test_docs, X_train_net, X_val_net, X_test_net, y_train, y_val, y_test, w2v, num_classes, wordemb, idx):
    input_docs = Input(shape=(X_train_docs.shape[1],), name='DOCS_INPUT')
    model_docs = Embedding(input_dim=4023, output_dim=units, weights=[w2v], trainable=False)(input_docs)
    model_docs = Bidirectional(LSTM(units=units, dropout=0.2, recurrent_dropout=0.2, return_sequences=True))(model_docs)
    model_docs = Flatten()(model_docs)
    input_net = Input(shape=(1, 3), name='NETS_INPUT')
    model_net = Bidirectional(LSTM(units=units, dropout=0.2, recurrent_dropout=0.2, return_sequences=True))(input_net)
    model_net = Flatten()(model_net)
    combined = Concatenate()([model_docs, model_net])
    output = Dense(units=num_classes, activation='softmax')(combined)
    model = Model(inputs=[input_docs, input_net], outputs=output, name="BiLSTM-00CNN-ContentNets")
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10)
    start_time = time.time()
    model.fit(x=[X_train_docs, X_train_net], y=y_train, epochs=epochs_n, verbose=1,
              validation_data=([X_val_docs, X_val_net], y_val), batch_size=batch_size, callbacks=[es])
    end_time = time.time()
    y_pred = model.predict([X_test_docs, X_test_net], verbose=0)
    evaluate(y_test, y_pred, modelName=model.name, wordemb=wordemb, iters=idx)
    exc_time = end_time - start_time
    execution[wordemb][model.name].append(exc_time)
    print(f"Time taken to train: {exc_time:.2f} seconds")

def modelContentNetworkBiLSTM_01CNN(X_train_docs, X_val_docs, X_test_docs, X_train_net, X_val_net, X_test_net, y_train, y_val, y_test, w2v, num_classes, wordemb, idx):
    input_docs = Input(shape=(X_train_docs.shape[1],), name='DOCS_INPUT')
    model_docs = Embedding(input_dim=4023, output_dim=units, weights=[w2v], trainable=False)(input_docs)
    model_docs = Bidirectional(LSTM(units=units, dropout=0.2, recurrent_dropout=0.2, return_sequences=True))(model_docs)
    model_docs = Flatten()(model_docs)
    input_net = Input(shape=(1, 3), name='NETS_INPUT')
    model_net = Bidirectional(LSTM(units=units, dropout=0.2, recurrent_dropout=0.2, return_sequences=True))(input_net)
    model_net = Reshape((no_attributes_bilstm, 1))(model_net)
    model_net = Conv1D(filters=filters, kernel_size=kernel_size_bilstm, activation='relu')(model_net)
    model_net = MaxPooling1D()(model_net)
    model_net = Flatten()(model_net)
    combined = Concatenate()([model_docs, model_net])
    output = Dense(units=num_classes, activation='softmax')(combined)
    model = Model(inputs=[input_docs, input_net], outputs=output, name="BiLSTM-01CNN-ContentNets")
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10)
    start_time = time.time()
    model.fit(x=[X_train_docs, X_train_net], y=y_train, epochs=epochs_n, verbose=1,
              validation_data=([X_val_docs, X_val_net], y_val), batch_size=batch_size, callbacks=[es])
    end_time = time.time()
    y_pred = model.predict([X_test_docs, X_test_net], verbose=0)
    evaluate(y_test, y_pred, modelName=model.name, wordemb=wordemb, iters=idx)
    exc_time = end_time - start_time
    execution[wordemb][model.name].append(exc_time)
    print(f"Time taken to train: {exc_time:.2f} seconds")

def modelContentNetworkBiLSTM_10CNN(X_train_docs, X_val_docs, X_test_docs, X_train_net, X_val_net, X_test_net, y_train, y_val, y_test, w2v, num_classes, wordemb, idx):
    input_docs = Input(shape=(X_train_docs.shape[1],), name='DOCS_INPUT')
    model_docs = Embedding(input_dim=4023, output_dim=units, weights=[w2v], trainable=False)(input_docs)
    model_docs = Bidirectional(LSTM(units=units, dropout=0.2, recurrent_dropout=0.2, return_sequences=True))(model_docs)
    model_docs = Conv1D(filters=int(filters/2), kernel_size=int(kernel_size_bilstm/2), activation='relu')(model_docs)
    model_docs = MaxPooling1D()(model_docs)
    model_docs = Flatten()(model_docs)
    input_net = Input(shape=(1, 3), name='NETS_INPUT')
    model_net = Bidirectional(LSTM(units=units, dropout=0.2, recurrent_dropout=0.2, return_sequences=True))(input_net)
    model_net = Flatten()(model_net)
    combined = Concatenate()([model_docs, model_net])
    output = Dense(units=num_classes, activation='softmax')(combined)
    model = Model(inputs=[input_docs, input_net], outputs=output, name="BiLSTM-10CNN-ContentNets")
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10)
    start_time = time.time()
    model.fit(x=[X_train_docs, X_train_net], y=y_train, epochs=epochs_n, verbose=1,
              validation_data=([X_val_docs, X_val_net], y_val), batch_size=batch_size, callbacks=[es])
    end_time = time.time()
    y_pred = model.predict([X_test_docs, X_test_net], verbose=0)
    evaluate(y_test, y_pred, modelName=model.name, wordemb=wordemb, iters=idx)
    exc_time = end_time - start_time
    execution[wordemb][model.name].append(exc_time)
    print(f"Time taken to train: {exc_time:.2f} seconds")

def modelContentNetworkBiLSTM_11CNN(X_train_docs, X_val_docs, X_test_docs, X_train_net, X_val_net, X_test_net, y_train, y_val, y_test, w2v, num_classes, wordemb, idx):
    input_docs = Input(shape=(X_train_docs.shape[1],), name='DOCS_INPUT')
    model_docs = Embedding(input_dim=4023, output_dim=units, weights=[w2v], trainable=False)(input_docs)
    model_docs = Bidirectional(LSTM(units=units, dropout=0.2, recurrent_dropout=0.2, return_sequences=True))(model_docs)
    model_docs = Conv1D(filters=int(filters/2), kernel_size=int(kernel_size_bilstm/2), activation='relu')(model_docs)
    model_docs = MaxPooling1D()(model_docs)
    model_docs = Flatten()(model_docs)
    input_net = Input(shape=(1, 3), name='NETS_INPUT')
    model_net = Bidirectional(LSTM(units=units, dropout=0.2, recurrent_dropout=0.2, return_sequences=True))(input_net)
    model_net = Reshape((no_attributes_bilstm, 1))(model_net)
    model_net = Conv1D(filters=filters, kernel_size=kernel_size_bilstm, activation='relu')(model_net)
    model_net = MaxPooling1D()(model_net)
    model_net = Flatten()(model_net)
    combined = Concatenate()([model_docs, model_net])
    output = Dense(units=num_classes, activation='softmax')(combined)
    model = Model(inputs=[input_docs, input_net], outputs=output, name="BiLSTM-11CNN-ContentNets")
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10)
    start_time = time.time()
    model.fit(x=[X_train_docs, X_train_net], y=y_train, epochs=epochs_n, verbose=1,
              validation_data=([X_val_docs, X_val_net], y_val), batch_size=batch_size, callbacks=[es])
    end_time = time.time()
    y_pred = model.predict([X_test_docs, X_test_net], verbose=0)
    evaluate(y_test, y_pred, modelName=model.name, wordemb=wordemb, iters=idx)
    exc_time = end_time - start_time
    execution[wordemb][model.name].append(exc_time)
    print(f"Time taken to train: {exc_time:.2f} seconds")

def modelContentBiLSTM(X_train_docs, X_val_docs, X_test_docs, y_train, y_val, y_test, w2v, num_classes, wordemb, idx):
    input_docs = Input(shape=(X_train_docs.shape[1],), name='DOCS_INPUT')
    model_docs = Embedding(input_dim=4023, output_dim=units, weights=[w2v], trainable=False)(input_docs)
    model_docs = Bidirectional(LSTM(units=units, dropout=0.2, recurrent_dropout=0.2, return_sequences=True))(model_docs)
    model_docs = Flatten()(model_docs)
    output = Dense(units=num_classes, activation='softmax')(model_docs)
    model = Model(inputs=input_docs, outputs=output, name="BiLSTM-Content")
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10)
    start_time = time.time()
    model.fit(x=X_train_docs, y=y_train, epochs=epochs_n, verbose=1,
              validation_data=(X_val_docs, y_val), batch_size=batch_size, callbacks=[es])
    end_time = time.time()
    y_pred = model.predict(X_test_docs, verbose=0)
    evaluate(y_test, y_pred, modelName=model.name, wordemb=wordemb, iters=idx)
    exc_time = end_time - start_time
    execution[wordemb][model.name].append(exc_time)
    print(f"Time taken to train: {exc_time:.2f} seconds")

def modelContentBiLSTMCNN(X_train_docs, X_val_docs, X_test_docs, y_train, y_val, y_test, w2v, num_classes, wordemb, idx):
    input_docs = Input(shape=(X_train_docs.shape[1],), name='DOCS_INPUT')
    model_docs = Embedding(input_dim=4023, output_dim=units, weights=[w2v], trainable=False)(input_docs)
    model_docs = Bidirectional(LSTM(units=units, dropout=0.2, recurrent_dropout=0.2, return_sequences=True))(model_docs)
    model_docs = Conv1D(filters=int(filters/2), kernel_size=int(kernel_size_bilstm/2), activation='relu')(model_docs)
    model_docs = MaxPooling1D()(model_docs)
    model_docs = Flatten()(model_docs)
    output = Dense(units=num_classes, activation='softmax')(model_docs)
    model = Model(inputs=input_docs, outputs=output, name="BiLSTM-CNN-Content")
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10)
    start_time = time.time()
    model.fit(x=X_train_docs, y=y_train, epochs=epochs_n, verbose=1,
              validation_data=(X_val_docs, y_val), batch_size=batch_size, callbacks=[es])
    end_time = time.time()
    y_pred = model.predict(X_test_docs, verbose=0)
    evaluate(y_test, y_pred, modelName=model.name, wordemb=wordemb, iters=idx)
    exc_time = end_time - start_time
    execution[wordemb][model.name].append(exc_time)
    print(f"Time taken to train: {exc_time:.2f} seconds")

if __name__ == "__main__":
    # Load data
    y = sio.loadmat('labels.mat')['y'][0]
    X_net_std = sio.loadmat('network.mat')['X_net_std']
    X_docs = sio.loadmat('corpus.mat')['X']
    print("y shape:", y.shape)
    print("X_net_std shape:", X_net_std.shape)
    print("X_docs shape:", X_docs.shape)

    # Verify vocabulary size
    vocabulary_size = 4023
    max_size = 117
    print(f"Vocabulary size: {vocabulary_size}, Max size: {max_size}")

    embedding_types = ['w2v_cbow', 'w2v_sg', 'ft_cbow', 'ft_sg', 'glove']
    models = [
        "LSTM-00CNN-ContentNets", "LSTM-01CNN-ContentNets", "LSTM-10CNN-ContentNets", "LSTM-11CNN-ContentNets",
        "LSTM-Content", "LSTM-CNN-Content",
        "BiLSTM-00CNN-ContentNets", "BiLSTM-01CNN-ContentNets", "BiLSTM-10CNN-ContentNets", "BiLSTM-11CNN-ContentNets",
        "BiLSTM-Content", "BiLSTM-CNN-Content"
    ]

    for wordemb in embedding_types:
        accuracies[wordemb] = {model: [] for model in models}
        precisions[wordemb] = {model: [] for model in models}
        recalls[wordemb] = {model: [] for model in models}
        execution[wordemb] = {model: [] for model in models}

        w2v = sio.loadmat(f'{wordemb}.mat')[wordemb]
        print(f"Loaded {wordemb} shape: {w2v.shape}")

        for idx in range(5):
            X_train_docs, X_test_docs, X_train_net, X_test_net, y_train, y_test = train_test_split(
                X_docs, X_net_std, y, test_size=0.30, shuffle=True, stratify=y)
            X_train_docs, X_val_docs, X_train_net, X_val_net, y_train, y_val = train_test_split(
                X_train_docs, X_train_net, y_train, test_size=0.20, shuffle=True, stratify=y_train)
            y_train = to_categorical(y_train, num_classes=num_classes)
            y_test = to_categorical(y_test, num_classes=num_classes)
            y_val = to_categorical(y_val, num_classes=num_classes)

            print(f"\nIteration {idx+1} - Split shapes:")
            print("X_train_docs:", X_train_docs.shape, "X_val_docs:", X_val_docs.shape, "X_test_docs:", X_test_docs.shape)
            print("X_train_net:", X_train_net.shape, "X_val_net:", X_val_net.shape, "X_test_net:", X_test_net.shape)
            print("y_train:", y_train.shape, "y_val:", y_val.shape, "y_test:", y_test.shape)

            print(f"\nRunning models with {wordemb} embedding:")
            modelContentLSTM(X_train_docs, X_val_docs, X_test_docs, y_train, y_val, y_test, w2v, num_classes, wordemb, idx)
            modelContentLSTMCNN(X_train_docs, X_val_docs, X_test_docs, y_train, y_val, y_test, w2v, num_classes, wordemb, idx)
            modelContentNetworkLSTM_00CNN(X_train_docs, X_val_docs, X_test_docs, X_train_net, X_val_net, X_test_net, y_train, y_val, y_test, w2v, num_classes, wordemb, idx)
            modelContentNetworkLSTM_01CNN(X_train_docs, X_val_docs, X_test_docs, X_train_net, X_val_net, X_test_net, y_train, y_val, y_test, w2v, num_classes, wordemb, idx)
            modelContentNetworkLSTM_10CNN(X_train_docs, X_val_docs, X_test_docs, X_train_net, X_val_net, X_test_net, y_train, y_val, y_test, w2v, num_classes, wordemb, idx)
            modelContentNetworkLSTM_11CNN(X_train_docs, X_val_docs, X_test_docs, X_train_net, X_val_net, X_test_net, y_train, y_val, y_test, w2v, num_classes, wordemb, idx)
            modelContentBiLSTM(X_train_docs, X_val_docs, X_test_docs, y_train, y_val, y_test, w2v, num_classes, wordemb, idx)
            modelContentBiLSTMCNN(X_train_docs, X_val_docs, X_test_docs, y_train, y_val, y_test, w2v, num_classes, wordemb, idx)
            modelContentNetworkBiLSTM_00CNN(X_train_docs, X_val_docs, X_test_docs, X_train_net, X_val_net, X_test_net, y_train, y_val, y_test, w2v, num_classes, wordemb, idx)
            modelContentNetworkBiLSTM_01CNN(X_train_docs, X_val_docs, X_test_docs, X_train_net, X_val_net, X_test_net, y_train, y_val, y_test, w2v, num_classes, wordemb, idx)
            modelContentNetworkBiLSTM_10CNN(X_train_docs, X_val_docs, X_test_docs, X_train_net, X_val_net, X_test_net, y_train, y_val, y_test, w2v, num_classes, wordemb, idx)
            modelContentNetworkBiLSTM_11CNN(X_train_docs, X_val_docs, X_test_docs, X_train_net, X_val_net, X_test_net, y_train, y_val, y_test, w2v, num_classes, wordemb, idx)

        print(f"\nSummary for {wordemb}:")
        for model in models:
            print(f"{model} {wordemb} ACCURACY {np.mean(accuracies[wordemb][model]):.4f} ± {np.std(accuracies[wordemb][model]):.4f}")
            print(f"{model} {wordemb} PRECISION {np.mean(precisions[wordemb][model]):.4f} ± {np.std(precisions[wordemb][model]):.4f}")
            print(f"{model} {wordemb} RECALL {np.mean(recalls[wordemb][model]):.4f} ± {np.std(recalls[wordemb][model]):.4f}")
            print(f"{model} {wordemb} EXECUTION TIME {np.mean(execution[wordemb][model]):.2f} ± {np.std(execution[wordemb][model]):.2f}")

y shape: (2282,)
X_net_std shape: (2282, 1, 3)
X_docs shape: (2282, 117)
Vocabulary size: 4023, Max size: 117
Loaded w2v_cbow shape: (4023, 128)

Iteration 1 - Split shapes:
X_train_docs: (1277, 117) X_val_docs: (320, 117) X_test_docs: (685, 117)
X_train_net: (1277, 1, 3) X_val_net: (320, 1, 3) X_test_net: (685, 1, 3)
y_train: (1277, 2) y_val: (320, 2) y_test: (685, 2)

Running models with w2v_cbow embedding:
Epoch 1/5
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 2s/step - accuracy: 0.7176 - loss: 0.5778 - val_accuracy: 0.7063 - val_loss: 0.5068
Epoch 2/5
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 392ms/step - accuracy: 0.7441 - loss: 0.4767 - val_accuracy: 0.7094 - val_loss: 0.4921
Epoch 3/5
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 547ms/step - accuracy: 0.7487 - loss: 0.4796 - val_accuracy: 0.7188 - val_loss: 0.4883
Epoch 4/5
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 355ms/step - accuracy: 0.7325 - loss: 0.