In [1]:
from itertools import product
from pandas import DataFrame
import pandas as pd
import numpy as np
import spacy
import os
from pathlib import Path
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, OrdinalEncoder
import tensorflow as tf
from sklearn.model_selection import train_test_split




In [13]:
from source import prepare_data
from source import dataset as dat
from source import multi_layer_perceptron as mlp

In [5]:
"""
Preprocessing
"""
# Dataset location and the number of rows to read, named here so they can be
# tuned in one place instead of being buried inside the read_csv call.
DATASET_URL = "hf://datasets/gxb912/large-twitter-tweets-sentiment/train.csv"
N_ROWS = 100

# Load the raw tweets (only the first N_ROWS rows) and the spaCy English pipeline.
raw_df = pd.read_csv(DATASET_URL, nrows=N_ROWS)
nlp = spacy.load("en_core_web_sm")

# Build the prepared frame from the raw one. The raw frame keeps its own name
# so it stays available for inspection after preparation.
# NOTE(review): if prepare_df mutates its argument in place, raw_df will not
# stay pristine — confirm in source.prepare_data.
df = prepare_data.prepare_df(raw_df, nlp)

In [6]:
# Show the prepared frame as this cell's rich output (bare last expression, no print()).
df

Unnamed: 0,sentiment,clean_text,clean_text_str,text
0,1,"[love, you, toooooo, TG, LOL, Gngb]",love you toooooo TG LOL Gngb,@tonigirl14 love you toooooo ! ! TG LOL Gngb
1,0,"[I, tell, myself, do, click, on, this, link, b...",I tell myself do click on this link but I just...,@jun6lee I told myself : Do n't click on this ...
2,0,"[the, man, who, render, his, voice, to, Mickey...",the man who render his voice to Mickey Mouse e...,The man who rendered his voice to Mickey Mouse...
3,1,"[I, think, red, would, be, nice, or, maybe, yo...",I think red would be nice or maybe you could d...,@Shontelle_Layne I think red would be nice . ...
4,0,"[I, guess, one, of, her, tweet, say, she, be, ...",I guess one of her tweet say she be confused,@Silverlines - I guess . 'Cause one of her twe...
...,...,...,...,...
95,0,"[no, way, be, I, go, to, finish, this, coursew...",no way be I go to finish this coursework for t...,No way am i gon na finish this coursework for ...
96,1,"[do, with, work, and, Mondayitis, time, for, c...",do with work and Mondayitis time for coffee wi...,Done with work and Mondayitis . Time for coffe...
97,0,"[yea, its, very, sad, I, will, not, get, it, t...",yea its very sad I will not get it till I get ...,@livnb yeas its very sad .. i wo nt get it til...
98,1,"[got, an, appointment, with, the, Doc, later, ...",got an appointment with the Doc later on I be ...,Got an appointment with the Doc later on ... I...


In [7]:
# Wrap the prepared frame in the project's Text_Dataset. Going by the keyword
# names, "clean_text_str" is the text column and "sentiment" the label column;
# extra settings are forwarded through `args`.
dataset = dat.Text_Dataset(
    df,
    col_text="clean_text_str",
    col_label="sentiment",
    args={"max_features": 5000},
)

# NOTE(review): no seed is passed here — confirm whether split_dataset is
# reproducible across kernel restarts.
dataset.split_dataset()

In [14]:
# Fetch the four train/test arrays with tfidf=True
# (presumably TF-IDF features — confirm in source.dataset).
(
    X_train_TF,
    X_test_TF,
    Y_train_TF,
    Y_test_TF,
) = dataset.get_encodings(tfidf=True)

# Run the MLP hyper-parameter search for 3 trials and keep whatever
# find_best_mlp returns in `ev` for later inspection.
ev = mlp.find_best_mlp(
    X_train=X_train_TF,
    Y_train=Y_train_TF,
    X_test=X_test_TF,
    Y_test=Y_test_TF,
    n_trials=3,
)

[I 2025-06-12 14:58:09,770] A new study created in memory with name: mlp_optimization
[I 2025-06-12 14:58:10,433] Trial 0 finished with values: [0.6840411424636841, 0.550000011920929] and parameters: {'epochs': 163, 'learning_rate': 0.481149546618212, 'units': 10}.
[I 2025-06-12 14:58:10,978] Trial 1 finished with values: [0.6570560932159424, 0.550000011920929] and parameters: {'epochs': 128, 'learning_rate': 0.43366987184597494, 'units': 10}.
[I 2025-06-12 14:58:11,655] Trial 2 finished with values: [0.6658757328987122, 0.6000000238418579] and parameters: {'epochs': 184, 'learning_rate': 0.3199867633429609, 'units': 2}.


   number      loss  accuracy             datetime_start  \
0       0  0.684041      0.55 2025-06-12 14:58:09.770677   
1       1  0.657056      0.55 2025-06-12 14:58:10.433308   
2       2  0.665876      0.60 2025-06-12 14:58:10.979058   

           datetime_complete               duration  params_epochs  \
0 2025-06-12 14:58:10.433308 0 days 00:00:00.662631            163   
1 2025-06-12 14:58:10.978056 0 days 00:00:00.544748            128   
2 2025-06-12 14:58:11.654735 0 days 00:00:00.675677            184   

   params_learning_rate  params_units  system_attrs_NSGAIISampler:generation  \
0              0.481150            10                                      0   
1              0.433670            10                                      0   
2              0.319987             2                                      0   

      state  
0  COMPLETE  
1  COMPLETE  
2  COMPLETE  


In [15]:
# Display the object returned by mlp.find_best_mlp as this cell's rich output.
ev

Unnamed: 0,number,loss,accuracy,datetime_start,datetime_complete,duration,params_epochs,params_learning_rate,params_units,system_attrs_NSGAIISampler:generation,state
0,0,0.684041,0.55,2025-06-12 14:58:09.770677,2025-06-12 14:58:10.433308,0 days 00:00:00.662631,163,0.48115,10,0,COMPLETE
1,1,0.657056,0.55,2025-06-12 14:58:10.433308,2025-06-12 14:58:10.978056,0 days 00:00:00.544748,128,0.43367,10,0,COMPLETE
2,2,0.665876,0.6,2025-06-12 14:58:10.979058,2025-06-12 14:58:11.654735,0 days 00:00:00.675677,184,0.319987,2,0,COMPLETE
