In [1]:
import pandas as pd
import neattext.functions as nfx
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
import joblib

# Load the dataset
df = pd.read_csv('emotion_dataset.csv')

# Cleaning text: remove user handles and stopwords
df['Clean_Text'] = df['Text'].apply(nfx.remove_userhandles)
df['Clean_Text'] = df['Clean_Text'].apply(nfx.remove_stopwords)

# Inspect the cleaned data
print(df.head())

# Features and labels
Xfeatures = df['Clean_Text']
ylabels = df['Emotion']

# Vectorization with max_features to limit the number of features
vectorizer = CountVectorizer(max_features=10000)  # Limiting to 10,000 features
Xfeatures = vectorizer.fit_transform(Xfeatures)

# Save the vectorizer
joblib.dump(vectorizer, 'vectorizer.pkl')

# Train-test split
x_train, x_test, y_train, y_test = train_test_split(Xfeatures, ylabels, test_size=0.3, random_state=42)

# Scale the data
scaler = StandardScaler(with_mean=False)  # Set with_mean=False for sparse matrix
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

# Initialize and train the logistic regression model
model = LogisticRegression(max_iter=200, solver='saga')
model.fit(x_train_scaled, y_train)

# Check the accuracy
accuracy = model.score(x_test_scaled, y_test)
print(f'Accuracy: {accuracy * 100:.2f}%')

# Save the trained model
joblib.dump(model, 'emotion_classifier_model.pkl')
print("Model and vectorizer have been saved successfully.")


   Unnamed: 0  Emotion                                               Text  \
0           0  neutral                                             Why ?    
1           1      joy    Sage Act upgrade on my to do list for tommorow.   
2           2  sadness  ON THE WAY TO MY HOMEGIRL BABY FUNERAL!!! MAN ...   
3           3      joy   Such an eye ! The true hazel eye-and so brill...   
4           4      joy  @Iluvmiasantos ugh babe.. hugggzzz for u .!  b...   

                                          Clean_Text  
0                                                  ?  
1                    Sage Act upgrade list tommorow.  
2  WAY HOMEGIRL BABY FUNERAL!!! MAN HATE FUNERALS...  
3  eye ! true hazel eye-and brilliant ! Regular f...  
4  ugh babe.. hugggzzz u .! babe naamazed nga ako...  
Accuracy: 56.95%
Model and vectorizer have been saved successfully.


