[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/hamidrg/Textual_Emotion_detect/blob/master/MLs/LinearSVC_SGD.ipynb)

In [None]:
import pandas as pd

import re
import nltk
import string
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import SGDClassifier
from sklearn.svm import LinearSVC
from sklearn.metrics import classification_report, accuracy_score

In [None]:
# Load the ISEAR CSV (path is relative to the notebook's working directory)
# and preview the first rows.
ISEAR_CSV = "ISEAR-raw-utf8.csv"
df = pd.read_csv(ISEAR_CSV)
df.head()

Unnamed: 0,ID,CITY,COUN,SUBJ,SEX,AGE,RELI,PRAC,FOCC,MOCC,...,SELF,RELA,VERBAL,NEUTRO,Field1,Field3,Field2,MYKEY,SIT,STATE
0,11001,1,1,1,1,33,1,2,6,1,...,3,3,2,0,joy,4,3,110011,"During the period of falling in love, each tim...",1
1,11001,1,1,1,1,33,1,2,6,1,...,2,2,0,0,fear,3,2,110012,When I was involved in a traffic accident.,1
2,11001,1,1,1,1,33,1,2,6,1,...,2,1,0,0,anger,1,3,110013,When I was driving home after several days of...,1
3,11001,1,1,1,1,33,1,2,6,1,...,1,1,0,2,sadness,4,4,110014,When I lost the person who meant the most to me.,1
4,11001,1,1,1,1,33,1,2,6,1,...,0,2,0,0,disgust,4,4,110015,The time I knocked a deer down - the sight of ...,1


In [None]:
# Make sure the NLTK data used by this notebook is installed so the cell also
# works on a fresh kernel (e.g. Colab). "punkt"/"punkt_tab" back
# nltk.word_tokenize, "stopwords" the stop-word list, and "wordnet"/"omw-1.4"
# the WordNet lemmatizer. nltk.download reports failures instead of raising,
# so a missing package surfaces as a LookupError on first use below.
for nltk_resource in ("punkt", "punkt_tab", "stopwords", "wordnet", "omw-1.4"):
    nltk.download(nltk_resource, quiet=True)

# English stop words as a set for O(1) membership tests during cleaning.
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

def clean_text_nltk(text):
    """Normalize raw text into a space-joined string of lemmatized tokens.

    Steps: strip ``@mentions`` and URLs, lowercase, tokenize with
    ``nltk.word_tokenize``, drop stop words, punctuation-only tokens and
    e-mail-like / URL-like tokens, then lemmatize whatever is left.

    Args:
        text: Raw input text. Anything that is not a ``str`` (for example the
            NaN pandas uses for a missing CSV cell) is treated as empty.

    Returns:
        str: The surviving lemmas joined by single spaces; ``""`` when nothing
        survives or the input was not a string.
    """
    if not isinstance(text, str):
        return ""

    # The dot after "www" is escaped so only a literal "www." prefix counts as
    # a URL (unescaped, it matched any character, e.g. "wwwx...").
    url_pattern = r"http\S+|www\.\S+"

    text = re.sub(r'@\w+', '', text)
    # Case-insensitive because this runs before lowercasing; otherwise
    # "HTTP://..." or "WWW...." would slip through.
    text = re.sub(url_pattern, '', text, flags=re.IGNORECASE)
    text = text.lower()

    cleaned_tokens = []
    for token in nltk.word_tokenize(text):
        if token in stop_words:
            continue
        # Character-wise check: `token in string.punctuation` is a substring
        # test and lets multi-character tokens such as "..." or the "``" / "''"
        # quote tokens through.
        if all(char in string.punctuation for char in token):
            continue
        if re.match(r'\S+@\S+', token) or re.match(url_pattern, token):
            continue
        cleaned_tokens.append(lemmatizer.lemmatize(token))

    return " ".join(cleaned_tokens)

# Clean every situation description (SIT column) and keep the result next to
# the raw text, then preview the frame.
df['Clean_Text'] = df['SIT'].map(clean_text_nltk)
df.head()

Unnamed: 0,ID,CITY,COUN,SUBJ,SEX,AGE,RELI,PRAC,FOCC,MOCC,...,RELA,VERBAL,NEUTRO,Field1,Field3,Field2,MYKEY,SIT,STATE,Clean_Text
0,11001,1,1,1,1,33,1,2,6,1,...,3,2,0,joy,4,3,110011,"During the period of falling in love, each tim...",1,period falling love time met á especially met ...
1,11001,1,1,1,1,33,1,2,6,1,...,2,0,0,fear,3,2,110012,When I was involved in a traffic accident.,1,involved traffic accident
2,11001,1,1,1,1,33,1,2,6,1,...,1,0,0,anger,1,3,110013,When I was driving home after several days of...,1,driving home several day hard work á motorist ...
3,11001,1,1,1,1,33,1,2,6,1,...,1,0,2,sadness,4,4,110014,When I lost the person who meant the most to me.,1,lost person meant
4,11001,1,1,1,1,33,1,2,6,1,...,2,0,0,disgust,4,4,110015,The time I knocked a deer down - the sight of ...,1,time knocked deer sight animal 's á injury hel...


In [None]:
# Inputs are the cleaned texts; targets are the values of the EMOT column.
X = df['Clean_Text']
y = df['EMOT']

# Map the EMOT values to consecutive integer class ids; `le` is kept so
# predictions can be translated back with inverse_transform.
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Hold out 20% for evaluation, stratified so every class keeps its share.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
    stratify=y_encoded,
)

# Unigram + bigram TF-IDF capped at 5000 features. The vocabulary is fitted on
# the training texts only and merely applied to the test texts.
vectorizer = TfidfVectorizer(ngram_range=(1, 2), max_features=5000)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

In [None]:
# Display names for the report, in the order of the encoded class ids
# (assumes ids 0-6 correspond to joy ... guilt -- verify against le.classes_).
emotion_names = ['joy', 'fear', 'anger', 'sadness', 'disgust', 'shame', 'guilt']

# Train a linear SVM on the TF-IDF features. random_state is pinned so reruns
# give the same model whenever the solver shuffles the training data.
svc_model = LinearSVC(random_state=42)
svc_model.fit(X_train_tfidf, y_train)
y_pred_svc = svc_model.predict(X_test_tfidf)

# Per-class precision/recall/F1 plus overall accuracy on the held-out split.
print("📊 LinearSVC Performance:")
print(classification_report(y_test, y_pred_svc, target_names=emotion_names))
print("Accuracy:", accuracy_score(y_test, y_pred_svc))

📊 LinearSVC Performance:
              precision    recall  f1-score   support

         joy       0.69      0.68      0.68       219
        fear       0.64      0.68      0.66       219
       anger       0.45      0.46      0.45       219
     sadness       0.64      0.65      0.65       219
     disgust       0.55      0.54      0.54       220
       shame       0.46      0.47      0.47       219
       guilt       0.51      0.45      0.48       219

    accuracy                           0.56      1534
   macro avg       0.56      0.56      0.56      1534
weighted avg       0.56      0.56      0.56      1534

Accuracy: 0.5625814863102999


In [None]:
# Hinge loss with an L2 penalty trained by stochastic gradient descent,
# seeded for repeatable runs.
sgd_model = SGDClassifier(
    loss='hinge',
    penalty='l2',
    max_iter=1000,
    random_state=42,
).fit(X_train_tfidf, y_train)
y_pred_sgd = sgd_model.predict(X_test_tfidf)

# Report per-class metrics and overall accuracy on the held-out split.
print("📊 SGDClassifier Performance:")
sgd_class_names = ['joy', 'fear', 'anger', 'sadness', 'disgust', 'shame', 'guilt']
sgd_report = classification_report(y_test, y_pred_sgd, target_names=sgd_class_names)
print(sgd_report)
sgd_accuracy = accuracy_score(y_test, y_pred_sgd)
print("Accuracy:", sgd_accuracy)


📊 SGDClassifier Performance:
              precision    recall  f1-score   support

         joy       0.68      0.69      0.68       219
        fear       0.63      0.73      0.68       219
       anger       0.47      0.45      0.46       219
     sadness       0.66      0.66      0.66       219
     disgust       0.56      0.58      0.57       220
       shame       0.48      0.45      0.46       219
       guilt       0.54      0.48      0.51       219

    accuracy                           0.58      1534
   macro avg       0.57      0.58      0.57      1534
weighted avg       0.57      0.58      0.57      1534

Accuracy: 0.5769230769230769


In [None]:
# Lookup from numeric emotion code (1-7) to the emotion's name.
label_map = dict(enumerate(
    ['joy', 'fear', 'anger', 'sadness', 'disgust', 'shame', 'guilt'],
    start=1,
))

def predict_emotion(text, model, vectorizer, label_encoder):
    """Predict the emotion name for a single piece of raw text.

    The text is cleaned with ``clean_text_nltk``, vectorized, classified, and
    the predicted class id is decoded with ``label_encoder`` before being
    looked up in the module-level ``label_map``.

    Args:
        text: Raw input sentence.
        model: Fitted classifier exposing ``predict``.
        vectorizer: Fitted vectorizer exposing ``transform``.
        label_encoder: Fitted encoder exposing ``inverse_transform``; its
            decoded values must be keys of ``label_map``.

    Returns:
        The emotion name stored in ``label_map`` for the predicted label.
    """
    features = vectorizer.transform([clean_text_nltk(text)])
    encoded_prediction = model.predict(features)
    decoded_labels = label_encoder.inverse_transform(encoded_prediction)
    # Exactly one text was passed in, so the first entry is the answer.
    return label_map[decoded_labels[0]]

# Quick end-to-end check of the prediction helper with the LinearSVC model.
sample_sentence = "I was so scared during the exam"
print(predict_emotion(sample_sentence, svc_model, vectorizer, le))


fear
