In [1]:
from keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras import layers
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import pandas as pd

In [14]:
df = pd.read_csv('data_train.csv',encoding='utf-8')

In [15]:
df.head()

Unnamed: 0,text,label
0,"dad, ye kya kar rhe hai aap?",surprise
1,anti-bai spray laga rha hun.,neutral
2,anti-bai spray?,neutral
3,"wo apni bai hai na, sita or geeta, double role...",disgust
4,ese matlab?,joy


In [16]:
unique_values = df['label'].unique()

print(unique_values)

['surprise' 'neutral' 'disgust' 'joy' 'anger' 'fear' 'sadness' 'contempt']


In [17]:
import re
import string

def preprocessing(text):
    processed_text=text.lower()
    pattern=re.compile(r'https?://\S+|www\.\S+')
    processed_text=pattern.sub(r'',processed_text)
    return processed_text

In [18]:
df["text"]=df["text"].apply(preprocessing)

In [19]:
print(df["text"])

0                            dad, ye kya kar rhe hai aap?
1                            anti-bai spray laga rha hun.
2                                         anti-bai spray?
3       wo apni bai hai na, sita or geeta, double role...
4                                             ese matlab?
                              ...                        
8501    haila, sahil! tumhare diwali lunch per hai na ...
8502    kyunki jab maine wahi joke rosesh pe mara to b...
8503    acha now cut it out huh indravadan. rosesh swe...
8504    ok chaliye dad, mein aapko bahar fenk kar aata...
8505    haha, you asked for that one. sahil darling tu...
Name: text, Length: 8506, dtype: object


In [20]:
df.replace({"label":{"surprise":0,"neutral":1,"disgust":2,"joy":3,"anger":4,"fear":5,"sadness":6,"contempt":7 } },inplace=True)

In [21]:
df.head(7)

Unnamed: 0,text,label
0,"dad, ye kya kar rhe hai aap?",0
1,anti-bai spray laga rha hun.,1
2,anti-bai spray?,1
3,"wo apni bai hai na, sita or geeta, double role...",2
4,ese matlab?,3
5,are uske badan me se train ke third class comp...,2
6,"to dad, use aap paas bethaiye aur usko uski pr...",1


In [22]:
unique_values = df['label'].unique()

print(unique_values)

[0 1 2 3 4 5 6 7]


In [23]:
X=df["text"].values
y=df["label"].values

In [24]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=123)

In [25]:
X_train.shape

(7655,)

In [35]:
def my_tokenizer(s):
  return s.split(' ')

In [36]:
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer() 

In [37]:
X_train_new=cv.fit_transform(X_train)
X_test_new=cv.transform(X_test)



In [38]:
X_train_new[0]

<1x14029 sparse matrix of type '<class 'numpy.int64'>'
	with 19 stored elements in Compressed Sparse Row format>

In [39]:
from sklearn.naive_bayes import MultinomialNB
mn=MultinomialNB(alpha=0.7)
mn.fit(X_train_new,y_train)

In [40]:
y_pred=mn.predict(X_test_new)

In [50]:
from sklearn.metrics import accuracy_score, classification_report
print("MultinomialNB:")
print("Accuracy:", accuracy_score(y_test, y_pred))

MultinomialNB:
Accuracy: 0.4864864864864865


In [41]:
from sklearn.metrics import classification_report

In [42]:
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.67      0.04      0.07        55
           1       0.49      0.92      0.64       403
           2       0.00      0.00      0.00         8
           3       0.49      0.20      0.29       162
           4       0.29      0.06      0.10        67
           5       0.29      0.04      0.07        47
           6       0.40      0.04      0.08        48
           7       0.25      0.03      0.06        61

    accuracy                           0.49       851
   macro avg       0.36      0.17      0.16       851
weighted avg       0.45      0.49      0.38       851



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [43]:
from sklearn.tree import DecisionTreeClassifier

In [44]:
# Create Decision Tree Classifier
dt_clf = DecisionTreeClassifier(random_state=42)

In [45]:
# Fit the classifier to the training data
dt_clf.fit(X_train_new, y_train)

In [47]:
# Make predictions on the test data
y_pred_dt = dt_clf.predict(X_test_new)

In [49]:
# Evaluate model performance
from sklearn.metrics import accuracy_score, classification_report
print("Decision Tree Classifier:")
print("Accuracy:", accuracy_score(y_test, y_pred_dt))

Decision Tree Classifier:
Accuracy: 0.4018801410105758


In [70]:
print("Classification Report:\n", classification_report(y_test, y_pred_dt))

Classification Report:
               precision    recall  f1-score   support

           0       0.32      0.25      0.28        55
           1       0.52      0.67      0.58       403
           2       0.00      0.00      0.00         8
           3       0.32      0.22      0.26       162
           4       0.14      0.12      0.13        67
           5       0.05      0.04      0.05        47
           6       0.14      0.10      0.12        48
           7       0.24      0.13      0.17        61

    accuracy                           0.40       851
   macro avg       0.21      0.19      0.20       851
weighted avg       0.36      0.40      0.37       851



# Logistic Regression Classifier with OvR strategy 

In [51]:
from sklearn.linear_model import LogisticRegression

In [52]:
# Create Logistic Regression Classifier with OvR strategy
log_reg_ovr = LogisticRegression(multi_class='ovr', random_state=42)


In [53]:
# Fit the classifier to the training data
log_reg_ovr.fit(X_train_new, y_train)

In [55]:
# Make predictions on the test data
y_pred_ovr = log_reg_ovr.predict(X_test_new)


In [56]:
# Evaluate model performance
from sklearn.metrics import accuracy_score, classification_report
print("Logistic Regression with OvR:")
print("Accuracy:", accuracy_score(y_test, y_pred_ovr))

Logistic Regression with OvR:
Accuracy: 0.48296122209165687


In [69]:
print("Classification Report:\n", classification_report(y_test, y_pred_ovr))

Classification Report:
               precision    recall  f1-score   support

           0       0.60      0.16      0.26        55
           1       0.52      0.83      0.64       403
           2       0.00      0.00      0.00         8
           3       0.43      0.28      0.34       162
           4       0.22      0.10      0.14        67
           5       0.38      0.11      0.17        47
           6       0.24      0.10      0.14        48
           7       0.28      0.11      0.16        61

    accuracy                           0.48       851
   macro avg       0.33      0.21      0.23       851
weighted avg       0.44      0.48      0.42       851



# LogisticRegression_with_Softmax

In [57]:
# Create Logistic Regression Classifier with Softmax (Multinomial) strategy
log_reg_softmax = LogisticRegression(multi_class='multinomial', solver='lbfgs', random_state=42)

In [58]:
# Fit the classifier to the training data
log_reg_softmax.fit(X_train_new, y_train)


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [59]:
# Make predictions on the test data
y_pred_softmax = log_reg_softmax.predict(X_test_new)

In [60]:
print("\nLogistic Regression with Softmax (Multinomial):")
print("Accuracy:", accuracy_score(y_test, y_pred_softmax))


Logistic Regression with Softmax (Multinomial):
Accuracy: 0.4888366627497062


In [61]:
print("Classification Report:\n", classification_report(y_test, y_pred_softmax))

Classification Report:
               precision    recall  f1-score   support

           0       0.56      0.18      0.27        55
           1       0.53      0.82      0.64       403
           2       0.00      0.00      0.00         8
           3       0.44      0.28      0.35       162
           4       0.24      0.13      0.17        67
           5       0.35      0.15      0.21        47
           6       0.33      0.15      0.20        48
           7       0.27      0.11      0.16        61

    accuracy                           0.49       851
   macro avg       0.34      0.23      0.25       851
weighted avg       0.45      0.49      0.44       851



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [63]:
y_pred_softmax[1]

1

# SVM

In [62]:
from sklearn.svm import SVC

In [64]:
svm_ovo = SVC(kernel='linear', decision_function_shape='ovo', random_state=42)

In [65]:
# Fit the classifier to the training data
svm_ovo.fit(X_train_new, y_train)



In [66]:
# Make predictions on the test data
y_pred_ovo = svm_ovo.predict(X_test_new)

In [67]:
# Evaluate model performance
print("SVM with OvO:")
print("Accuracy:", accuracy_score(y_test, y_pred_ovo))

SVM with OvO:
Accuracy: 0.44770857814336074


In [68]:
print("Classification Report:\n", classification_report(y_test, y_pred_ovo))

Classification Report:
               precision    recall  f1-score   support

           0       0.40      0.22      0.28        55
           1       0.51      0.74      0.60       403
           2       0.00      0.00      0.00         8
           3       0.40      0.24      0.30       162
           4       0.19      0.15      0.17        67
           5       0.33      0.13      0.18        47
           6       0.26      0.15      0.19        48
           7       0.25      0.16      0.20        61

    accuracy                           0.45       851
   macro avg       0.29      0.22      0.24       851
weighted avg       0.41      0.45      0.41       851

