In [15]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder

import warnings
warnings.filterwarnings("ignore")

# Veri setini yükleme
data = pd.read_csv('/kaggle/input/emotions-dataset-for-nlp/train.txt', names=['text', 'emotion'], sep=';')

In [16]:
data

Unnamed: 0,text,emotion
0,i didnt feel humiliated,sadness
1,i can go from feeling so hopeless to so damned...,sadness
2,im grabbing a minute to post i feel greedy wrong,anger
3,i am ever feeling nostalgic about the fireplac...,love
4,i am feeling grouchy,anger
...,...,...
15995,i just had a very brief time in the beanbag an...,sadness
15996,i am now turning and i feel pathetic that i am...,sadness
15997,i feel strong and good overall,joy
15998,i feel like this was such a rude comment and i...,anger


In [17]:
data.isna().sum()

text       0
emotion    0
dtype: int64

In [18]:
data.info

<bound method DataFrame.info of                                                     text  emotion
0                                i didnt feel humiliated  sadness
1      i can go from feeling so hopeless to so damned...  sadness
2       im grabbing a minute to post i feel greedy wrong    anger
3      i am ever feeling nostalgic about the fireplac...     love
4                                   i am feeling grouchy    anger
...                                                  ...      ...
15995  i just had a very brief time in the beanbag an...  sadness
15996  i am now turning and i feel pathetic that i am...  sadness
15997                     i feel strong and good overall      joy
15998  i feel like this was such a rude comment and i...    anger
15999  i know a lot but i feel so stupid because i ca...  sadness

[16000 rows x 2 columns]>

In [19]:
# LabelEncoder örneği oluşturma
label_encoder = LabelEncoder()

# Duygu etiketlerini sayısal değerlere dönüştürme
data['emotion'] = label_encoder.fit_transform(data['emotion'])

In [20]:
data

Unnamed: 0,text,emotion
0,i didnt feel humiliated,4
1,i can go from feeling so hopeless to so damned...,4
2,im grabbing a minute to post i feel greedy wrong,0
3,i am ever feeling nostalgic about the fireplac...,3
4,i am feeling grouchy,0
...,...,...
15995,i just had a very brief time in the beanbag an...,4
15996,i am now turning and i feel pathetic that i am...,4
15997,i feel strong and good overall,2
15998,i feel like this was such a rude comment and i...,0


In [21]:
# Eğitim ve test setlerine ayırma
X_train, X_test, y_train, y_test = train_test_split(data['text'], data['emotion'], test_size=0.2, random_state=42)

# TF-IDF vektörleştirici ve Logistic Regression modeli
model = make_pipeline(TfidfVectorizer(min_df=3, max_df=0.7, ngram_range=(1, 2)), LogisticRegression(max_iter=1000))

# Modeli eğitme
model.fit(X_train, y_train)

# Test setinde tahminler yapma ve modeli değerlendirme
predictions = model.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
report = classification_report(y_test, predictions, target_names=emotion_labels)

print("Doğruluk:", accuracy)
print("Sınıflandırma Raporu:\n", report)

Doğruluk: 0.80375
Sınıflandırma Raporu:
               precision    recall  f1-score   support

     sadness       0.92      0.69      0.79       427
       anger       0.86      0.65      0.74       397
        love       0.73      0.97      0.83      1021
    surprise       0.87      0.42      0.57       296
        fear       0.83      0.93      0.88       946
         joy       0.82      0.25      0.38       113

    accuracy                           0.80      3200
   macro avg       0.84      0.65      0.70      3200
weighted avg       0.82      0.80      0.79      3200



In [22]:
from sklearn.model_selection import GridSearchCV

# Hiperparametre grid'i tanımlama
param_grid = {
    'logisticregression__C': [0.01, 0.1, 1, 10, 100],
    'logisticregression__penalty': ['l1', 'l2']
}

# GridSearchCV ile en iyi hiperparametreleri bulma
grid_search = GridSearchCV(model, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

# En iyi parametrelerle modeli değerlendirme
best_model = grid_search.best_estimator_
best_predictions = best_model.predict(X_test)
best_accuracy = accuracy_score(y_test, best_predictions)
best_report = classification_report(y_test, best_predictions, target_names=emotion_labels)

print("En İyi Doğruluk:", best_accuracy)
print("En İyi Sınıflandırma Raporu:\n", best_report)
print("En İyi Parametreler:", grid_search.best_params_)

Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3508, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/tmp/ipykernel_42/2213929479.py", line 11, in <module>
    grid_search.fit(X_train, y_train)
  File "/opt/conda/lib/python3.10/site-packages/sklearn/model_selection/_search.py", line 874, in fit
    self._run_search(evaluate_candidates)
  File "/opt/conda/lib/python3.10/site-packages/sklearn/model_selection/_search.py", line 1388, in _run_search
    evaluate_candidates(ParameterGrid(self.param_grid))
  File "/opt/conda/lib/python3.10/site-packages/sklearn/model_selection/_search.py", line 821, in evaluate_candidates
    out = parallel(
  File "/opt/conda/lib/python3.10/site-packages/sklearn/utils/parallel.py", line 63, in __call__
    return super().__call__(iterable_with_config)
  File "/opt/conda/lib/python3.10/site-packages/joblib/parallel.py", line 1863, in __call__
    return outpu