In [2]:
import sklearn
import imblearn
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from imblearn.over_sampling import SMOTE  # For oversampling

In [3]:
df = pd.read_csv('datasets/sarcasm_dataset.csv')
df.columns

Index(['Unnamed: 0.1', 'Unnamed: 0', 'sarcasm_label', 'emotion_label',
       'sentiment_label', 'id', 'spectral_centroid', 'spectral_bandwidth',
       'pitch', 'energy', 'loudness', 'sentence_level_similarity_emotion',
       'sentence_level_similarity_word', 'exclamation', 'PCA_1', 'PCA_2',
       'PCA_3', 'PCA_4', 'PCA_5', 'PCA_6', 'PCA_7', 'PCA_8', 'PCA_9', 'PCA_10',
       'PCA_11', 'PCA_12', 'PCA_13', 'PCA_14', 'PCA_15', 'PCA_16', 'PCA_17',
       'PCA_18', 'PCA_19', 'PCA_20', 'PCA_21', 'PCA_22', 'PCA_23', 'PCA_24',
       'PCA_25', 'PCA_26', 'PCA_27', 'PCA_28', 'PCA_29', 'PCA_30', 'PCA_31',
       'PCA_32', 'PCA_33', 'PCA_34', 'PCA_35', 'PCA_36', 'PCA_37', 'PCA_38',
       'PCA_39', 'PCA_40', 'PCA_41', 'PCA_42', 'PCA_43', 'PCA_44', 'PCA_45',
       'PCA_46', 'PCA_47', 'PCA_48', 'PCA_49', 'PCA_50'],
      dtype='object')

In [7]:
# Define the feature columns and target column
feature_columns = [
    'spectral_centroid', 'spectral_bandwidth', 'pitch', 'energy', 'loudness',
    'sentence_level_similarity_emotion', 'sentence_level_similarity_word',
    'exclamation'
] + [f'PCA_{i}' for i in range(1, 51)]  # Add PCA_1 to PCA_50
target_column = 'sarcasm_label'

# Load your DataFrame (assuming 'temp' is your DataFrame)
X = df[feature_columns]  # Replace 'temp' with your actual DataFrame
y = df[target_column]

# Scale the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42, stratify=y)

# Handle class imbalance using SMOTE
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

# Logistic Regression with increased iterations and class balancing
logistic_model = LogisticRegression(max_iter=500, random_state=42)
logistic_model.fit(X_train_resampled, y_train_resampled)

# Predict on the test set
y_pred_logistic = logistic_model.predict(X_test)

# Evaluate Logistic Regression Model
print("Logistic Regression Results:")
print(f"Accuracy: {accuracy_score(y_test, y_pred_logistic):.2f}")
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_logistic))
print("Classification Report:\n", classification_report(y_test, y_pred_logistic))

Logistic Regression Results:
Accuracy: 0.71
Confusion Matrix:
 [[27 11]
 [ 4 10]]
Classification Report:
               precision    recall  f1-score   support

           0       0.87      0.71      0.78        38
           1       0.48      0.71      0.57        14

    accuracy                           0.71        52
   macro avg       0.67      0.71      0.68        52
weighted avg       0.76      0.71      0.73        52



In [8]:
# Naive Bayes Model
from sklearn.naive_bayes import GaussianNB

nb_model = GaussianNB()
nb_model.fit(X_train_resampled, y_train_resampled)
y_pred_nb = nb_model.predict(X_test)

# Evaluate Naive Bayes Model
print("\nNaive Bayes Results:")
print(f"Accuracy: {accuracy_score(y_test, y_pred_nb):.2f}")
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_nb))
print("Classification Report:\n", classification_report(y_test, y_pred_nb))


Naive Bayes Results:
Accuracy: 0.73
Confusion Matrix:
 [[24 14]
 [ 0 14]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.63      0.77        38
           1       0.50      1.00      0.67        14

    accuracy                           0.73        52
   macro avg       0.75      0.82      0.72        52
weighted avg       0.87      0.73      0.75        52

