__Linear Regression Model__

In [2]:
# Import necessary modules
import pandas as pd
import numpy as np
import re
from nltk.sentiment import SentimentIntensityAnalyzer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score, classification_report

# Load the dataset
df = pd.read_excel('C:\\Users\\amins\\Desktop\\Cleaned Dataset.xlsx')
X = df['Text']  # feature
y = df['Class']  # target

# Extract emotion features using VADER sentiment analysis
sia = SentimentIntensityAnalyzer()
df['sentiment_scores'] = df['Text'].apply(lambda x: sia.polarity_scores(' '.join(x)))
df['compound'] = df['sentiment_scores'].apply(lambda x: x['compound'])

# Extract statistical features
df['text_length'] = df['Text'].apply(len)
df['average_word_length'] = df['Text'].apply(lambda x: np.mean([len(word) for word in x]) if x else 0)

# Combine emotion and statistical features into a single DataFrame
feature_df = df[['compound', 'text_length', 'average_word_length']]

# Standardize features using StandardScaler
scaler = StandardScaler()
X = scaler.fit_transform(feature_df)

# Get the target labels
# Assuming your target label is in a column named 'Class'
y = df['Class'].values

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Linear Regression classifier using Scikit-learn
clf = LinearRegression()

# Assuming y is your target variable
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(df['Class'])  # replace 'target' with your actual target column name

# Now you can use y_encoded as your target for training the classifier
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)
clf.fit(X_train, y_train)

# Make predictions on the test set
y_pred = clf.predict(X_test)
y_pred = [1 if prob > 0.5 else 0 for prob in y_pred]

# Evaluate the classifier
accuracy = accuracy_score(y_test, y_pred)
print("Test Accuracy: {:.2f}%".format(accuracy * 100))
print(classification_report(y_test,y_pred))

Test Accuracy: 69.41%
              precision    recall  f1-score   support

           0       0.64      0.90      0.75     23287
           1       0.83      0.48      0.61     23128

    accuracy                           0.69     46415
   macro avg       0.74      0.69      0.68     46415
weighted avg       0.73      0.69      0.68     46415



In [3]:
import joblib

model = clf 

joblib.dump(model, 'C://Users//amins//Desktop//linear_regression_model.pkl')

['C://Users//amins//Desktop//linear_regression_model.pkl']