Emotion label encoding used in the `label` column: sadness (0), joy (1), love (2), anger (3), fear (4)

In [None]:
 # Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

In [None]:
# Load the three pre-split CSV files (train / validation / test) in one pass.
train_df, val_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in ('training.csv', 'validation.csv', 'test.csv')
)

In [None]:
# Size check: (rows, columns) of the training and validation frames stacked together.
pd.concat([train_df,val_df]).shape

In [None]:
# Preview the first three rows of the training data.
train_df.head(3)

In [None]:
def preprocess_text(text):
    """Return *text* lowercased with everything except letters, digits and whitespace removed.

    Whitespace (spaces, tabs, newlines) is kept as-is so word boundaries
    survive; punctuation and other symbols are dropped without replacement.
    """
    lowered = text.lower()
    # Keep a character only if it is alphanumeric or whitespace.
    kept_chars = filter(lambda ch: ch.isalnum() or ch.isspace(), lowered)
    return ''.join(kept_chars)

In [None]:
# Add a cleaned 'processed_text' column to every split, derived from its raw 'text' column.
for frame in (train_df, val_df, test_df):
    frame['processed_text'] = frame['text'].apply(preprocess_text)

In [None]:
# Split each dataset into its model input (cleaned text) and target (label column).
X_train, y_train = train_df['processed_text'], train_df['label']
X_val, y_val = val_df['processed_text'], val_df['label']
X_test, y_test = test_df['processed_text'], test_df['label']

In [None]:
# Turn the text into TF-IDF feature matrices.
# The vocabulary is capped at 1000 features (tune as needed) and is learned from
# the training split only; validation and test are merely transformed with it.
vectorizer = TfidfVectorizer(max_features=1000)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_val_tfidf, X_test_tfidf = (
    vectorizer.transform(split) for split in (X_val, X_test)
)

In [None]:
# Fit a Logistic Regression classifier (default settings) on the training TF-IDF features.
# fit() returns the estimator itself, so construction and training can be chained.
model = LogisticRegression().fit(X_train_tfidf, y_train)
model

In [None]:
# Score the trained model on the validation split: overall accuracy first,
# then the per-class precision / recall / F1 report.
y_val_pred = model.predict(X_val_tfidf)
val_accuracy = accuracy_score(y_val, y_val_pred)
print("Validation Accuracy:", val_accuracy)
val_report = classification_report(y_val, y_val_pred)
print("Classification Report (Validation):\n", val_report)

In [None]:
# Score the trained model on the held-out test split: overall accuracy first,
# then the per-class precision / recall / F1 report.
y_test_pred = model.predict(X_test_tfidf)
test_accuracy = accuracy_score(y_test, y_test_pred)
print("Test Accuracy:", test_accuracy)
test_report = classification_report(y_test, y_test_pred)
print("Classification Report (Test):\n", test_report)

In [None]:
# Display names for predicted class ids; the list position is the label id.
# NOTE(review): only ids 0-4 are covered — confirm the data has no further labels.
values = [
    'sad',    # 0
    'happy',  # 1
    'love',   # 2
    'anger',  # 3
    'fear',   # 4
]

In [None]:
# Example sentence to classify with the trained model.
my_input = "i am very shocked after hearing about you"

In [None]:
# Clean the sample with the same preprocess_text used on the datasets, then
# vectorize it with the already-fitted TF-IDF vectorizer (transform only, no refit).
# transform() takes a collection of documents, hence the one-element list.
my_input = preprocess_text(my_input)
feed_input = vectorizer.transform([my_input])

In [None]:
# Predict the class id for the single vectorized sample and print its readable name.
# feed_input is already the one-row feature matrix predict() expects, so it is
# passed directly instead of being wrapped in a pandas Series and unwrapped again.
output = model.predict(feed_input)[0]
print(values[output])

In [None]:
# Per-class probabilities for the sample, scaled to percentages
# (columns follow the order of model.classes_).
# feed_input is passed directly; the former pandas Series round-trip was a no-op.
model.predict_proba(feed_input)[0] * 100