In [None]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from sklearn.pipeline import make_pipeline
import json


corpus_path = "C:/Users/HP/OneDrive/Desktop/CODE/lanat/dataset/corpus.txt"
labels_path = "C:/Users/HP/OneDrive/Desktop/CODE/lanat/dataset/labels.txt"

# The two files are paired positionally: entry i of `labels` is the class of
# entry i of `corpus` (that is how they are handed to `fit` below).
with open(corpus_path, 'r', encoding='utf-8') as corpus_file:
    corpus = corpus_file.readlines()

# Every label is normalised to end in exactly one newline. A labels file whose
# last line lacks a trailing newline would otherwise yield e.g. "joy" next to
# "joy\n", which the classifier treats as two different classes. The newline
# is kept (not stripped) because downstream evaluation code removes it from
# the predicted labels.
with open(labels_path, 'r', encoding='utf-8') as labels_file:
    labels = [line.rstrip('\n') + '\n' for line in labels_file]


# TF-IDF features feeding a support vector classifier; the step names
# generated by make_pipeline are "tfidfvectorizer" and "svc".
pipeline = make_pipeline(TfidfVectorizer(), SVC())

# Hyper-parameters searched for every kernel.
common_grid = {
    'svc__C': [0.1, 1, 10],
    'svc__class_weight': [None, 'balanced'],
}

# gamma is only used by the non-linear kernels, so it is searched for
# 'rbf'/'poly' only. Crossing it with 'linear' would refit identical models
# once per gamma value without changing any score.
param_grid = [
    {**common_grid, 'svc__kernel': ['linear']},
    {
        **common_grid,
        'svc__kernel': ['rbf', 'poly'],
        'svc__gamma': ['scale', 'auto', 0.1, 1],
    },
]

# 5-fold cross-validated exhaustive search using all available CPU cores.
grid_search = GridSearchCV(pipeline, param_grid, cv=5, n_jobs=-1)
grid_search.fit(corpus, labels)


print("Best Parameters:", grid_search.best_params_)

# Pipeline refit on the full data with the best parameter combination.
best_model = grid_search.best_estimator_


In [None]:

import pickle

# Train the final classifier on the whole corpus with a fixed set of SVC
# hyper-parameters (hard-coded here rather than read from the grid search).
final_model = make_pipeline(TfidfVectorizer(), SVC(C=10, kernel='rbf', gamma=0.1, class_weight=None))
final_model.fit(corpus, labels)


emotion_file_path = r"C:\Users\HP\OneDrive\Desktop\CODE\lanat\bigram_lm\generated_sentences.lanat"

# NOTE(security): pickle.load can execute arbitrary code while deserialising.
# Only load this file if it was produced by a trusted process.
with open(emotion_file_path, 'rb') as emotion_file:
    emotions_data = pickle.load(emotion_file)

# Raw model outputs and the matching expected labels, accumulated per emotion.
predictions = []
true_labels = []

# emotions_data is iterated as a mapping of emotion label -> collection of
# generated sentences (assumed from its use here; confirm against the
# generator that wrote the file).
for emotion, sentences in emotions_data.items():
    # The model cannot predict on zero samples, so emotions without any
    # generated sentence are skipped instead of aborting the evaluation.
    if len(sentences) == 0:
        continue

    y_pred = final_model.predict(sentences)
    predictions.extend(y_pred)
    true_labels.extend([emotion] * len(sentences))

# Training labels were read line by line and may carry a trailing newline.
# Strip only line terminators so that a label without one is not truncated
# (blindly dropping the last character would cut a real letter).
predicts = [predict.rstrip("\r\n") for predict in predictions]
print(predictions)
print(true_labels)
# Evaluate the performance on the generated sentences
accuracy_generated = accuracy_score(true_labels, predicts)
classification_rep_generated = classification_report(true_labels, predicts)

print("Generated Sentences Accuracy:", accuracy_generated)
print("Generated Sentences Classification Report:\n", classification_rep_generated)


