SAMPLE GENERATION (BIGRAM LEVEL)

In [4]:
# Training corpus; the evaluation cell further down opens this path and reads it with readlines().
# NOTE(review): absolute, machine-specific path — consider making it configurable.
file_path = '/Users/shrutijha/Desktop/corpus.txt'
from utils import emotion_scores

In [None]:
def generate_text_with_emotion(bigram_model, min_size, target_emotion, max_attempts=None):
    """Sample a word sequence from a bigram model, biased toward ``target_emotion``.

    Generation starts from the ``'$'`` start token. At each step every word that
    follows the previous word in ``bigram_model.bigram_counts`` gets the weight

        get_probability(prev, word) + emotion score of the bigram "prev word"

    where the emotion score is the ``'score'`` of the entry labelled
    ``target_emotion`` in the ``emotion_scores(...)`` result (0 if that label is
    absent). Weights are normalised and the next word is drawn with
    ``np.random.choice``. If the previous word has no successors the attempt is
    abandoned and generation restarts from ``'$'``.

    Args:
        bigram_model: object exposing ``bigram_counts`` (mapping: word -> mapping
            of following words) and ``get_probability(word1, word2)``.
        min_size: number of sampling steps per attempt; an attempt is accepted
            once its text splits into at least this many tokens.
        target_emotion: label looked up in the ``emotion_scores`` output.
        max_attempts: optional cap on the number of attempts. ``None`` (the
            default) retries without limit, which is the historical behaviour.

    Returns:
        ``(generated_text, generated_text_emotion_scores)`` where the second item
        is ``emotion_scores(generated_text)``.

    Raises:
        RuntimeError: if ``max_attempts`` is given and that many attempts all
            ended before reaching ``min_size`` words.
    """
    # Local import: the notebook only imports numpy in a much later cell, so
    # relying on a global ``np`` fails on a fresh kernel.
    import numpy as np

    # Per-call cache so the same bigram is not re-scored on every step/restart.
    # Assumes emotion_scores is deterministic for a given input — TODO confirm.
    score_cache = {}

    def target_score(snippet):
        if snippet not in score_cache:
            score_cache[snippet] = next(
                (e['score'] for e in emotion_scores(snippet) if e['label'] == target_emotion),
                0,
            )
        return score_cache[snippet]

    attempts = 0
    while True:
        if max_attempts is not None and attempts >= max_attempts:
            raise RuntimeError(
                f"could not generate {min_size} words in {max_attempts} attempts"
            )
        attempts += 1

        text = ['$']
        for _ in range(min_size):
            prev = text[-1]
            # .get() avoids a KeyError on plain dicts and avoids inserting empty
            # entries into a defaultdict-backed model.
            candidates = list(bigram_model.bigram_counts.get(prev, {}))
            if not candidates:
                break  # dead end: no known successor, restart the attempt

            weights = [
                bigram_model.get_probability(prev, word) + target_score(f"{prev} {word}")
                for word in candidates
            ]
            total = sum(weights)
            if total > 0:
                probs = [weight / total for weight in weights]
            else:
                # All weights are zero: fall back to a uniform draw instead of
                # dividing by zero.
                probs = [1.0 / len(candidates)] * len(candidates)

            # Draw an index so the appended token is the model's own object.
            text.append(candidates[np.random.choice(len(candidates), p=probs)])

        generated_text = ' '.join(text[1:])
        if len(generated_text.split()) >= min_size:
            break

    generated_text_emotion_scores = emotion_scores(generated_text)
    return generated_text, generated_text_emotion_scores

# Load the previously saved bigram model that the generation cells below sample from.
# NOTE(review): BigramLM is not imported in any visible cell — it has to be in the
# kernel namespace already; confirm and add the import near the top.
# NOTE(review): the .pkl name suggests a pickle — presumably load_model unpickles it;
# only load model files from a trusted source.
kn_model = BigramLM.load_model('kn_model.pkl')

def write_generated_texts_to_file(bigram_lm, sample_size, target_emotion, num_samples, file_name):
    """Generate ``num_samples`` texts and store them one per line in ``file_name``.

    Each text comes from ``generate_text_with_emotion(bigram_lm, sample_size,
    target_emotion)``; only the text is written, the returned emotion scores are
    discarded. The file is opened in ``'w'`` mode, so existing content is replaced.
    """
    def _sample_lines():
        # Lazily produce one newline-terminated sample per requested text.
        for _ in range(num_samples):
            sentence, _scores = generate_text_with_emotion(bigram_lm, sample_size, target_emotion)
            yield f"{sentence}\n"

    with open(file_name, 'w') as sink:
        sink.writelines(_sample_lines())

In [None]:
# Write 50 generated 'anger' samples to gen_anger2.txt; the evaluation section
# later merges this file into merged_file1.txt.
# NOTE(review): min_sample_size is 8 here but 10 in every other emotion cell — confirm this is intentional.
min_sample_size = 8
target_emotion = 'anger' 
file = 'gen_anger2.txt'  
write_generated_texts_to_file(kn_model, min_sample_size, target_emotion, 50, file)

In [None]:
# Write 50 generated 'surprise' samples (min_size 10) to gen_surprise2.txt; the
# evaluation section later merges this file into merged_file1.txt.
min_sample_size = 10
target_emotion = 'surprise' 
file = 'gen_surprise2.txt'  
write_generated_texts_to_file(kn_model, min_sample_size, target_emotion, 50, file)

In [None]:
# Write 50 generated 'joy' samples (min_size 10) to gen_joy2.txt; the evaluation
# section later merges this file into merged_file1.txt.
min_sample_size = 10
target_emotion = 'joy' 
file = 'gen_joy2.txt'  
write_generated_texts_to_file(kn_model, min_sample_size, target_emotion, 50, file)

In [None]:
# Write 50 generated 'love' samples (min_size 10) to gen_love2.txt; the evaluation
# section later merges this file into merged_file1.txt.
min_sample_size = 10
target_emotion = 'love' 
file = 'gen_love2.txt'  
write_generated_texts_to_file(kn_model, min_sample_size, target_emotion, 50, file)

In [None]:
# Write 50 generated 'sadness' samples (min_size 10) to gen_sadness2.txt; the
# evaluation section later merges this file into merged_file1.txt.
min_sample_size = 10
target_emotion = 'sadness' 
file = 'gen_sadness2.txt'  
write_generated_texts_to_file(kn_model, min_sample_size, target_emotion, 50, file)

In [None]:
# Write 50 generated 'fear' samples (min_size 10) to gen_fear2.txt; the evaluation
# section later merges this file into merged_file1.txt.
min_sample_size = 10
target_emotion = 'fear' 
file = 'gen_fear2.txt'  
write_generated_texts_to_file(kn_model, min_sample_size, target_emotion, 50, file)

SAMPLE GENERATION (UNIGRAM LEVEL)

In [None]:
def generate_text_with_emotion(bigram_model, min_size, target_emotion, max_attempts=None):
    """Sample a word sequence from a bigram model, biased toward ``target_emotion``.

    Unigram-level variant: the emotion score is computed for the candidate word
    alone. Generation starts from the ``'$'`` start token. At each step every
    word that follows the previous word in ``bigram_model.bigram_counts`` gets
    the weight

        get_probability(prev, word) + emotion score of "word"

    where the emotion score is the ``'score'`` of the entry labelled
    ``target_emotion`` in the ``emotion_scores(...)`` result (0 if that label is
    absent). Weights are normalised and the next word is drawn with
    ``np.random.choice``. If the previous word has no successors the attempt is
    abandoned and generation restarts from ``'$'``.

    Args:
        bigram_model: object exposing ``bigram_counts`` (mapping: word -> mapping
            of following words) and ``get_probability(word1, word2)``.
        min_size: number of sampling steps per attempt; an attempt is accepted
            once its text splits into at least this many tokens.
        target_emotion: label looked up in the ``emotion_scores`` output.
        max_attempts: optional cap on the number of attempts. ``None`` (the
            default) retries without limit, which is the historical behaviour.

    Returns:
        ``(generated_text, generated_text_emotion_scores)`` where the second item
        is ``emotion_scores(generated_text)``.

    Raises:
        RuntimeError: if ``max_attempts`` is given and that many attempts all
            ended before reaching ``min_size`` words.
    """
    # Local import: the notebook only imports numpy in a later cell, so relying
    # on a global ``np`` fails on a fresh kernel.
    import numpy as np

    # Per-call cache so the same word is not re-scored on every step/restart.
    # Assumes emotion_scores is deterministic for a given input — TODO confirm.
    score_cache = {}

    def target_score(word):
        if word not in score_cache:
            score_cache[word] = next(
                (e['score'] for e in emotion_scores(word) if e['label'] == target_emotion),
                0,
            )
        return score_cache[word]

    attempts = 0
    while True:
        if max_attempts is not None and attempts >= max_attempts:
            raise RuntimeError(
                f"could not generate {min_size} words in {max_attempts} attempts"
            )
        attempts += 1

        text = ['$']
        for _ in range(min_size):
            prev = text[-1]
            # .get() avoids a KeyError on plain dicts and avoids inserting empty
            # entries into a defaultdict-backed model.
            possible_words = list(bigram_model.bigram_counts.get(prev, {}))
            if not possible_words:
                break  # dead end: no known successor, restart the attempt

            weights = [
                bigram_model.get_probability(prev, word) + target_score(word)
                for word in possible_words
            ]
            total = sum(weights)
            if total > 0:
                probs = [weight / total for weight in weights]
            else:
                # All weights are zero: fall back to a uniform draw instead of
                # dividing by zero.
                probs = [1.0 / len(possible_words)] * len(possible_words)

            # Draw an index so the appended token is the model's own object.
            text.append(possible_words[np.random.choice(len(possible_words), p=probs)])

        generated_text = ' '.join(text[1:])
        if len(generated_text.split()) >= min_size:
            break

    generated_text_emotion_scores = emotion_scores(generated_text)
    return generated_text, generated_text_emotion_scores

# Re-loads the same 'kn_model.pkl' file that the bigram-level section already loaded
# into kn_model; redundant when the notebook runs top to bottom.
# NOTE(review): BigramLM is not imported in any visible cell — it has to be in the
# kernel namespace already; confirm and add the import near the top.
# NOTE(review): the .pkl name suggests a pickle — presumably load_model unpickles it;
# only load model files from a trusted source.
kn_model = BigramLM.load_model('kn_model.pkl')

def write_generated_texts_to_file(bigram_lm, sample_size, target_emotion, num_samples, file_name):
    """Generate ``num_samples`` texts and store them one per line in ``file_name``.

    Each text comes from ``generate_text_with_emotion(bigram_lm, sample_size,
    target_emotion)``; only the text is written, the returned emotion scores are
    discarded. The file is opened in ``'w'`` mode, so existing content is replaced.
    """
    def _sample_lines():
        # Lazily produce one newline-terminated sample per requested text.
        for _ in range(num_samples):
            sentence, _scores = generate_text_with_emotion(bigram_lm, sample_size, target_emotion)
            yield f"{sentence}\n"

    with open(file_name, 'w') as sink:
        sink.writelines(_sample_lines())

In [None]:
# Write 50 generated 'joy' samples (min_size 10) to gen_joy3.txt, using whichever
# generate_text_with_emotion definition is in effect when this cell runs.
# gen_joy3.txt is not read by the evaluation cells visible in this notebook.
min_sample_size = 10
target_emotion = 'joy' 
file = 'gen_joy3.txt'  
write_generated_texts_to_file(kn_model, min_sample_size, target_emotion, 50, file)

In [None]:
# Write 50 generated 'love' samples (min_size 10) to gen_love3.txt, using whichever
# generate_text_with_emotion definition is in effect when this cell runs.
# gen_love3.txt is not read by the evaluation cells visible in this notebook.
min_sample_size = 10
target_emotion = 'love' 
file = 'gen_love3.txt'  
write_generated_texts_to_file(kn_model, min_sample_size, target_emotion, 50, file)

In [None]:
# Write 50 generated 'anger' samples (min_size 10) to gen_anger3.txt, using whichever
# generate_text_with_emotion definition is in effect when this cell runs.
# gen_anger3.txt is not read by the evaluation cells visible in this notebook.
min_sample_size = 10
target_emotion = 'anger' 
file = 'gen_anger3.txt'  
write_generated_texts_to_file(kn_model, min_sample_size, target_emotion, 50, file)

In [None]:
# Write 50 generated 'surprise' samples (min_size 10) to gen_surprise3.txt, using
# whichever generate_text_with_emotion definition is in effect when this cell runs.
# gen_surprise3.txt is not read by the evaluation cells visible in this notebook.
min_sample_size = 10
target_emotion = 'surprise' 
file = 'gen_surprise3.txt'  
write_generated_texts_to_file(kn_model, min_sample_size, target_emotion, 50, file)

In [None]:
# Write 50 generated 'fear' samples (min_size 10) to gen_fear3.txt, using whichever
# generate_text_with_emotion definition is in effect when this cell runs.
# gen_fear3.txt is not read by the evaluation cells visible in this notebook.
min_sample_size = 10
target_emotion = 'fear' 
file = 'gen_fear3.txt'  
write_generated_texts_to_file(kn_model, min_sample_size, target_emotion, 50, file)

In [None]:
# Write 50 generated 'sadness' samples (min_size 10) to gen_sadness3.txt, using
# whichever generate_text_with_emotion definition is in effect when this cell runs.
# gen_sadness3.txt is not read by the evaluation cells visible in this notebook.
min_sample_size = 10
target_emotion = 'sadness' 
file = 'gen_sadness3.txt'  
write_generated_texts_to_file(kn_model, min_sample_size, target_emotion, 50, file)

EXTRINSIC EVALUATION

In [1]:
import os

def merge_text_files(file_list, output_file):
    """Concatenate the text files in ``file_list`` into ``output_file``.

    Files are appended in list order. A path that is not an existing regular
    file is reported on stdout and skipped. If a non-empty input does not end
    with a newline, one is appended so its last line cannot fuse with the first
    line of the next file (consumers of the merged file count lines).

    Args:
        file_list: iterable of input file paths.
        output_file: path of the merged file; opened in ``'w'`` mode, so any
            existing content is replaced.
    """
    with open(output_file, 'w') as outfile:
        for fname in file_list:
            if not os.path.isfile(fname):
                print(f"File not found: {fname}")
                continue
            with open(fname, 'r') as infile:
                content = infile.read()
            outfile.write(content)
            # Keep the merged file strictly one-sample-per-line.
            if content and not content.endswith('\n'):
                outfile.write('\n')

# Merge the six gen_*2.txt sample files into a single evaluation file.
# The order matters: the evaluation cell builds y_test as 50 labels per class in
# label_mapping order (sadness=0, joy=1, anger=2, fear=3, love=4, surprise=5),
# which is exactly the file order used here.
file_list = ['gen_sadness2.txt', 'gen_joy2.txt', 'gen_anger2.txt', 'gen_fear2.txt', 'gen_love2.txt', 'gen_surprise2.txt']
output_file = 'merged_file1.txt'
merge_text_files(file_list, output_file)

In [7]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, accuracy_score
import numpy as np

def preprocess(text):
    """Return a list with one normalised copy of every item in ``text``.

    For each item the characters ``.``, ``,``, ``?`` and ``!`` are each replaced
    by a single space and the result is lower-cased. The input is not modified.
    """
    def _normalise(line):
        for mark in ('.', ',', '?', '!'):
            line = line.replace(mark, ' ')
        return line.lower()

    return [_normalise(line) for line in text]

# --- Load the training corpus, the generated test sentences and the training labels ---
# file_path (training corpus) is defined in the first code cell of the notebook.
with open(file_path, 'r') as file:
    corpus = file.readlines()

# merged_file1.txt is produced by merge_text_files in the previous cell.
with open('merged_file1.txt', 'r') as file:
    Test_data = file.readlines()

# NOTE(review): absolute, machine-specific path — consider making it configurable.
with open('/Users/shrutijha/Desktop/labels.txt', 'r') as file:
    labels = file.readlines()


# Emotion name -> integer class id; the same ids are used for y_test below.
label_mapping = {'sadness': 0, 'joy': 1, 'anger': 2, 'fear': 3, 'love': 4, 'surprise': 5}
labels = np.array([label_mapping[label.strip()] for label in labels])

Train_data = preprocess(corpus)

# Expected labels of the generated sentences: 50 per emotion, in the order in
# which the per-emotion files were merged (which follows label_mapping).
y_test = [0]*50 + [1]*50 + [2]*50 + [3]*50 + [4]*50 + [5]*50

# Fail early with a readable message instead of a shape error deep inside
# scikit-learn (e.g. when one of the generated files was missing at merge time).
if len(Train_data) != len(labels):
    raise ValueError(
        f"corpus has {len(Train_data)} lines but labels file has {len(labels)}"
    )
if len(Test_data) != len(y_test):
    raise ValueError(
        f"merged_file1.txt has {len(Test_data)} lines but {len(y_test)} were expected"
    )

tfidf_vectorizer = TfidfVectorizer()
X_train = tfidf_vectorizer.fit_transform(Train_data)
# Apply the same preprocessing to the test sentences as to the training corpus.
X_test = tfidf_vectorizer.transform(preprocess(Test_data))

svc = SVC(kernel='linear')
svc.fit(X_train, labels)

y_pred = svc.predict(X_test)
print('Accuracy:', accuracy_score(y_test, y_pred))
print('Classification Report:')
print(classification_report(y_test, y_pred))

Accuracy: 0.6033333333333334
Classification Report:
              precision    recall  f1-score   support

           0       0.52      0.60      0.56        50
           1       0.40      0.48      0.44        50
           2       0.51      0.36      0.42        50
           3       0.57      0.48      0.52        50
           4       0.71      0.94      0.81        50
           5       0.97      0.76      0.85        50

    accuracy                           0.60       300
   macro avg       0.61      0.60      0.60       300
weighted avg       0.61      0.60      0.60       300



In [6]:
# Hyper-parameter search: every combination of C and kernel listed below is scored
# with 5-fold cross-validation on the training TF-IDF matrix.
# X_train, labels, X_test and y_test come from the SVM evaluation cell, which must
# have been run before this one.
param_grid = {
    'C': [0.1, 0.5, 0.75, 1, 1.5, 3, 5, 10], 
    'kernel': ['linear', 'rbf', 'poly', 'sigmoid']
}

grid_search = GridSearchCV(SVC(), param_grid, cv=5)
grid_search.fit(X_train, labels)

print('Best parameters found:', grid_search.best_params_)

# Score the selected model on the generated sentences (X_test / y_test).
best_svc = grid_search.best_estimator_
test_predictions = best_svc.predict(X_test)
print('Test accuracy:', accuracy_score(y_test, test_predictions))
print(classification_report(y_test, test_predictions))

Best parameters found: {'C': 1.5, 'kernel': 'linear'}
Test accuracy: 0.6166666666666667
              precision    recall  f1-score   support

           0       0.53      0.64      0.58        50
           1       0.45      0.46      0.46        50
           2       0.49      0.36      0.41        50
           3       0.57      0.52      0.54        50
           4       0.71      0.94      0.81        50
           5       0.97      0.78      0.87        50

    accuracy                           0.62       300
   macro avg       0.62      0.62      0.61       300
weighted avg       0.62      0.62      0.61       300

