In [1]:
#Imports

import re
from collections import Counter
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier

## StreamLit UI

In [2]:
import streamlit as st
st.write("# Text Emotions Prediction")
t1 = st.text_input("Enter any text>>: ")

2024-02-28 19:37:10.286 
  command:

    streamlit run C:\Users\deept\AppData\Roaming\Python\Python310\site-packages\ipykernel_launcher.py [ARGUMENTS]


## Read Data

In [3]:
def read_data(file):
    data = []
    with open(file,'r') as f:
        for line in f:
            line = line.strip()
            label = ' '.join(line[1:line.find("]")].strip().split())
            text = line[line.find("]")+1:].strip()
            data.append([label,text])
        return data

file = "text_emot.txt"
data = read_data(file)
print("Number of instances: {}".format(len(data)))

Number of instances: 7480


## Generate Features

In [4]:
def ngram(token,n):
    output = []
    
    for i in range(n-1,len(token)):
        ngram = " ".join(token[i-n+1:i+1])
        output.append(ngram)
    
    return output

def create_features(text,nrange=(1,1)):
    text_features = []    
    text = text.lower()    
    text_alphanum = re.sub('[^a-z0-9#]',' ',text)
    
    for n in range(nrange[0],nrange[1]+1):
        text_features += ngram(text_alphanum.split(),n)
        
    text_punc = re.sub('[a-z0-9]'," ",text)
    
    text_features += ngram(text_punc.split(),1)
    
    return Counter(text_features)

In [5]:
print(create_features("I love you!"))
print(create_features(" aly wins the gold!!!"))
print(create_features(" aly wins the gold!!!!!", (1, 2)))

Counter({'i': 1, 'love': 1, 'you': 1, '!': 1})
Counter({'aly': 1, 'wins': 1, 'the': 1, 'gold': 1, '!!!': 1})
Counter({'aly': 1, 'wins': 1, 'the': 1, 'gold': 1, 'aly wins': 1, 'wins the': 1, 'the gold': 1, '!!!!!': 1})


## Convert Labels to Emotions

In [6]:
def convert_label(item,name):
    items = list(map(float,item.split()))
    
    label = ""
    
    for idx in range(len(items)):
        if items[idx] == 1:
            label += name[idx] +" "
            
    return label.strip()

In [7]:
emotions = ["joy", 'fear', "anger", "sadness", "disgust", "shame", "guilt"]

X_all = []
y_all = []

for label,text in data:
    y_all.append(convert_label(label,emotions))
    X_all.append(create_features(text,nrange=(1,4)))

In [8]:
print("features example: ")
print(X_all[0])
print("Label example:")
print(y_all[0])

features example: 
Counter({'time': 2, 'we': 2, 'met': 2, 'during': 1, 'the': 1, 'period': 1, 'of': 1, 'falling': 1, 'in': 1, 'love': 1, 'each': 1, 'that': 1, 'and': 1, 'especially': 1, 'when': 1, 'had': 1, 'not': 1, 'for': 1, 'a': 1, 'long': 1, 'during the': 1, 'the period': 1, 'period of': 1, 'of falling': 1, 'falling in': 1, 'in love': 1, 'love each': 1, 'each time': 1, 'time that': 1, 'that we': 1, 'we met': 1, 'met and': 1, 'and especially': 1, 'especially when': 1, 'when we': 1, 'we had': 1, 'had not': 1, 'not met': 1, 'met for': 1, 'for a': 1, 'a long': 1, 'long time': 1, 'during the period': 1, 'the period of': 1, 'period of falling': 1, 'of falling in': 1, 'falling in love': 1, 'in love each': 1, 'love each time': 1, 'each time that': 1, 'time that we': 1, 'that we met': 1, 'we met and': 1, 'met and especially': 1, 'and especially when': 1, 'especially when we': 1, 'when we had': 1, 'we had not': 1, 'had not met': 1, 'not met for': 1, 'met for a': 1, 'for a long': 1, 'a long t

## Split Data

In [12]:
X_train,X_test,y_train,y_test = train_test_split(X_all,y_all,test_size=0.2,random_state=123)

def train_test(clf,X_train,X_test,y_train,y_test):
    clf.fit(X_train,y_train)
    train_acc = accuracy_score(y_train,clf.predict(X_train))
    test_acc = accuracy_score(y_test,clf.predict(X_test))
    return train_acc,test_acc

from sklearn.feature_extraction import DictVectorizer
vect = DictVectorizer(sparse=True)
X_train = vect.fit_transform(X_train)
X_test = vect.transform(X_test)

## Model Building

In [13]:
svc = SVC()
lsvc = LinearSVC(random_state=123)
rforest = RandomForestClassifier(random_state=123)
dtree = DecisionTreeClassifier()

clifs = [svc,lsvc,rforest,dtree]

# train and test them 
print("| {:25} | {} | {} |".format("Classifier", "Training Accuracy", "Test Accuracy"))
print("| {} | {} | {} |".format("-"*25, "-"*17, "-"*13))

for clf in clifs:
    clf_name = clf.__class__.__name__
    train_acc,test_acc = train_test(clf,X_train,X_test,y_train,y_test)
    print("| {:25} | {:17.7f} | {:13.7f} |".format(clf_name, train_acc, test_acc))

| Classifier                | Training Accuracy | Test Accuracy |
| ------------------------- | ----------------- | ------------- |
| SVC                       |         0.9067513 |     0.4512032 |




| LinearSVC                 |         0.9988302 |     0.5768717 |
| RandomForestClassifier    |         0.9988302 |     0.5541444 |
| DecisionTreeClassifier    |         0.9988302 |     0.4598930 |


In [19]:
l = ["joy", 'fear', "anger", "sadness", "disgust", "shame", "guilt"]

l.sort()
label_freq = {}

for label, _ in data:    
    #print(label)
    label_freq[label] = label_freq.get(label,0) + 1
    #print(label_freq)
    
label_freq

# print the labels and their counts in sorted order 
for l in sorted(label_freq,key=label_freq.get,reverse=True):
    print("{:10}({})  {}".format(convert_label(l, emotions), l, label_freq[l]))

joy       (1. 0. 0. 0. 0. 0. 0.)  1084
anger     (0. 0. 1. 0. 0. 0. 0.)  1080
sadness   (0. 0. 0. 1. 0. 0. 0.)  1079
fear      (0. 1. 0. 0. 0. 0. 0.)  1078
disgust   (0. 0. 0. 0. 1. 0. 0.)  1057
guilt     (0. 0. 0. 0. 0. 0. 1.)  1057
shame     (0. 0. 0. 0. 0. 1. 0.)  1045


## Working via UI

In [22]:
emoji_dict = {"joy":"😂", "fear":"😱", "anger":"😠", "sadness":"😢", "disgust":"😒", "shame":"😳", "guilt":"😳"}

texts = [t1]  #Getting user input from streamlit UI

for text in texts:
    features = create_features(text,nrange=(1,4))
    features = vect.transform(features)
    #print(features)
    prediction = clf.predict(features)[0]
    print(prediction)
    st.write(emoji_dict[prediction])


guilt
