In [1]:
import pandas as pd
import nltk

In [2]:
from nltk.tokenize import word_tokenize

In [3]:
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\mccal\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [4]:
from nltk.corpus import stopwords
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\mccal\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [5]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [6]:
from sklearn.model_selection import train_test_split

In [7]:
from sklearn.linear_model import LogisticRegression

In [8]:
from sklearn.metrics import accuracy_score, classification_report

In [9]:
emotions=pd.read_csv('emotions.csv')

In [10]:
emotions.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 416809 entries, 0 to 416808
Data columns (total 2 columns):
 #   Column  Non-Null Count   Dtype 
---  ------  --------------   ----- 
 0   text    416809 non-null  object
 1   label   416809 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 6.4+ MB


In [11]:
emotions.head()

Unnamed: 0,text,label
0,i just feel really helpless and heavy hearted,4
1,ive enjoyed being able to slouch about relax a...,0
2,i gave up my internship with the dmrg and am f...,4
3,i dont know i feel so lost,0
4,i am a kindergarten teacher and i am thoroughl...,4


In [12]:
def tokenize(text):
    return nltk.word_tokenize(text)

In [13]:
emotions['tokens']=emotions['text'].apply(tokenize)

In [14]:
stop_words = set(stopwords.words('english'))

In [15]:
emotions['filtered_tokens'] = emotions['tokens'].apply(lambda x: [word for word in x if word not in stop_words])

In [16]:
emotions.head()

Unnamed: 0,text,label,tokens,filtered_tokens
0,i just feel really helpless and heavy hearted,4,"[i, just, feel, really, helpless, and, heavy, ...","[feel, really, helpless, heavy, hearted]"
1,ive enjoyed being able to slouch about relax a...,0,"[ive, enjoyed, being, able, to, slouch, about,...","[ive, enjoyed, able, slouch, relax, unwind, fr..."
2,i gave up my internship with the dmrg and am f...,4,"[i, gave, up, my, internship, with, the, dmrg,...","[gave, internship, dmrg, feeling, distraught]"
3,i dont know i feel so lost,0,"[i, dont, know, i, feel, so, lost]","[dont, know, feel, lost]"
4,i am a kindergarten teacher and i am thoroughl...,4,"[i, am, a, kindergarten, teacher, and, i, am, ...","[kindergarten, teacher, thoroughly, weary, job..."


In [17]:
emotions['text_combined'] = emotions['filtered_tokens'].apply(lambda x: ' '.join(x))

In [18]:
vectorizer = TfidfVectorizer()

In [19]:
X = vectorizer.fit_transform(emotions['text_combined'])

In [20]:
y=emotions['label']

In [21]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [22]:
model = LogisticRegression(penalty='l2', max_iter=200)  # Default is 'l2'

In [23]:
model.fit(X_train, y_train)

In [24]:
y_pred = model.predict(X_test)

In [25]:
input_text = "I am so happy and filled with joy!"

# Preprocess the input text
input_tokens = nltk.word_tokenize(input_text.lower())
filtered_input_tokens = [word for word in input_tokens if word not in stop_words]
input_combined = ' '.join(filtered_input_tokens)

# Transform the input text using the same vectorizer
input_vector = vectorizer.transform([input_combined])

# Get the probability scores for each label
probabilities = model.predict_proba(input_vector)

# Output the probabilities
print("Probability scores for each label:")
for i, prob in enumerate(probabilities[0]):
    print(f"Emotion code {i}: {prob:.4f}")

Probability scores for each label:
Emotion code 0: 0.0268
Emotion code 1: 0.9043
Emotion code 2: 0.0401
Emotion code 3: 0.0140
Emotion code 4: 0.0068
Emotion code 5: 0.0080


In [33]:
probabilities

array([[0.02682203, 0.90432754, 0.04010104, 0.01395425, 0.00684164,
        0.0079535 ]])

In [37]:
emotions

Unnamed: 0,text,label,tokens,filtered_tokens,text_combined
0,i just feel really helpless and heavy hearted,4,"[i, just, feel, really, helpless, and, heavy, ...","[feel, really, helpless, heavy, hearted]",feel really helpless heavy hearted
1,ive enjoyed being able to slouch about relax a...,0,"[ive, enjoyed, being, able, to, slouch, about,...","[ive, enjoyed, able, slouch, relax, unwind, fr...",ive enjoyed able slouch relax unwind frankly n...
2,i gave up my internship with the dmrg and am f...,4,"[i, gave, up, my, internship, with, the, dmrg,...","[gave, internship, dmrg, feeling, distraught]",gave internship dmrg feeling distraught
3,i dont know i feel so lost,0,"[i, dont, know, i, feel, so, lost]","[dont, know, feel, lost]",dont know feel lost
4,i am a kindergarten teacher and i am thoroughl...,4,"[i, am, a, kindergarten, teacher, and, i, am, ...","[kindergarten, teacher, thoroughly, weary, job...",kindergarten teacher thoroughly weary job take...
...,...,...,...,...,...
416804,i feel like telling these horny devils to find...,2,"[i, feel, like, telling, these, horny, devils,...","[feel, like, telling, horny, devils, find, sit...",feel like telling horny devils find site suite...
416805,i began to realize that when i was feeling agi...,3,"[i, began, to, realize, that, when, i, was, fe...","[began, realize, feeling, agitated, restless, ...",began realize feeling agitated restless would ...
416806,i feel very curious be why previous early dawn...,5,"[i, feel, very, curious, be, why, previous, ea...","[feel, curious, previous, early, dawn, time, s...",feel curious previous early dawn time seek tro...
416807,i feel that becuase of the tyranical nature of...,3,"[i, feel, that, becuase, of, the, tyranical, n...","[feel, becuase, tyranical, nature, government,...",feel becuase tyranical nature government el sa...
