In [None]:
import pandas as pd
import nltk
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_val_score

In [None]:
# Read the tweet dataset (a text column and an integer label column) and display it.
tweets_csv = '/content/tweet_2.csv'
df = pd.read_csv(tweets_csv)
df

Unnamed: 0,text,label
0,i didnt feel humiliated,0
1,i can go from feeling so hopeless to so damned...,0
2,im grabbing a minute to post i feel greedy wrong,3
3,i am ever feeling nostalgic about the fireplac...,2
4,i am feeling grouchy,3
...,...,...
15995,i just had a very brief time in the beanbag an...,0
15996,i am now turning and i feel pathetic that i am...,0
15997,i feel strong and good overall,1
15998,i feel like this was such a rude comment and i...,3


In [None]:
# NLP preprocessing. Label encoding: sadness (0), joy (1), love (2), anger (3), fear (4).

# Lemmatizer: load the spaCy "en_core_web_sm" pipeline that lemma_this calls
# to obtain each token's lemma.
import spacy
nlp = spacy.load("en_core_web_sm")

def lemma_this(text):
    """Return `text` with every token replaced by its spaCy lemma.

    Each lemma is followed by a single space, so a non-empty result ends with
    a trailing space; an input that yields no tokens gives the empty string.
    """
    return "".join(f"{tok.lemma_} " for tok in nlp(text))

# Overwrite the text column with its lemmatized form, then display the frame.
# NOTE: the column is replaced in place, so re-running this cell lemmatizes
# text that has already been lemmatized.
df['text'] = df.text.apply(lemma_this)

df

Unnamed: 0,text,label
0,I do not feel humiliate,0
1,I can go from feel so hopeless to so damned ho...,0
2,I m grab a minute to post I feel greedy wrong,3
3,I be ever feel nostalgic about the fireplace I...,2
4,I be feel grouchy,3
...,...,...
15995,I just have a very brief time in the beanbag a...,0
15996,I be now turn and I feel pathetic that I be st...,0
15997,I feel strong and good overall,1
15998,I feel like this be such a rude comment and I ...,3


In [None]:


# Stop-word removal: fetch the NLTK resources used by the code below.
# 'punkt_tab' is requested alongside 'punkt' because recent NLTK releases (3.9+)
# load the tabular Punkt data in word_tokenize and raise LookupError without it.
for resource in ('punkt', 'punkt_tab', 'stopwords'):
    nltk.download(resource)

# A set gives constant-time membership checks when filtering tokens.
stop_words = set(stopwords.words('english'))


def extract_keywords(text):
    """Return the lower-cased alphabetic non-stop-word tokens of `text`.

    The text is tokenized with nltk.word_tokenize; a token is dropped when its
    lower-cased form is in `stop_words` or when it is not purely alphabetic.
    Surviving tokens are lower-cased and joined with single spaces.
    """
    kept = []
    for token in nltk.word_tokenize(text):
        lowered = token.lower()
        if lowered in stop_words or not token.isalpha():
            continue
        kept.append(lowered)
    return " ".join(kept)

# Overwrite the (already lemmatized) text column with its keyword-only form.
df['text'] = df.text.apply(extract_keywords)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [None]:
# Display the frame again to inspect the text column after stop-word removal.
df

Unnamed: 0,text,label
0,feel humiliate,0
1,go feel hopeless damned hopeful around someone...,0
2,grab minute post feel greedy wrong,3
3,ever feel nostalgic fireplace know still property,2
4,feel grouchy,3
...,...,...
15995,brief time beanbag say anna feel like beat,0
15996,turn feel pathetic still wait table sub teachi...,0
15997,feel strong good overall,1
15998,feel like rude comment glad,3


In [None]:
# Bag-of-words counts feeding a random forest. Pinning random_state makes the
# fitted model, and every score computed from it, reproducible between runs.
PL = Pipeline([
    ('vectorizer', CountVectorizer()),
    ('random_forest', RandomForestClassifier(random_state=42)),
])

In [None]:
# Fit the vectorizer and the forest on the preprocessed text column and its labels.
PL.fit(df.text, df.label)

In [None]:
# Score on the very rows the pipeline was just fit on: this is a training
# score, so it says nothing about performance on unseen text.
PL.score(df.text, df.label)

0.997625

In [None]:
# 10-fold cross-validation of the whole pipeline; the mean of the per-fold
# scores is the held-out estimate to compare against the training score.
score = cross_val_score(PL, df.text, df.label, cv = 10)
score.mean()

0.8616875

In [None]:
# Hand-written probe sentences intended to read as joy (label 1).
joyful_sentences = [
    "I am so happy today!",
    "I love spending time with my family and friends.",
    "I am grateful for all the good things in my life.",
    "I am looking forward to all the wonderful things that the future holds.",
    "I am excited to make a difference in the world.",
    "I am filled with joy and love."
]


In [None]:
# The pipeline was fit on lemmatized, stop-word-filtered text, so the probe
# sentences go through the same two preprocessing steps before prediction.
PL.predict([extract_keywords(lemma_this(sentence)) for sentence in joyful_sentences])

array([1, 2, 1, 1, 1, 2])

In [None]:
# Hand-written probe sentences intended to read as anger (label 3).
anger_sentences = [
    "I am so angry right now!",
    "I can't believe you did that!",
    "You're so inconsiderate!",
    "I'm so mad I could spit!",
    "I'm going to lose my temper!",
    "I'm so furious I could explode!"
]
# The pipeline was fit on lemmatized, stop-word-filtered text, so the probe
# sentences go through the same two preprocessing steps before prediction.
PL.predict([extract_keywords(lemma_this(sentence)) for sentence in anger_sentences])

array([3, 3, 3, 3, 0, 3])

In [None]:
# Hand-written probe sentences intended to read as love (label 2).
love_sentences = [
    "I love you",
    "I care about you",
    "You are special to me",
    "I am grateful for you",
    "I am lucky to have you in my life",
    "I love spending time with you",
    "You make me happy",
    "You are my best friend",
    "I can't imagine my life without you",
    "I love you more than words can say"
]
# The pipeline was fit on lemmatized, stop-word-filtered text, so the probe
# sentences go through the same two preprocessing steps before prediction.
PL.predict([extract_keywords(lemma_this(sentence)) for sentence in love_sentences])

array([2, 2, 1, 3, 1, 2, 1, 3, 3, 1])

In [None]:
def myownpipe(text):
    """Classify one raw sentence using the same preprocessing as the training data.

    The sentence is lemmatized with lemma_this, reduced to keywords with
    extract_keywords, and passed to PL.predict as a one-element list; the
    prediction result for that single-item batch is returned unchanged.
    """
    cleaned = extract_keywords(lemma_this(text))
    return PL.predict([cleaned])

# Try the preprocessing-aware helper on a single sentence.
myownpipe("I love you more than words can say")

array([2])

In [None]:
# Classify each love probe sentence through the preprocessing-aware helper.
for sentence in love_sentences:
    print(myownpipe(sentence))

[2]
[2]
[1]
[3]
[1]
[2]
[1]
[1]
[3]
[2]


In [None]:
import pickle

# Persist the fitted pipeline. It was fit on text that had already been passed
# through lemma_this and extract_keywords, so whoever loads this file must apply
# the same preprocessing before calling predict.
with open('model_pickle', 'wb') as model_file:
    pickle.dump(PL, model_file)