# Subjectivity Detection

### 1. Gathering Data

In [4]:
import pandas as pd

# Location of the sample CSV, relative to this notebook.
csv_file = '../../data/test_sample.csv'

# Load the sample into a DataFrame.
df = pd.read_csv(csv_file)

# Preview the lemmatized text column. Only the final expression of a cell is
# rendered, so this is the single display statement of the cell.
df["lemmatized"]


0              looove product highly recommend awesome
1                        bad experience ever never buy
2                      meh okay guess kinda boring tho
3           absolutely amazing service www company com
4                                  brand guy rock keep
5    totally disappointed delivery late product broken
6                      okay nothing special 310 may be
7                              love color fit terrible
8                 refund worth price http bad shop com
9                           thanks brand quick support
Name: lemmatized, dtype: object

### 2. Subjectivity Detection Example

In [7]:
import sys
sys.path.append('/home/mca/Opinion-Mining-Project/src')

from tokenization_utils import tokenize_sentence 
from preprocess_text import better_sentence_splitter
# from training_model import get_obj_docs_extra,get_subj_docs_extra
#nltk.corpus.subjectivity is a built-in NLTK corpus that contains a dataset of subjective and objective sentences extracted from movie reviews. 
from nltk.corpus import subjectivity
# NaiveBayesClassifier is a simple probabilistic classifier from NLTK that we use to train on those labeled sentences so it learns to classify new sentences as subjective or objective.
from nltk.classify import NaiveBayesClassifier
from nltk.classify.util import accuracy
# sent_tokenize splits raw text into sentences (the classification loop below splits with better_sentence_splitter instead).
from nltk.tokenize import sent_tokenize
import nltk
# Make sure the NLTK "subjectivity" corpus is available locally.
nltk.download('subjectivity')

# Labelled training sentences taken from the corpus. Each entry is a
# (sentence, label) pair, where the sentence is whatever
# subjectivity.sents() yields for that category.
subj_docs = [(tokens, 'subj') for tokens in subjectivity.sents(categories='subj')]
obj_docs = [(tokens, 'obj') for tokens in subjectivity.sents(categories='obj')]

# Hand-written, review-style token lists, all labelled as subjective.
# The first two are identical to lemmatized rows of the sample data.
additional_subj_docs = [
    (tokens, 'subj')
    for tokens in (
        ['brand', 'guy', 'rock', 'keep'],
        ['thanks', 'brand', 'quick', 'support'],
        ['awesome', 'job'],
        ['love', 'this'],
        ['great', 'service'],
        ['really', 'liked', 'it'],
        ['highly', 'recommended'],
        ['fantastic', 'work'],
        ['well', 'done'],
        ['perfect', 'experience'],
        ['excellent', 'product'],
        ['very', 'happy', 'with', 'this'],
        ['not', 'what', 'I', 'expected'],
        ['disappointed', 'with', 'service'],
        ['highly', 'suggest', 'to', 'try'],
        ['would', 'buy', 'again'],
        ['terrible', 'quality'],
        ['best', 'purchase', 'ever'],
        ['not', 'recommend'],
        ['loved', 'it'],
    )
]

# Corpus subjective sentences followed by the hand-written extras.
subj_docs_extended = subj_docs + additional_subj_docs
# Feature extractor example: presence of words
def extract_features(words):
    """Build a word-presence feature dict from an iterable of tokens.

    Each token is lower-cased and mapped to ``True``; tokens that differ
    only by case collapse into a single key.

    Example:
        extract_features(['The', 'movie', 'was', 'Great'])
        returns {'the': True, 'movie': True, 'was': True, 'great': True}
    """
    return dict.fromkeys((word.lower() for word in words), True)
# Prepare training data:
# - take every labelled sentence (extended subjective set, then objective set),
# - turn each sentence into a feature dict with extract_features,
# - keep the (features, label) pairs in `train`.
train = list(
    (extract_features(tokens), tag)
    for tokens, tag in subj_docs_extended + obj_docs
)

# Fit the Naive Bayes classifier on the (features, label) pairs.
classifier = NaiveBayesClassifier.train(train)
# Classify each sentence of each row's lemmatized text and collect one list
# of labels per row.
all_labels = []
for lemmatized_text in df['lemmatized']:
    print("l",lemmatized_text)
    sentences = better_sentence_splitter(lemmatized_text)
    print("sentences : ",sentences)
    row_labels = []
    for sentence in sentences:
        # Tokenize the sentence, then convert the tokens to presence features.
        features = extract_features(tokenize_sentence(sentence))
        print("after extracting features : ", features)
        verdict = classifier.classify(features)
        row_labels.append(verdict)
        print(f"{verdict.upper():>4} → {sentence}")
    all_labels.append(row_labels)

# One list of per-sentence labels per row, stored as a new column.
df['sentence_subjectivity'] = all_labels
df


[nltk_data] Downloading package subjectivity to /home/mca/nltk_data...
[nltk_data]   Package subjectivity is already up-to-date!


l looove product highly recommend awesome
sentences :  ['looove product highly recommend awesome']
['looove', 'product', 'highly', 'recommend', 'awesome']
after extracting features :  {'looove': True, 'product': True, 'highly': True, 'recommend': True, 'awesome': True}
SUBJ → looove product highly recommend awesome
l bad experience ever never buy
sentences :  ['bad experience ever never buy']
['bad', 'experience', 'ever', 'never', 'buy']
after extracting features :  {'bad': True, 'experience': True, 'ever': True, 'never': True, 'buy': True}
SUBJ → bad experience ever never buy
l meh okay guess kinda boring tho
sentences :  ['meh okay guess kinda boring tho']
['meh', 'okay', 'guess', 'kinda', 'boring', 'tho']
after extracting features :  {'meh': True, 'okay': True, 'guess': True, 'kinda': True, 'boring': True, 'tho': True}
SUBJ → meh okay guess kinda boring tho
l absolutely amazing service www company com
sentences :  ['absolutely amazing service www company com']
['absolutely', 'amazin

Unnamed: 0.1,Unnamed: 0,text,label,clean_text,no_stopwords,no_emoji,segmented,tokenized,lemmatized,sentence_subjectivity
0,0,I LOOOVE this product 😍😍!!! Highly recommended...,positive,i looove this product 😍😍 highly recommended aw...,loooveproduct😍😍highlyrecommendedawesome,loooveproducthighlyrecommendedawesome,looove product highly recommended awesome,"['looove', 'product', 'highly', 'recommended',...",looove product highly recommend awesome,[subj]
1,1,Worst. Experience. Ever. Will NEVER buy again!...,negative,worst experience ever will never buy again 🤮🤬,worstexperienceeverneverbuy🤮🤬,worstexperienceeverneverbuy,worst experience ever never buy,"['worst', 'experience', 'ever', 'never', 'buy']",bad experience ever never buy,[subj]
2,2,"meh... it was okay, I guess. kinda boring tho 🙄",neutral,meh it was okay i guess kinda boring tho 🙄,mehokayguesskindaboringtho🙄,mehokayguesskindaboringtho,meh okay guess kinda boring tho,"['meh', 'okay', 'guess', 'kinda', 'boring', 't...",meh okay guess kinda boring tho,[subj]
3,3,ABSOLUTELY AMAZING SERVICE!!! 😍💯 www.company.com,positive,absolutely amazing service 😍💯 wwwcompanycom,absolutelyamazingservice😍💯wwwcompanycom,absolutelyamazingservicewwwcompanycom,absolutely amazing service www company com,"['absolutely', 'amazing', 'service', 'www', 'c...",absolutely amazing service www company com,[subj]
4,4,@brand You guys rock! Keep it up 👏🔥🔥🔥,positive,brand you guys rock keep it up 👏🔥🔥🔥,brandguysrockkeep👏🔥🔥🔥,brandguysrockkeep,brand guys rock keep,"['brand', 'guys', 'rock', 'keep']",brand guy rock keep,[obj]
5,5,"Totally disappointed. Delivery late, product b...",negative,totally disappointed delivery late product bro...,totallydisappointeddeliverylateproductbroken😡,totallydisappointeddeliverylateproductbroken,totally disappointed delivery late product broken,"['totally', 'disappointed', 'delivery', 'late'...",totally disappointed delivery late product broken,[subj]
6,6,just okay. nothing special. 3/10 maybe 🤷‍♂️,neutral,just okay nothing special 310 maybe 🤷‍♂️,okaynothingspecial310maybe🤷‍♂️,okaynothingspecial310maybe,okay nothing special 310 may be,"['okay', 'nothing', 'special', '310', 'may', '...",okay nothing special 310 may be,[subj]
7,7,"Loved the color, but the fit was terrible :(",negative,loved the color but the fit was terrible,lovedcolorfitterrible,lovedcolorfitterrible,loved color fit terrible,"['loved', 'color', 'fit', 'terrible']",love color fit terrible,[subj]
8,8,Refunded. Not worth the price!!! http://badsho...,negative,refunded not worth the price httpbadshopcom,refundedworthpricehttpbadshopcom,refundedworthpricehttpbadshopcom,refunded worth price http bad shop com,"['refunded', 'worth', 'price', 'http', 'bad', ...",refund worth price http bad shop com,[obj]
9,9,Thanks @brand for the quick support!!,positive,thanks brand for the quick support,thanksbrandquicksupport,thanksbrandquicksupport,thanks brand quick support,"['thanks', 'brand', 'quick', 'support']",thanks brand quick support,[subj]


### 3. Save Results to CSV

In [8]:
# Write the labelled frame back to the sample CSV (the same file that is read
# in section 1, so this overwrites the input).
# index=False keeps the row index out of the file; without it, every
# save/reload cycle adds another "Unnamed: 0" column to the data.
df.to_csv('../../data/test_sample.csv', index=False)
