### importing libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
import re
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import nltk

### loading dataset

In [3]:
dataset=pd.read_csv('C://Users//ahmad//OneDrive//Desktop//New folder (2)/train.csv')

In [4]:
dataset

Unnamed: 0,SENTENCE A,SENTENCE B,label
0,someone is dirtying an animal,a woman is cleaning a shrimp,0
1,a woman in a black dress is pulling a cart and...,a lady is dressed in black and is carrying a w...,0
2,a person is cutting garlic into pieces with a ...,someone is putting ingredients into a wok,0
3,a woman is rock climbing pausing and calculati...,a man is rock climbing and a city and a bay ar...,0
4,a rabbit is playing with a toy rabbit,there is no bunny playing with a stuffed bunny,1
...,...,...,...
6689,there is no woman standing near three children,a woman is standing near three children,1
6690,the man is standing on a rocky mountain and gr...,the man is not standing on a rocky mountain an...,1
6691,a woman is putting on eyeshadow,the woman is removing make-up,1
6692,five wooden stands are in front of each childs...,five kids are standing close together and one ...,0


### checking for null values

dataset.isnull().mean()

In [5]:
dataset['label'].value_counts()

label
0    4866
1    1828
Name: count, dtype: int64

In [6]:
nltk.download('stopwords') # download stopwords in english from nltk library

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\ahmad\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [7]:
dataset['whole_sentence']=dataset['SENTENCE A']+ " " + dataset['SENTENCE B']

In [8]:
dataset['whole_sentence'].head()

0    someone is dirtying an animal a woman is clean...
1    a woman in a black dress is pulling a cart and...
2    a person is cutting garlic into pieces with a ...
3    a woman is rock climbing pausing and calculati...
4    a rabbit is playing with a toy rabbit there is...
Name: whole_sentence, dtype: object

In [9]:
import nltk
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\ahmad\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [10]:
# import these modules
from nltk.stem import WordNetLemmatizer

lemmatizer = WordNetLemmatizer()

print("cleaning :", lemmatizer.lemmatize("saw"))
print("pulling :", lemmatizer.lemmatize("pulling"))

# a denotes adjective in "pos"
print("better :", lemmatizer.lemmatize("better", pos="a"))


cleaning : saw
pulling : pulling
better : good


In [11]:
import spacy 
nlp=spacy.load('en_core_web_sm')

### if we lemmatize stopwords what they would be looklike

In [12]:
l=['no', 'nor', 'not', 'only', 'own', 'same',
'so', 'than', 'too', 'very', 's', 't', 'can', 'will', 'just', 'don', "don't", 'should', "should've", 'now', 'd', 'll', 'm', 'o', 're', 've', 'y', 'ain', 'aren', "aren't", 'couldn', "couldn't", 'didn', "didn't", 'doesn', "doesn't", 'hadn', "hadn't", 'hasn', "hasn't", 'haven'
   , "haven't", 'isn', "isn't", 'ma', 'mightn', "mightn't", 'mustn', "mustn't", 'needn', "needn't", 'shan', "shan't", 'shouldn', "shouldn't", 'wasn', "wasn't", 'weren', 
   "weren't", 'won', "won't", 'wouldn', "wouldn"]
doc2=nlp(' '.join(l))
for token in doc2:
    print(f'{token.text} {token.lemma_}')

no no
nor nor
not not
only only
own own
same same
so so
than than
too too
very very
s s
t t
can can
will will
just just
don don
do do
n't not
should should
should should
've 've
now now
d d
ll ll
m m
o o
re re
ve ve
y y
ain ain
aren aren
are be
n't not
couldn couldn
could could
n't not
didn didn
did do
n't not
doesn doesn
does do
n't not
hadn hadn
had have
n't not
hasn hasn
has have
n't not
haven haven
have have
n't not
isn isn
is be
n't not
ma ma
mightn mightn
might might
n't not
mustn mustn
must must
n't not
needn needn
need need
n't not
shan shan
sha sha
n't not
shouldn shouldn
should should
n't not
wasn wasn
was be
n't not
weren weren
were be
n't not
won win
wo will
n't not
wouldn wouldn
wouldn wouldn


### stopwords of spacey library

In [13]:
nlp.Defaults.stop_words

{"'d",
 "'ll",
 "'m",
 "'re",
 "'s",
 "'ve",
 'a',
 'about',
 'above',
 'across',
 'after',
 'afterwards',
 'again',
 'against',
 'all',
 'almost',
 'alone',
 'along',
 'already',
 'also',
 'although',
 'always',
 'am',
 'among',
 'amongst',
 'amount',
 'an',
 'and',
 'another',
 'any',
 'anyhow',
 'anyone',
 'anything',
 'anyway',
 'anywhere',
 'are',
 'around',
 'as',
 'at',
 'back',
 'be',
 'became',
 'because',
 'become',
 'becomes',
 'becoming',
 'been',
 'before',
 'beforehand',
 'behind',
 'being',
 'below',
 'beside',
 'besides',
 'between',
 'beyond',
 'both',
 'bottom',
 'but',
 'by',
 'ca',
 'call',
 'can',
 'cannot',
 'could',
 'did',
 'do',
 'does',
 'doing',
 'done',
 'down',
 'due',
 'during',
 'each',
 'eight',
 'either',
 'eleven',
 'else',
 'elsewhere',
 'empty',
 'enough',
 'even',
 'ever',
 'every',
 'everyone',
 'everything',
 'everywhere',
 'except',
 'few',
 'fifteen',
 'fifty',
 'first',
 'five',
 'for',
 'former',
 'formerly',
 'forty',
 'four',
 'from',
 'fron

In [14]:
stop_list= ['i','is','be','are','am','me','had', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', 'your', 'yours',
            'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', 'her', 'hers', 'herself',
             'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who',
            'whom', 'this', 'that', 'these', 'those', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until',
            'while', 'of', 'at', 'by', 'for', 'with', 'about', 'through', 'during', 'to', 'from', 'further', 'then', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'other'
            , 'such',  'only', 'own', 'same', 'so','a','than', 'too', 'can', 'will', 'just', 'now',
           'because','become','becomes','will','with','within','without','whenever','whereafter','while','whole','whom','whose',
            'why','would','what','twelve','twenty','two','very','too','quite','rather','re','really','regarding','same','say','see','seem','seemed','seeming','seems','several','she','should','show','side','since','six','sixty','so','somehow','someone','something','somewhere','still','such','take','ten','than','that','the','their','them','themselves','then','thence','there','thereafter','thereby','therefore','therein','thereupon','these','they','third','this','those','though','three','through']


### the reason i madeup my own stop words list is because in libraries , they consider the negative verbs as stopwords too so i should remove them

In [15]:
list1=[]
def lemmatize(text):
    text=nlp(text)
    l_list=[]
    for i in text:
        
        l_list.append(i.lemma_)
    stemmed_content=' '.join(l_list)
    return stemmed_content

In [16]:
dataset['whole_sentence1']=dataset['whole_sentence'].apply(lemmatize)
dataset.head()

Unnamed: 0,SENTENCE A,SENTENCE B,label,whole_sentence,whole_sentence1
0,someone is dirtying an animal,a woman is cleaning a shrimp,0,someone is dirtying an animal a woman is clean...,someone be dirty an animal a woman be clean a ...
1,a woman in a black dress is pulling a cart and...,a lady is dressed in black and is carrying a w...,0,a woman in a black dress is pulling a cart and...,a woman in a black dress be pull a cart and be...
2,a person is cutting garlic into pieces with a ...,someone is putting ingredients into a wok,0,a person is cutting garlic into pieces with a ...,a person be cut garlic into piece with a knife...
3,a woman is rock climbing pausing and calculati...,a man is rock climbing and a city and a bay ar...,0,a woman is rock climbing pausing and calculati...,a woman be rock climb pausing and calculate th...
4,a rabbit is playing with a toy rabbit,there is no bunny playing with a stuffed bunny,1,a rabbit is playing with a toy rabbit there is...,a rabbit be play with a toy rabbit there be no...


In [17]:
def stemming(content):
    stemmed_content=re.sub('[^a-zA-z.,]',' ',content)
    stemmed_content=stemmed_content.lower()
    stemmed_content=stemmed_content.split()
    stemmed_content=[word for word in stemmed_content if not word in stop_list]
    print(stemmed_content)
    stemmed_content=' '.join(stemmed_content)
    return stemmed_content

In [None]:
dataset['whole_sentence1']=dataset['whole_sentence1'].apply(stemming)


In [19]:
dataset

Unnamed: 0,SENTENCE A,SENTENCE B,label,whole_sentence,whole_sentence1
0,someone is dirtying an animal,a woman is cleaning a shrimp,0,someone is dirtying an animal a woman is clean...,dirty animal woman clean shrimp
1,a woman in a black dress is pulling a cart and...,a lady is dressed in black and is carrying a w...,0,a woman in a black dress is pulling a cart and...,woman in black dress pull cart stand in front ...
2,a person is cutting garlic into pieces with a ...,someone is putting ingredients into a wok,0,a person is cutting garlic into pieces with a ...,person cut garlic into piece knife put ingredi...
3,a woman is rock climbing pausing and calculati...,a man is rock climbing and a city and a bay ar...,0,a woman is rock climbing pausing and calculati...,woman rock climb pausing calculate route man r...
4,a rabbit is playing with a toy rabbit,there is no bunny playing with a stuffed bunny,1,a rabbit is playing with a toy rabbit there is...,rabbit play toy rabbit no bunny play stuff bunny
...,...,...,...,...,...
6689,there is no woman standing near three children,a woman is standing near three children,1,there is no woman standing near three children...,no woman stand near child woman stand near child
6690,the man is standing on a rocky mountain and gr...,the man is not standing on a rocky mountain an...,1,the man is standing on a rocky mountain and gr...,man stand on rocky mountain gray cloud in back...
6691,a woman is putting on eyeshadow,the woman is removing make-up,1,a woman is putting on eyeshadow the woman is r...,woman put on eyeshadow woman remove make up
6692,five wooden stands are in front of each childs...,five kids are standing close together and one ...,0,five wooden stands are in front of each childs...,five wooden stand in front child hut five kid ...


# *********************************************************************************************

In [38]:
type(dataset['whole_sentence'].values)

numpy.ndarray

In [39]:
x=dataset['whole_sentence1'].values
y=dataset['label'].values

### vectorizing

In [40]:
from sklearn.feature_extraction.text import CountVectorizer
vectorizer = CountVectorizer(max_df=0.80,min_df=2)
x = vectorizer.fit_transform(x).toarray()

In [None]:
vectorizer = TfidfVectorizer(max_df=0.75,min_df=3)
vectorizer.fit(x)
x=vectorizer.transform(x)
x.shape

In [41]:
x.shape

(6694, 1759)

### because of having imbalanced dataset we should use tools to resample it in order to solve this problem 

In [None]:
from imblearn.over_sampling import SMOTE
sm = SMOTE()
x_train1, y_train1 = sm.fit_resample(x, y)

In [42]:
from imblearn.over_sampling import RandomOverSampler
oversample = RandomOverSampler(sampling_strategy='minority')
x_train1, y_train1 = oversample.fit_resample(x, y)

In [None]:
from imblearn.under_sampling import RandomUnderSampler
undersample = RandomUnderSampler(sampling_strategy='majority')
x_train1, y_train1 = undersample.fit_resample(x, y)

In [43]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x_train1,y_train1,test_size=0.2)


In [None]:
x_train.shape

### first model 

In [44]:
from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier(n_estimators=400)
classifier.fit(x_train, y_train)

In [45]:
from sklearn.metrics import f1_score,confusion_matrix
prediction=classifier.predict(x_test)
print(f'accuracy score: {round(f1_score(prediction,y_test),3)*100}')
print(confusion_matrix(prediction,y_test))

accuracy score: 95.0
[[896  26]
 [ 75 950]]


### second model

In [46]:
from sklearn.naive_bayes import MultinomialNB
classifer=MultinomialNB()
classifier.fit(x_train,y_train)


In [47]:
prediction=classifier.predict(x_test)
print(f'accuracy score: {round(f1_score(prediction,y_test),3)*100}')
print(confusion_matrix(prediction,y_test))

accuracy score: 94.8
[[894  28]
 [ 77 948]]


### third model 

In [48]:
from sklearn.tree import DecisionTreeClassifier
classifier = DecisionTreeClassifier(criterion = 'gini')
classifier.fit(x_train, y_train)

In [49]:
prediction=classifier.predict(x_test)
print(f'accuracy score: {round(f1_score(prediction,y_test),3)*100}')
print(confusion_matrix(prediction,y_test))

accuracy score: 92.60000000000001
[[843  25]
 [128 951]]


### fourth model

In [50]:
from sklearn.linear_model import LogisticRegression
model = LogisticRegression()

In [51]:
model.fit(x_train,y_train)

In [52]:
from sklearn.metrics import f1_score,confusion_matrix
prediction=model.predict(x_test)
print(f'accuracy score: {round(f1_score(prediction,y_test),3)*100}')
print(confusion_matrix(prediction,y_test))

accuracy score: 86.3
[[808 112]
 [163 864]]


### deep learning model

In [53]:
from keras.callbacks import EarlyStopping
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1)
import tensorflow as tf
model=tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(units=128,activation='relu'))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(units=32,activation='relu'))
model.add(tf.keras.layers.Dense(units=16,activation='relu'))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(units=1,activation='sigmoid'))
model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])
model.fit(x_train,y_train,batch_size=16,epochs=32,validation_data=(x_test, y_test))

Epoch 1/32
Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32
Epoch 12/32
Epoch 13/32
Epoch 14/32
Epoch 15/32
Epoch 16/32
Epoch 17/32
Epoch 18/32
Epoch 19/32
Epoch 20/32
Epoch 21/32
Epoch 22/32
Epoch 23/32
Epoch 24/32
Epoch 25/32
Epoch 26/32
Epoch 27/32
Epoch 28/32
Epoch 29/32
Epoch 30/32
Epoch 31/32
Epoch 32/32


<keras.src.callbacks.History at 0x1e6472d9510>

In [54]:
from sklearn.metrics import f1_score,confusion_matrix
prediction=model.predict(x_test)
prediction = np.where(prediction > 0.5, 1, 0)
print(prediction)
print(f'accuracy score: {round(f1_score(prediction,y_test),3)*100}')
print(confusion_matrix(prediction,y_test))

[[0]
 [0]
 [0]
 ...
 [1]
 [0]
 [0]]
accuracy score: 92.30000000000001
[[847  34]
 [124 942]]


### last model

In [55]:
from xgboost import XGBClassifier

model = XGBClassifier().fit(x_train, y_train)

# predict
prediction = model.predict(x_test)

# accuracy score/

xgb_score = accuracy_score(prediction, y_test)

print('Accuracy score is:', xgb_score)

Accuracy score is: 0.9070364663585002


In [57]:
from sklearn.metrics import f1_score
prediction=model.predict(x_test)
print(f'accuracy score: {round(f1_score(prediction,y_test),3)*100}')

accuracy score: 91.0


# testing our model

In [261]:
import pandas as pd
test_dataset=pd.read_csv('C://Users//ahmad//OneDrive//Desktop//New folder (2)/test.csv')

In [262]:
test_dataset

Unnamed: 0,SENTENCE A,SENTENCE B
0,a woman is peeling a potato,a woman is not peeling a potato
1,two boys on a couch are reading a book,two boys on a couch are playing video games
2,the man on stage isnt singing into the microphone,a man in a suit is standing at a microphone an...
3,tom is still in a deep coma,tom is still in a light coma
4,there is no dog turning on the grass and pursu...,a dog is turning on the grass and pursuing a f...
...,...,...
739,an asian woman in in the midst of many people ...,a girl with a black bag is on a crowded train
740,a man is rapidly chopping some mushrooms with ...,there is no man rapidly chopping some mushroom...
741,two girls are laughing and other girls are wat...,there is no girl laughing and there is no othe...
742,some ingredients are being mixed in a bowl by ...,a woman is bowling two eggs to a break dancer


In [263]:
test_dataset['whole_sentence']=test_dataset['SENTENCE A'] + ' '+ test_dataset['SENTENCE B']

In [264]:
test_dataset['whole_sentence'].head()

0    a woman is peeling a potato a woman is not pee...
1    two boys on a couch are reading a book two boy...
2    the man on stage isnt singing into the microph...
3    tom is still in a deep coma tom is still in a ...
4    there is no dog turning on the grass and pursu...
Name: whole_sentence, dtype: object

In [265]:
test_dataset['whole_sentence'].shape

(744,)

In [None]:
stop_list= ['i','is','be','are','am','me','had', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', 'your', 'yours',
            'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', 'her', 'hers', 'herself',
             'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who',
            'whom', 'this', 'that', 'these', 'those', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until',
            'while', 'of', 'at', 'by', 'for', 'with', 'about', 'through', 'during', 'to', 'from', 'further', 'then', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'other'
            , 'such',  'only', 'own', 'same', 'so','a','than', 'too', 'can', 'will', 'just', 'now',
           'because','become','becomes','will','with','within','without','whenever','whereafter','while','whole','whom','whose',
            'why','would','what','twelve','twenty','two','very','too','quite','rather','re','really','regarding','same','say','see','seem','seemed','seeming','seems','several','she','should','show','side','since','six','sixty','so','somehow','someone','something','somewhere','still','such','take','ten','than','that','the','their','them','themselves','then','thence','there','thereafter','thereby','therefore','therein','thereupon','these','they','third','this','those','though','three','through']


In [266]:
def stemming(content):
    stemmed_content=re.sub('[^a-zA-z.,]',' ',content)
    stemmed_content=stemmed_content.lower()
    stemmed_content=stemmed_content.split()
    stemmed_content=[word for word in stemmed_content if not word in stop_list]
    print(stemmed_content)
    stemmed_content=' '.join(stemmed_content)
    return stemmed_content

In [267]:
list24=[]
def lemmatize(text):
    text=nlp(text)
    l_list=[]
    for i in text:
        
        l_list.append(i.lemma_)
    stemmed_content=' '.join(l_list)
    return stemmed_content

In [None]:
test_dataset['whole_sentence']=test_dataset['whole_sentence'].apply(lemmatize)

In [None]:
test_dataset['whole_sentence']=test_dataset['whole_sentence'].apply(stemming)

In [270]:
x=test_dataset['whole_sentence'].values
x.shape

(744,)

In [271]:
x=vectorizer.transform(x)

In [272]:
x.shape

(744, 1549)

In [308]:
prediction=model.predict(x)
prediction = np.where(prediction > 0.5, 1, 0)
prediction.flatten()



array([0, 0, 0, ..., 1, 0, 1])

In [None]:
prediction=classifier.predict(x)
prediction.flatten()

In [275]:
import csv
with open('C:/Users/ahmad/OneDrive/Desktop/output.csv','w') as myfile:
    dw=csv.DictWriter(myfile,fieldnames=['label'])
    dw.writeheader()
    for i in list(prediction.flatten()):
        myfile.write(str(i)+'\n')
myfile.close()


### let's have a tricky solution 

# using sentiment anlysis

In [60]:
import pandas as pd
import numpy as np
import re
import spacy

In [62]:
nlp=spacy.load('en_core_web_sm')

In [63]:
import pandas as pd
test_dataset=pd.read_csv('C://Users//ahmad//OneDrive//Desktop//New folder (2)/test.csv')

In [64]:
test_dataset['whole_sentence']=test_dataset['SENTENCE A'] + ' '+ test_dataset['SENTENCE B']

In [65]:
stop_list= ['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such',  'only', 'own', 'same', 'so', 'than', 'too', 'very', 's', 't', 'can', 'will', 'just', 'now', 'd', 'll', 'm', 'o', 're', 've', 'y']

In [66]:
def stemming(content):
    stemmed_content=re.sub('[^a-zA-z.,]',' ',content)
    stemmed_content=stemmed_content.lower()
    stemmed_content=stemmed_content.split()
    stemmed_content=[word for word in stemmed_content if not word in stop_list]
    stemmed_content=' '.join(stemmed_content)
    return stemmed_content

In [67]:
list24=[]
def lemmatize(text):
    text=nlp(text)
    l_list=[]
    for i in text:
        
        l_list.append(i.lemma_)
    stemmed_content=' '.join(l_list)
    return stemmed_content

In [None]:
test_dataset['whole_sentence']=test_dataset['whole_sentence'].apply(lemmatize)

In [69]:
test_dataset['whole_sentence']=test_dataset['whole_sentence'].apply(stemming)

In [70]:
test_dataset

Unnamed: 0,SENTENCE A,SENTENCE B,whole_sentence
0,a woman is peeling a potato,a woman is not peeling a potato,woman peel potato woman not peel potato
1,two boys on a couch are reading a book,two boys on a couch are playing video games,two boy couch read book two boy couch play vid...
2,the man on stage isnt singing into the microphone,a man in a suit is standing at a microphone an...,man stage not singe microphone man suit stand ...
3,tom is still in a deep coma,tom is still in a light coma,tom still deep coma tom still light coma
4,there is no dog turning on the grass and pursu...,a dog is turning on the grass and pursuing a f...,no dog turn grass pursue fly ball dog turn gra...
...,...,...,...
739,an asian woman in in the midst of many people ...,a girl with a black bag is on a crowded train,asian woman midst many people carry black bag ...
740,a man is rapidly chopping some mushrooms with ...,there is no man rapidly chopping some mushroom...,man rapidly chop mushroom knife no man rapidly...
741,two girls are laughing and other girls are wat...,there is no girl laughing and there is no othe...,two girl laugh girl watch no girl laugh no gir...
742,some ingredients are being mixed in a bowl by ...,a woman is bowling two eggs to a break dancer,ingredient mix bowl person woman bowl two egg ...


In [71]:
import nltk
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer
sid=SentimentIntensityAnalyzer()

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\ahmad\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [72]:
test_dataset['scores']=test_dataset['whole_sentence'].apply(lambda review: sid.polarity_scores(review))

In [73]:
test_dataset

Unnamed: 0,SENTENCE A,SENTENCE B,whole_sentence,scores
0,a woman is peeling a potato,a woman is not peeling a potato,woman peel potato woman not peel potato,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound..."
1,two boys on a couch are reading a book,two boys on a couch are playing video games,two boy couch read book two boy couch play vid...,"{'neg': 0.0, 'neu': 0.806, 'pos': 0.194, 'comp..."
2,the man on stage isnt singing into the microphone,a man in a suit is standing at a microphone an...,man stage not singe microphone man suit stand ...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound..."
3,tom is still in a deep coma,tom is still in a light coma,tom still deep coma tom still light coma,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound..."
4,there is no dog turning on the grass and pursu...,a dog is turning on the grass and pursuing a f...,no dog turn grass pursue fly ball dog turn gra...,"{'neg': 0.155, 'neu': 0.845, 'pos': 0.0, 'comp..."
...,...,...,...,...
739,an asian woman in in the midst of many people ...,a girl with a black bag is on a crowded train,asian woman midst many people carry black bag ...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound..."
740,a man is rapidly chopping some mushrooms with ...,there is no man rapidly chopping some mushroom...,man rapidly chop mushroom knife no man rapidly...,"{'neg': 0.18, 'neu': 0.82, 'pos': 0.0, 'compou..."
741,two girls are laughing and other girls are wat...,there is no girl laughing and there is no othe...,two girl laugh girl watch no girl laugh no gir...,"{'neg': 0.237, 'neu': 0.376, 'pos': 0.387, 'co..."
742,some ingredients are being mixed in a bowl by ...,a woman is bowling two eggs to a break dancer,ingredient mix bowl person woman bowl two egg ...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound..."


In [74]:
test_dataset['negativity']= test_dataset['scores'].apply(lambda x :x['neg'])

In [75]:
test_dataset

Unnamed: 0,SENTENCE A,SENTENCE B,whole_sentence,scores,negativity
0,a woman is peeling a potato,a woman is not peeling a potato,woman peel potato woman not peel potato,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.000
1,two boys on a couch are reading a book,two boys on a couch are playing video games,two boy couch read book two boy couch play vid...,"{'neg': 0.0, 'neu': 0.806, 'pos': 0.194, 'comp...",0.000
2,the man on stage isnt singing into the microphone,a man in a suit is standing at a microphone an...,man stage not singe microphone man suit stand ...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.000
3,tom is still in a deep coma,tom is still in a light coma,tom still deep coma tom still light coma,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.000
4,there is no dog turning on the grass and pursu...,a dog is turning on the grass and pursuing a f...,no dog turn grass pursue fly ball dog turn gra...,"{'neg': 0.155, 'neu': 0.845, 'pos': 0.0, 'comp...",0.155
...,...,...,...,...,...
739,an asian woman in in the midst of many people ...,a girl with a black bag is on a crowded train,asian woman midst many people carry black bag ...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.000
740,a man is rapidly chopping some mushrooms with ...,there is no man rapidly chopping some mushroom...,man rapidly chop mushroom knife no man rapidly...,"{'neg': 0.18, 'neu': 0.82, 'pos': 0.0, 'compou...",0.180
741,two girls are laughing and other girls are wat...,there is no girl laughing and there is no othe...,two girl laugh girl watch no girl laugh no gir...,"{'neg': 0.237, 'neu': 0.376, 'pos': 0.387, 'co...",0.237
742,some ingredients are being mixed in a bowl by ...,a woman is bowling two eggs to a break dancer,ingredient mix bowl person woman bowl two egg ...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.000


In [76]:
words =["haven\'t", 'isn', "isn\'t", 'mightn', "mightn\'t", 'mustn', "mustn\'t", 'needn', "needn\'t", 'shan', "shan\'t", 'shouldn', "shouldn\'t", 'wasn', "wasn\'t", 'weren', 
   "weren\'t", 'won', "won\'t", 'wouldn', "wouldn",'not','no',"don\'t"]
words[0]

"haven't"

In [77]:
def check(text): 
    num1=0
    num2=1
    text=text.split()
    words =["haven\'t", 'isn', "isn\'t", 'mightn', "mightn\'t", 'mustn', "mustn\'t", 'needn', "needn\'t", 'shan', "shan\'t", 'shouldn', "shouldn\'t", 'wasn', "wasn\'t", 'weren', 
   "weren\'t", 'won', "won\'t", 'wouldn', "wouldn",'not','no',"don\'t"]
    boolean=2
    for word in words:
        if str(word) in  text: 
            boolean=1
            break
        else:
            boolean=2
            
    if boolean==1:
        return num2
    else:
        return num1
    

In [78]:
test_dataset['negativity2']=test_dataset['whole_sentence'].apply(check)


In [79]:
test_dataset

Unnamed: 0,SENTENCE A,SENTENCE B,whole_sentence,scores,negativity,negativity2
0,a woman is peeling a potato,a woman is not peeling a potato,woman peel potato woman not peel potato,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.000,1
1,two boys on a couch are reading a book,two boys on a couch are playing video games,two boy couch read book two boy couch play vid...,"{'neg': 0.0, 'neu': 0.806, 'pos': 0.194, 'comp...",0.000,0
2,the man on stage isnt singing into the microphone,a man in a suit is standing at a microphone an...,man stage not singe microphone man suit stand ...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.000,1
3,tom is still in a deep coma,tom is still in a light coma,tom still deep coma tom still light coma,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.000,0
4,there is no dog turning on the grass and pursu...,a dog is turning on the grass and pursuing a f...,no dog turn grass pursue fly ball dog turn gra...,"{'neg': 0.155, 'neu': 0.845, 'pos': 0.0, 'comp...",0.155,1
...,...,...,...,...,...,...
739,an asian woman in in the midst of many people ...,a girl with a black bag is on a crowded train,asian woman midst many people carry black bag ...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.000,0
740,a man is rapidly chopping some mushrooms with ...,there is no man rapidly chopping some mushroom...,man rapidly chop mushroom knife no man rapidly...,"{'neg': 0.18, 'neu': 0.82, 'pos': 0.0, 'compou...",0.180,1
741,two girls are laughing and other girls are wat...,there is no girl laughing and there is no othe...,two girl laugh girl watch no girl laugh no gir...,"{'neg': 0.237, 'neu': 0.376, 'pos': 0.387, 'co...",0.237,1
742,some ingredients are being mixed in a bowl by ...,a woman is bowling two eggs to a break dancer,ingredient mix bowl person woman bowl two egg ...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.000,0


In [80]:
def check2(x):
    x=float(x)
    num1=0
    num2=1
    if x>0:
        return num2
    else:
        return num1

In [81]:
test_dataset['negativity1']=test_dataset['negativity'].apply(check2)

In [82]:
test_dataset

Unnamed: 0,SENTENCE A,SENTENCE B,whole_sentence,scores,negativity,negativity2,negativity1
0,a woman is peeling a potato,a woman is not peeling a potato,woman peel potato woman not peel potato,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.000,1,0
1,two boys on a couch are reading a book,two boys on a couch are playing video games,two boy couch read book two boy couch play vid...,"{'neg': 0.0, 'neu': 0.806, 'pos': 0.194, 'comp...",0.000,0,0
2,the man on stage isnt singing into the microphone,a man in a suit is standing at a microphone an...,man stage not singe microphone man suit stand ...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.000,1,0
3,tom is still in a deep coma,tom is still in a light coma,tom still deep coma tom still light coma,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.000,0,0
4,there is no dog turning on the grass and pursu...,a dog is turning on the grass and pursuing a f...,no dog turn grass pursue fly ball dog turn gra...,"{'neg': 0.155, 'neu': 0.845, 'pos': 0.0, 'comp...",0.155,1,1
...,...,...,...,...,...,...,...
739,an asian woman in in the midst of many people ...,a girl with a black bag is on a crowded train,asian woman midst many people carry black bag ...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.000,0,0
740,a man is rapidly chopping some mushrooms with ...,there is no man rapidly chopping some mushroom...,man rapidly chop mushroom knife no man rapidly...,"{'neg': 0.18, 'neu': 0.82, 'pos': 0.0, 'compou...",0.180,1,1
741,two girls are laughing and other girls are wat...,there is no girl laughing and there is no othe...,two girl laugh girl watch no girl laugh no gir...,"{'neg': 0.237, 'neu': 0.376, 'pos': 0.387, 'co...",0.237,1,1
742,some ingredients are being mixed in a bowl by ...,a woman is bowling two eggs to a break dancer,ingredient mix bowl person woman bowl two egg ...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.000,0,0


In [83]:
test_dataset['negetivity3']=test_dataset['negativity1']+ test_dataset['negativity2']

In [84]:
def check3(x):
    num1=1
    num2=0
    if x>0:
        return num1
    else:
        return num2

In [85]:
test_dataset['negetivity3']=test_dataset['negetivity3'].apply(check3)

In [86]:
test_dataset

Unnamed: 0,SENTENCE A,SENTENCE B,whole_sentence,scores,negativity,negativity2,negativity1,negetivity3
0,a woman is peeling a potato,a woman is not peeling a potato,woman peel potato woman not peel potato,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.000,1,0,1
1,two boys on a couch are reading a book,two boys on a couch are playing video games,two boy couch read book two boy couch play vid...,"{'neg': 0.0, 'neu': 0.806, 'pos': 0.194, 'comp...",0.000,0,0,0
2,the man on stage isnt singing into the microphone,a man in a suit is standing at a microphone an...,man stage not singe microphone man suit stand ...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.000,1,0,1
3,tom is still in a deep coma,tom is still in a light coma,tom still deep coma tom still light coma,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.000,0,0,0
4,there is no dog turning on the grass and pursu...,a dog is turning on the grass and pursuing a f...,no dog turn grass pursue fly ball dog turn gra...,"{'neg': 0.155, 'neu': 0.845, 'pos': 0.0, 'comp...",0.155,1,1,1
...,...,...,...,...,...,...,...,...
739,an asian woman in in the midst of many people ...,a girl with a black bag is on a crowded train,asian woman midst many people carry black bag ...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.000,0,0,0
740,a man is rapidly chopping some mushrooms with ...,there is no man rapidly chopping some mushroom...,man rapidly chop mushroom knife no man rapidly...,"{'neg': 0.18, 'neu': 0.82, 'pos': 0.0, 'compou...",0.180,1,1,1
741,two girls are laughing and other girls are wat...,there is no girl laughing and there is no othe...,two girl laugh girl watch no girl laugh no gir...,"{'neg': 0.237, 'neu': 0.376, 'pos': 0.387, 'co...",0.237,1,1,1
742,some ingredients are being mixed in a bowl by ...,a woman is bowling two eggs to a break dancer,ingredient mix bowl person woman bowl two egg ...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.000,0,0,0


In [87]:
test_dataset['compound']= test_dataset['scores'].apply(lambda x :x['compound'])

In [88]:
test_dataset

Unnamed: 0,SENTENCE A,SENTENCE B,whole_sentence,scores,negativity,negativity2,negativity1,negetivity3,compound
0,a woman is peeling a potato,a woman is not peeling a potato,woman peel potato woman not peel potato,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.000,1,0,1,0.0000
1,two boys on a couch are reading a book,two boys on a couch are playing video games,two boy couch read book two boy couch play vid...,"{'neg': 0.0, 'neu': 0.806, 'pos': 0.194, 'comp...",0.000,0,0,0,0.3400
2,the man on stage isnt singing into the microphone,a man in a suit is standing at a microphone an...,man stage not singe microphone man suit stand ...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.000,1,0,1,0.0000
3,tom is still in a deep coma,tom is still in a light coma,tom still deep coma tom still light coma,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.000,0,0,0,0.0000
4,there is no dog turning on the grass and pursu...,a dog is turning on the grass and pursuing a f...,no dog turn grass pursue fly ball dog turn gra...,"{'neg': 0.155, 'neu': 0.845, 'pos': 0.0, 'comp...",0.155,1,1,1,-0.2960
...,...,...,...,...,...,...,...,...,...
739,an asian woman in in the midst of many people ...,a girl with a black bag is on a crowded train,asian woman midst many people carry black bag ...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.000,0,0,0,0.0000
740,a man is rapidly chopping some mushrooms with ...,there is no man rapidly chopping some mushroom...,man rapidly chop mushroom knife no man rapidly...,"{'neg': 0.18, 'neu': 0.82, 'pos': 0.0, 'compou...",0.180,1,1,1,-0.2960
741,two girls are laughing and other girls are wat...,there is no girl laughing and there is no othe...,two girl laugh girl watch no girl laugh no gir...,"{'neg': 0.237, 'neu': 0.376, 'pos': 0.387, 'co...",0.237,1,1,1,0.5859
742,some ingredients are being mixed in a bowl by ...,a woman is bowling two eggs to a break dancer,ingredient mix bowl person woman bowl two egg ...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.000,0,0,0,0.0000


In [91]:
test_dataset

Unnamed: 0,SENTENCE A,SENTENCE B,whole_sentence,scores,negativity,negativity2,negativity1,negetivity3,compound
0,a woman is peeling a potato,a woman is not peeling a potato,woman peel potato woman not peel potato,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.000,1,0,1,0.0000
1,two boys on a couch are reading a book,two boys on a couch are playing video games,two boy couch read book two boy couch play vid...,"{'neg': 0.0, 'neu': 0.806, 'pos': 0.194, 'comp...",0.000,0,0,0,0.3400
2,the man on stage isnt singing into the microphone,a man in a suit is standing at a microphone an...,man stage not singe microphone man suit stand ...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.000,1,0,1,0.0000
3,tom is still in a deep coma,tom is still in a light coma,tom still deep coma tom still light coma,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.000,0,0,0,0.0000
4,there is no dog turning on the grass and pursu...,a dog is turning on the grass and pursuing a f...,no dog turn grass pursue fly ball dog turn gra...,"{'neg': 0.155, 'neu': 0.845, 'pos': 0.0, 'comp...",0.155,1,1,1,-0.2960
...,...,...,...,...,...,...,...,...,...
739,an asian woman in in the midst of many people ...,a girl with a black bag is on a crowded train,asian woman midst many people carry black bag ...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.000,0,0,0,0.0000
740,a man is rapidly chopping some mushrooms with ...,there is no man rapidly chopping some mushroom...,man rapidly chop mushroom knife no man rapidly...,"{'neg': 0.18, 'neu': 0.82, 'pos': 0.0, 'compou...",0.180,1,1,1,-0.2960
741,two girls are laughing and other girls are wat...,there is no girl laughing and there is no othe...,two girl laugh girl watch no girl laugh no gir...,"{'neg': 0.237, 'neu': 0.376, 'pos': 0.387, 'co...",0.237,1,1,1,0.5859
742,some ingredients are being mixed in a bowl by ...,a woman is bowling two eggs to a break dancer,ingredient mix bowl person woman bowl two egg ...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.000,0,0,0,0.0000


In [93]:
test_dataset=test_dataset.drop(['negativity1','negativity2','negativity','compound'],axis=1)

In [94]:
test_dataset

Unnamed: 0,SENTENCE A,SENTENCE B,whole_sentence,scores,negetivity3
0,a woman is peeling a potato,a woman is not peeling a potato,woman peel potato woman not peel potato,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",1
1,two boys on a couch are reading a book,two boys on a couch are playing video games,two boy couch read book two boy couch play vid...,"{'neg': 0.0, 'neu': 0.806, 'pos': 0.194, 'comp...",0
2,the man on stage isnt singing into the microphone,a man in a suit is standing at a microphone an...,man stage not singe microphone man suit stand ...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",1
3,tom is still in a deep coma,tom is still in a light coma,tom still deep coma tom still light coma,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0
4,there is no dog turning on the grass and pursu...,a dog is turning on the grass and pursuing a f...,no dog turn grass pursue fly ball dog turn gra...,"{'neg': 0.155, 'neu': 0.845, 'pos': 0.0, 'comp...",1
...,...,...,...,...,...
739,an asian woman in in the midst of many people ...,a girl with a black bag is on a crowded train,asian woman midst many people carry black bag ...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0
740,a man is rapidly chopping some mushrooms with ...,there is no man rapidly chopping some mushroom...,man rapidly chop mushroom knife no man rapidly...,"{'neg': 0.18, 'neu': 0.82, 'pos': 0.0, 'compou...",1
741,two girls are laughing and other girls are wat...,there is no girl laughing and there is no othe...,two girl laugh girl watch no girl laugh no gir...,"{'neg': 0.237, 'neu': 0.376, 'pos': 0.387, 'co...",1
742,some ingredients are being mixed in a bowl by ...,a woman is bowling two eggs to a break dancer,ingredient mix bowl person woman bowl two egg ...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0


In [110]:
len(list(s))

744

In [111]:
import csv
with open('C:/Users/ahmad/OneDrive/Desktop/output.csv','w') as myfile:
    dw=csv.DictWriter(myfile,fieldnames=['label'])
    dw.writeheader()
    for i in list(s):
        myfile.write(str(i)+'\n')
myfile.close()