# Sentimental Analysis using Bernoulli and Multinomial Navie Bayes on Weather Data 

In [1]:
import pandas as pd

In [2]:
df=pd.read_excel("weatherdata.xlsx",usecols=["tweet","existence"])

In [3]:
df

Unnamed: 0,tweet,existence
0,Global warming report urges governments to act...,Yes
1,Fighting poverty and global warming in Africa ...,Yes
2,Carbon offsets: How a Vatican forest failed to...,Yes
3,Carbon offsets: How a Vatican forest failed to...,Yes
4,URUGUAY: Tools Needed for Those Most Vulnerabl...,Yes
...,...,...
198,Global warming evidence all around us|A messag...,Yes
199,Will Global Warming Make Iceland's Volcanoes A...,Yes
200,Will Global Warming Make Iceland's Volcanoes A...,Yes
201,Climate Change: Could NYC Get Katrina-Like Flo...,Yes


In [4]:
# Remove special characters
df['cleaned_tweet'] = df['tweet'].str.replace(r'[^\w\s]', '', regex=True)
df.head(5)

Unnamed: 0,tweet,existence,cleaned_tweet
0,Global warming report urges governments to act...,Yes,Global warming report urges governments to act...
1,Fighting poverty and global warming in Africa ...,Yes,Fighting poverty and global warming in Africa ...
2,Carbon offsets: How a Vatican forest failed to...,Yes,Carbon offsets How a Vatican forest failed to ...
3,Carbon offsets: How a Vatican forest failed to...,Yes,Carbon offsets How a Vatican forest failed to ...
4,URUGUAY: Tools Needed for Those Most Vulnerabl...,Yes,URUGUAY Tools Needed for Those Most Vulnerable...


In [5]:
df.drop(columns=['tweet'], inplace=True) #Dropping the old tweet column as we cleaned it.

In [6]:
df.head(5)

Unnamed: 0,existence,cleaned_tweet
0,Yes,Global warming report urges governments to act...
1,Yes,Fighting poverty and global warming in Africa ...
2,Yes,Carbon offsets How a Vatican forest failed to ...
3,Yes,Carbon offsets How a Vatican forest failed to ...
4,Yes,URUGUAY Tools Needed for Those Most Vulnerable...


In [7]:
y=df.iloc[:,:-1]
y

Unnamed: 0,existence
0,Yes
1,Yes
2,Yes
3,Yes
4,Yes
...,...
198,Yes
199,Yes
200,Yes
201,Yes


In [8]:
x=df.iloc[:,-1]
x

0      Global warming report urges governments to act...
1      Fighting poverty and global warming in Africa ...
2      Carbon offsets How a Vatican forest failed to ...
3      Carbon offsets How a Vatican forest failed to ...
4      URUGUAY Tools Needed for Those Most Vulnerable...
                             ...                        
198    Global warming evidence all around usA message...
199    Will Global Warming Make Icelands Volcanoes An...
200    Will Global Warming Make Icelands Volcanoes An...
201    Climate Change Could NYC Get KatrinaLike Flood...
202    Climate Change Geologists Drill Into Antarctic...
Name: cleaned_tweet, Length: 203, dtype: object

In [9]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import BernoulliNB,MultinomialNB
vectorizer1=CountVectorizer(binary=True,stop_words="english")
vectorizer2=CountVectorizer(binary=False,stop_words="english")

In [10]:
X1=vectorizer1.fit_transform(x)
X2=vectorizer2.fit_transform(x)

In [11]:
from sklearn.model_selection import train_test_split

In [12]:
xtrain,xtest,ytrain,ytest=train_test_split(X1,y,test_size=0.35,random_state=1)

In [13]:
xtrain2,xtest2,ytrain2,ytest2=train_test_split(X2,y,test_size=0.35,random_state=1)

In [14]:
from sklearn.naive_bayes import BernoulliNB,MultinomialNB

In [15]:
bn=BernoulliNB()
mnb=MultinomialNB()

In [16]:
bn.fit(xtrain,ytrain)

  y = column_or_1d(y, warn=True)


In [17]:
mnb.fit(xtrain2,ytrain2)

  y = column_or_1d(y, warn=True)


In [18]:
pred1=bn.predict(xtest)

In [19]:
pred2=mnb.predict(xtest2)

In [20]:
from sklearn.metrics import accuracy_score

In [21]:
accuracy_score(ytest,pred1)

0.875

In [22]:
accuracy_score(ytest2,pred2)

0.8472222222222222

In [36]:
from sklearn.pipeline import make_pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
xtrain,xtest,ytrain,ytest=train_test_split(x,y,random_state=21,test_size=0.3)
model=make_pipeline(TfidfVectorizer(stop_words="english"),BernoulliNB())
model.fit(xtrain,ytrain)

  y = column_or_1d(y, warn=True)


In [37]:
pred4=model.predict(xtest)
print("Accuracy_Score:\n",accuracy_score(ytest,pred4))

Accuracy_Score:
 0.9508196721311475


 Conclusion:- The highest accurracy score is 0.9508 using BernoulliNB with Tfid Vectorizer as compared to other techniques.

# Saving the model

In [34]:
import joblib
joblib.dump(model,'Sentimental_Analysis_Project')
load=joblib.load('Sentimental_Analysis_Project')

In [25]:
new_text = ["The Room temparture is increasing day-by-day due to global warming crisis."]

In [39]:
new_prediction = model.predict(new_text)
print("Prediction for new text:", new_prediction)

Prediction for new text: ['Yes']
