In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [None]:
# Read the review data from a CSV file located in the working directory.
YELP_CSV = 'yelp.csv'
yelp = pd.read_csv(YELP_CSV)

In [None]:
# Preview the first rows (head() defaults to 5) to eyeball the columns.
yelp.head()

In [None]:
# Print the frame summary: column names, dtypes, non-null counts and memory use.
yelp.info()

In [None]:
# Summary statistics; by default describe() reports on the numeric columns.
yelp.describe()

In [None]:
# Add a per-review character count. The vectorized .str accessor replaces the
# element-wise apply(len) and yields NaN for a missing review instead of
# raising TypeError.
yelp['text length'] = yelp['text'].str.len()

In [None]:
# One histogram of review length per star rating, laid out in columns.
# FacetGrid.map returns the grid itself, so `g` still refers to the FacetGrid.
g = sns.FacetGrid(data=yelp, col='stars').map(plt.hist, 'text length', bins=20)

In [None]:
# Distribution of review length for each star rating.
# NOTE(review): recent seaborn releases appear to warn when `palette` is given
# without `hue` — confirm against the installed version.
sns.boxplot(
    data=yelp,
    x="stars",
    y="text length",
    palette='rainbow',
)

In [None]:
# Number of reviews at each star rating.
sns.countplot(data=yelp, x='stars')

In [None]:
# Per-rating mean of every numeric column. numeric_only=True is needed on
# pandas >= 2.0, where mean() no longer silently drops non-numeric columns
# (such as the review text) and raises TypeError for them instead.
stars = yelp.groupby('stars').mean(numeric_only=True)
stars

In [None]:
# Correlation between the per-rating column means, drawn as an annotated heatmap.
star_corr = stars.corr()
sns.heatmap(star_corr, annot=True, cmap='coolwarm')

In [None]:
# Keep only the 1-star and 5-star reviews, giving a two-class problem:
# X holds the review text, y the star label.
is_extreme = yelp['stars'].isin([1, 5])
yelp_class = yelp[is_extreme]
X, y = yelp_class['text'], yelp_class['stars']

In [None]:
from sklearn.feature_extraction.text import CountVectorizer

# Build the bag-of-words count matrix straight from the review text rather
# than from X itself: `X = cv.fit_transform(X)` replaced the text with the
# sparse matrix, so running this cell a second time fed that matrix back into
# the vectorizer and failed.
# NOTE(review): the vocabulary is fitted on every row before the train/test
# split, so held-out reviews contribute tokens — confirm that is acceptable.
cv = CountVectorizer()
X = cv.fit_transform(yelp_class['text'])

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=101,
)

In [None]:
from sklearn.naive_bayes import MultinomialNB

# Fit a multinomial Naive Bayes classifier on the training split.
nb = MultinomialNB()
nb.fit(X=X_train, y=y_train)

In [None]:
# Predict a star label for every held-out review with the fitted classifier.
predictions = nb.predict(X_test)

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Score the held-out predictions: the confusion matrix first, then the
# per-class classification report, separated by blank lines.
matrix = confusion_matrix(y_test, predictions)
report = classification_report(y_test, predictions)
print(matrix)
print('\n')
print(report)

In [None]:
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.pipeline import Pipeline

# Chain vectorizing, TF-IDF weighting and classification into one estimator
# that accepts raw review text.
steps = [
    ('bow', CountVectorizer()),        # raw text -> token counts
    ('tfidf', TfidfTransformer()),     # token counts -> TF-IDF weights
    ('classifier', MultinomialNB()),   # Naive Bayes on the TF-IDF features
]
pipeline = Pipeline(steps=steps)

In [None]:
# Start again from the raw review text (X was previously replaced by the count
# matrix) and re-split with the same proportions and seed as before.
X, y = yelp_class['text'], yelp_class['stars']
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=101,
)

# The pipeline vectorizes, re-weights and fits using the training split only.
pipeline.fit(X_train, y_train)

In [None]:
# Score the pipeline on the held-out text: confusion matrix, then the
# per-class classification report.
predictions = pipeline.predict(X_test)
matrix = confusion_matrix(y_test, predictions)
report = classification_report(y_test, predictions)
print(matrix)
print(report)