In [71]:
import requests

In [72]:
from bs4 import BeautifulSoup

In [73]:
# Fetch the Yelp business page. The timeout keeps the cell from hanging
# indefinitely, and raise_for_status() surfaces HTTP errors right here instead
# of letting an error page flow into the parser further down.
r = requests.get('https://www.yelp.com/biz/tesla-san-francisco?osq=Tesla+Dealership', timeout=30)
r.raise_for_status()

In [74]:
# Preview only the beginning of the HTML; displaying the entire page floods the notebook.
r.text[:1000]



In [75]:
# Parse the downloaded page with the 'html.parser' backend.
soup = BeautifulSoup(markup=r.text, features='html.parser')

In [76]:
# Collect every element carrying the review CSS class (find_all is the
# current name of the legacy findAll alias).
# NOTE(review): this class name looks auto-generated and may change when the
# site is redeployed — confirm it still matches before trusting an empty result.
divs = soup.find_all(class_="review__373c0__13kpL")

In [77]:
# Take the text of the first <p> inside each review container. Containers
# without a <p> are skipped: find() returns None for them, and calling .text
# on None would raise AttributeError.
reviews = [
    paragraph.text
    for paragraph in (div.find('p') for div in divs)
    if paragraph is not None
]

In [78]:
# Show the scraped review texts.
reviews

['I had an issue with the undercarriage cover. The rep Daniel and manager Robert listened to my concern and fixed the issue. \xa0Service was prompt and I left satisfied.',
 "I ended up filing a complaint against them with the California Bureau of Automotive Repair for not refunding me for replacement parts that weren't actually required to repair my car and didn't fix my issues.Sigh. Tesla keeps letting me down with post-warranty support.Time for a new car anyway-- It's not gonna be a Tesla.",
 "We dropped our car at our scheduled appointment at 11am, was told the basic car maintenance will be complete at 4PM (tire rotation, wheel alignment). We decide to stay in the area as we reside in Oakland. Fast forward 5 hours later, we check with the car only to find service has yet to start due to some miscommunication! I'm confused to why claiming to be a future forward company - a simple call, text, communication through the app to clear up the matter was never done. If you think taking your

# Analysing the data

In [79]:
import pandas as pd
import numpy as np

In [80]:
# Build the frame directly from the list of review strings. Going through
# np.array() first is unnecessary and, for an empty list, would produce a
# float64 column instead of a text column.
df = pd.DataFrame({'review': reviews})

In [81]:
# Peek at the first five scraped reviews.
df.head(5)

Unnamed: 0,review
0,I had an issue with the undercarriage cover. T...
1,I ended up filing a complaint against them wit...
2,We dropped our car at our scheduled appointmen...
3,Today was delivery day and we were pretty exci...
4,Horrible customer service:1) Placed order via ...


In [82]:
# Number of reviews that were scraped.
df['review'].shape[0]

10

In [83]:
# Words per review, where a word is any whitespace-delimited token.
df['word_count'] = df['review'].map(lambda text: len(text.split()))

In [84]:
# Show the whole frame (review text plus word_count).
df

Unnamed: 0,review,word_count
0,I had an issue with the undercarriage cover. T...,29
1,I ended up filing a complaint against them wit...,54
2,We dropped our car at our scheduled appointmen...,125
3,Today was delivery day and we were pretty exci...,306
4,Horrible customer service:1) Placed order via ...,293
5,TERRIBLE customer services. The guy name CK in...,90
6,I recently brought a brand new Tesla to fix so...,156
7,My Tesla 2016 model x would be the perfect car...,58
8,The service center here is TERRIBLE. I had a t...,267
9,"Unfortunately, as a recent new Tesla owner, I ...",184


In [85]:
# Characters per review, whitespace included.
df['char_count'] = df['review'].map(len)

In [86]:
def average_words(review):
    """Return the mean length, in characters, of the words in ``review``.

    Words are the whitespace-separated tokens of the string. When ``review``
    has no words (empty or whitespace-only) the result is 0.0 instead of a
    ZeroDivisionError.
    """
    words = review.split()
    if not words:
        # Nothing to average; avoid dividing by zero.
        return 0.0
    return sum(len(word) for word in words) / len(words)

In [87]:
# Mean word length per review; the helper is passed directly, no lambda wrapper needed.
df['average_word_length'] = df['review'].map(average_words)

In [88]:
import nltk
# Fetch the NLTK corpora; download() reports when a package is already up to date.
nltk.download('stopwords')
# NOTE(review): wordnet is presumably needed by the lemmatization step further down — confirm.
nltk.download('wordnet')
from nltk.corpus import stopwords

[nltk_data] Downloading package stopwords to /home/akash/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /home/akash/nltk_data...
[nltk_data]   Unzipping corpora/wordnet.zip.


In [89]:
# English stopwords as a set: the cells below only do membership checks, and
# a set answers those in constant time instead of scanning a list per token.
stop_words = set(stopwords.words('english'))

In [90]:
# Count the tokens in each review whose lowercase form is a stopword.
df['stopword_count'] = df['review'].map(
    lambda text: sum(1 for token in text.split() if token.lower() in stop_words)
)

In [91]:
# First five rows with the count features added so far.
df.head(5)

Unnamed: 0,review,word_count,char_count,average_word_length,stopword_count
0,I had an issue with the undercarriage cover. T...,29,164,4.655172,14
1,I ended up filing a complaint against them wit...,54,322,4.981481,28
2,We dropped our car at our scheduled appointmen...,125,663,4.312,60
3,Today was delivery day and we were pretty exci...,306,1730,4.656863,145
4,Horrible customer service:1) Placed order via ...,293,1571,4.327645,139


In [92]:
# Share of each review's tokens that are stopwords.
df['stopword_rate'] = df['stopword_count'].div(df['word_count'])

In [93]:
# First five rows, now including stopword_rate.
df.head(5)

Unnamed: 0,review,word_count,char_count,average_word_length,stopword_count,stopword_rate
0,I had an issue with the undercarriage cover. T...,29,164,4.655172,14,0.482759
1,I ended up filing a complaint against them wit...,54,322,4.981481,28,0.518519
2,We dropped our car at our scheduled appointmen...,125,663,4.312,60,0.48
3,Today was delivery day and we were pretty exci...,306,1730,4.656863,145,0.473856
4,Horrible customer service:1) Placed order via ...,293,1571,4.327645,139,0.474403


In [94]:
# Reviews ordered from lowest to highest stopword share.
df.sort_values('stopword_rate', ascending=True)

Unnamed: 0,review,word_count,char_count,average_word_length,stopword_count,stopword_rate
7,My Tesla 2016 model x would be the perfect car...,58,345,4.948276,24,0.413793
9,"Unfortunately, as a recent new Tesla owner, I ...",184,1079,4.86413,77,0.418478
5,TERRIBLE customer services. The guy name CK in...,90,527,4.866667,39,0.433333
6,I recently brought a brand new Tesla to fix so...,156,821,4.269231,71,0.455128
3,Today was delivery day and we were pretty exci...,306,1730,4.656863,145,0.473856
4,Horrible customer service:1) Placed order via ...,293,1571,4.327645,139,0.474403
2,We dropped our car at our scheduled appointmen...,125,663,4.312,60,0.48
0,I had an issue with the undercarriage cover. T...,29,164,4.655172,14,0.482759
8,The service center here is TERRIBLE. I had a t...,267,1424,4.337079,131,0.490637
1,I ended up filing a complaint against them wit...,54,322,4.981481,28,0.518519


In [95]:
# Summary statistics for the numeric feature columns.
df.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,word_count,char_count,average_word_length,stopword_count,stopword_rate
count,10.0,10.0,10.0,10.0,10.0
mean,156.2,864.6,4.621854,72.8,0.464091
std,103.186778,559.61995,0.287684,49.615858,0.033511
min,29.0,164.0,4.269231,14.0,0.413793
25%,66.0,390.5,4.330003,30.75,0.438782
50%,140.5,742.0,4.656018,65.5,0.474129
75%,246.25,1337.75,4.866033,117.5,0.482069
max,306.0,1730.0,4.981481,145.0,0.518519


#  Data Cleaning

In [96]:
# Lowercase every token and re-join with single spaces (this also collapses runs of whitespace).
df['lowercase'] = df['review'].map(lambda text: " ".join(map(str.lower, text.split())))

In [97]:
# First five rows, now including the lowercase text.
df.head(5)

Unnamed: 0,review,word_count,char_count,average_word_length,stopword_count,stopword_rate,lowercase
0,I had an issue with the undercarriage cover. T...,29,164,4.655172,14,0.482759,i had an issue with the undercarriage cover. t...
1,I ended up filing a complaint against them wit...,54,322,4.981481,28,0.518519,i ended up filing a complaint against them wit...
2,We dropped our car at our scheduled appointmen...,125,663,4.312,60,0.48,we dropped our car at our scheduled appointmen...
3,Today was delivery day and we were pretty exci...,306,1730,4.656863,145,0.473856,today was delivery day and we were pretty exci...
4,Horrible customer service:1) Placed order via ...,293,1571,4.327645,139,0.474403,horrible customer service:1) placed order via ...


In [98]:
# Remove every character that is neither a word character nor whitespace.
# regex=True must be explicit: current pandas treats the pattern as a literal
# string by default, which would leave all punctuation in place. The raw
# string keeps \w and \s from being read as string escapes.
df['punctuation'] = df['lowercase'].str.replace(r'[^\w\s]', '', regex=True)

  df['punctuation'] = df['lowercase'].str.replace('[^\w\s]','')


In [99]:
# Keep only the tokens that are not stopwords.
df['stopwords'] = df['punctuation'].map(
    lambda text: " ".join(token for token in text.split() if token not in stop_words)
)

In [100]:
# The 30 most frequent tokens across all reviews once stopwords are removed.
stopword_free_tokens = " ".join(df['stopwords']).split()
pd.Series(stopword_free_tokens).value_counts().head(30)

service        26
car            19
tesla          12
us              9
new             8
time            6
appointment     6
get             6
even            6
issue           5
jessica         5
said            5
customer        5
back            5
day             5
didnt           4
guy             4
would           4
upgrade         4
brand           4
problem         4
customers       4
told            4
order           4
another         3
explained       3
job             3
advise          3
call            3
without         3
dtype: int64

In [101]:
# Additional low-information tokens to filter out after the standard stopword pass.
other_stop_words = "get without would week us test left one even also go asked ap".split()

In [102]:
# Drop the extra filler tokens listed in other_stop_words.
df['cleanviews'] = df['stopwords'].map(
    lambda text: " ".join(token for token in text.split() if token not in other_stop_words)
)

In [103]:
# Inspect the fully cleaned review text.
df.loc[:, 'cleanviews']

0    issue undercarriage cover rep daniel manager r...
1    ended filing complaint california bureau autom...
2    dropped car scheduled appointment 11am told ba...
3    today delivery day pretty excited collect car ...
4    horrible customer service1 placed order via ap...
5    terrible customer services guy name ck front i...
6    recently brought brand new tesla fix post deli...
7    tesla 2016 model x perfect car ifservice techs...
8    service center terrible tire needed patched ta...
9    unfortunately recent new tesla owner agree maj...
Name: cleanviews, dtype: object

In [104]:
# The 30 most frequent tokens after the extra filler words are removed too.
cleaned_tokens = " ".join(df['cleanviews']).split()
pd.Series(cleaned_tokens).value_counts().head(30)

service        26
car            19
tesla          12
new             8
time            6
appointment     6
issue           5
customer        5
jessica         5
back            5
said            5
day             5
problem         4
didnt           4
guy             4
customers       4
upgrade         4
brand           4
told            4
order           4
find            3
weeks           3
explained       3
text            3
needed          3
shop            3
redwood         3
actually        3
fix             3
today           3
dtype: int64

In [105]:
# First five rows with every cleaning stage side by side.
df.head(5)

Unnamed: 0,review,word_count,char_count,average_word_length,stopword_count,stopword_rate,lowercase,punctuation,stopwords,cleanviews
0,I had an issue with the undercarriage cover. T...,29,164,4.655172,14,0.482759,i had an issue with the undercarriage cover. t...,i had an issue with the undercarriage cover th...,issue undercarriage cover rep daniel manager r...,issue undercarriage cover rep daniel manager r...
1,I ended up filing a complaint against them wit...,54,322,4.981481,28,0.518519,i ended up filing a complaint against them wit...,i ended up filing a complaint against them wit...,ended filing complaint california bureau autom...,ended filing complaint california bureau autom...
2,We dropped our car at our scheduled appointmen...,125,663,4.312,60,0.48,we dropped our car at our scheduled appointmen...,we dropped our car at our scheduled appointmen...,dropped car scheduled appointment 11am told ba...,dropped car scheduled appointment 11am told ba...
3,Today was delivery day and we were pretty exci...,306,1730,4.656863,145,0.473856,today was delivery day and we were pretty exci...,today was delivery day and we were pretty exci...,today delivery day pretty excited collect car ...,today delivery day pretty excited collect car ...
4,Horrible customer service:1) Placed order via ...,293,1571,4.327645,139,0.474403,horrible customer service:1) placed order via ...,horrible customer service1 placed order via ap...,horrible customer service1 placed order via ap...,horrible customer service1 placed order via ap...


#  Lemmatization

In [106]:
#import textblob
from textblob import Word

In [110]:
# Lemmatize each token of the cleaned text and re-join with single spaces.
df['lemmatized'] = df['cleanviews'].map(
    lambda text: " ".join(Word(token).lemmatize() for token in text.split())
)

In [111]:
# First five rows, now including the lemmatized text.
df.head(5)

Unnamed: 0,review,word_count,char_count,average_word_length,stopword_count,stopword_rate,lowercase,punctuation,stopwords,cleanviews,lemmatized
0,I had an issue with the undercarriage cover. T...,29,164,4.655172,14,0.482759,i had an issue with the undercarriage cover. t...,i had an issue with the undercarriage cover th...,issue undercarriage cover rep daniel manager r...,issue undercarriage cover rep daniel manager r...,issue undercarriage cover rep daniel manager r...
1,I ended up filing a complaint against them wit...,54,322,4.981481,28,0.518519,i ended up filing a complaint against them wit...,i ended up filing a complaint against them wit...,ended filing complaint california bureau autom...,ended filing complaint california bureau autom...,ended filing complaint california bureau autom...
2,We dropped our car at our scheduled appointmen...,125,663,4.312,60,0.48,we dropped our car at our scheduled appointmen...,we dropped our car at our scheduled appointmen...,dropped car scheduled appointment 11am told ba...,dropped car scheduled appointment 11am told ba...,dropped car scheduled appointment 11am told ba...
3,Today was delivery day and we were pretty exci...,306,1730,4.656863,145,0.473856,today was delivery day and we were pretty exci...,today was delivery day and we were pretty exci...,today delivery day pretty excited collect car ...,today delivery day pretty excited collect car ...,today delivery day pretty excited collect car ...
4,Horrible customer service:1) Placed order via ...,293,1571,4.327645,139,0.474403,horrible customer service:1) placed order via ...,horrible customer service1 placed order via ap...,horrible customer service1 placed order via ap...,horrible customer service1 placed order via ap...,horrible customer service1 placed order via ap...


#  Sentiment Analysis

In [109]:
from textblob import TextBlob

In [115]:
# Run the sentiment analysis once per review and unpack both scores from the
# result, instead of building and analysing a TextBlob twice per row.
# NOTE(review): the input has stopwords and punctuation removed, which may
# strip negations before scoring — confirm that is intended.
sentiments = df['lemmatized'].map(lambda text: TextBlob(text).sentiment)
df['polarity'] = sentiments.map(lambda sentiment: sentiment[0])
df['subjectivity'] = sentiments.map(lambda sentiment: sentiment[1])

In [122]:
# Drop the intermediate text columns now that the sentiment scores exist.
# The original list named 'stopword', but the column is 'stopwords', so drop()
# raised KeyError. The list now matches the columns absent from the displayed
# result. Reassigning (rather than inplace=True) with errors='ignore' keeps
# the cell safe to re-run.
df = df.drop(
    columns=['lowercase', 'punctuation', 'stopwords', 'cleanviews', 'lemmatized'],
    errors='ignore',
)
df.head()

Unnamed: 0,review,word_count,char_count,average_word_length,stopword_count,stopword_rate,polarity,subjectivity
0,I had an issue with the undercarriage cover. T...,29,164,4.655172,14,0.482759,0.3,0.6
1,I ended up filing a complaint against them wit...,54,322,4.981481,28,0.518519,-0.054545,0.251515
2,We dropped our car at our scheduled appointmen...,125,663,4.312,60,0.48,-0.020455,0.296861
3,Today was delivery day and we were pretty exci...,306,1730,4.656863,145,0.473856,0.291106,0.667274
4,Horrible customer service:1) Placed order via ...,293,1571,4.327645,139,0.474403,-0.065909,0.6


In [123]:
# Reviews ordered from most negative to most positive polarity.
df.sort_values('polarity', ascending=True)

Unnamed: 0,review,word_count,char_count,average_word_length,stopword_count,stopword_rate,polarity,subjectivity
4,Horrible customer service:1) Placed order via ...,293,1571,4.327645,139,0.474403,-0.065909,0.6
1,I ended up filing a complaint against them wit...,54,322,4.981481,28,0.518519,-0.054545,0.251515
5,TERRIBLE customer services. The guy name CK in...,90,527,4.866667,39,0.433333,-0.053247,0.475758
2,We dropped our car at our scheduled appointmen...,125,663,4.312,60,0.48,-0.020455,0.296861
8,The service center here is TERRIBLE. I had a t...,267,1424,4.337079,131,0.490637,0.007917,0.427381
6,I recently brought a brand new Tesla to fix so...,156,821,4.269231,71,0.455128,0.03683,0.429604
9,"Unfortunately, as a recent new Tesla owner, I ...",184,1079,4.86413,77,0.418478,0.068831,0.313412
7,My Tesla 2016 model x would be the perfect car...,58,345,4.948276,24,0.413793,0.25,0.383333
3,Today was delivery day and we were pretty exci...,306,1730,4.656863,145,0.473856,0.291106,0.667274
0,I had an issue with the undercarriage cover. T...,29,164,4.655172,14,0.482759,0.3,0.6
