In [1]:
import pandas as pd
import pickle
from textblob import TextBlob

## Importing Data

In [2]:
# Reviews data: read reviews.csv from the working directory into a DataFrame.
reviews = pd.read_csv('reviews.csv')
# A bare expression as the last line of the cell displays the frame.
reviews

Unnamed: 0,listing_id,id,date,reviewer_id,reviewer_name,comments
0,4826,3419,2009-06-01,13823,Anja,We just came back from a weeks stay at Kaan's ...
1,20815,31174,2010-03-24,49388,Chuck,My daughter and her friend and I all stayed at...
2,20815,40738,2010-05-08,93598,Sara,You cannot find a better place to stay in Turk...
3,20815,49182,2010-06-02,108540,Suzan,This comfortable apartment truly deserves the ...
4,20815,85462,2010-08-27,126482,William,I second all the excellent reviews of this pla...
...,...,...,...,...,...,...
178772,40401523,570449110,2019-11-29,147759516,Leilani,The host canceled this reservation 2 days befo...
178773,40420409,570715433,2019-11-30,262947736,Salih,Memnun kalacaksınız
178774,40421668,569933244,2019-11-28,186037601,Ferdi,The host canceled this reservation 33 days bef...
178775,40440084,570251114,2019-11-29,287155158,M Rawand,The host canceled this reservation 32 days bef...


In [3]:
# listings_v2: load the pickled listings DataFrame.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# 'listings_v2' must only ever be a file produced by a trusted source.
# The context manager closes the handle even if unpickling raises.
with open('listings_v2', 'rb') as fh:
    listings_v2 = pickle.load(fh)

listings_v2

Unnamed: 0,id,host_is_superhost,neighbourhood,latitude,longitude,property_type,room_type,accommodates,bathrooms,bedrooms,...,family/kid friendly,gym,elevator,wifi,breakfast,heating,air conditioning,tv,internet,extra pillows and blankets
0,4826,0.0,Uskudar,41.05650,29.05367,Apartment,Entire home/apt,2,1.0,0.0,...,1,0,0,1,1,1,0,1,1,0
1,20815,0.0,Besiktas,41.06984,29.04545,Apartment,Entire home/apt,3,1.0,2.0,...,1,0,0,0,0,1,1,1,0,0
2,27271,0.0,Beyoglu,41.03220,28.98216,Apartment,Entire home/apt,2,1.0,1.0,...,1,0,0,1,0,1,0,1,1,1
3,28277,0.0,Sisli,41.04471,28.98567,Serviced apartment,Hotel room,5,1.0,1.0,...,1,0,1,1,0,1,1,1,1,0
4,28318,0.0,Sariyer,41.09048,29.05559,Apartment,Entire home/apt,2,1.0,2.0,...,1,0,0,1,0,1,0,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20608,39001204,0.0,Beyoglu,41.03495,28.98621,Apartment,Entire home/apt,3,1.0,2.0,...,0,0,0,1,0,1,0,1,0,0
20609,39001388,0.0,Beyoglu,41.03027,28.97853,House,Private room,2,1.0,,...,0,0,0,1,0,1,0,0,0,0
20610,39001417,0.0,Fatih,41.00408,28.97318,Boutique hotel,Private room,5,1.0,,...,0,0,0,1,1,1,1,1,0,0
20611,39001582,0.0,Sariyer,41.10560,29.04835,Apartment,Private room,2,1.0,1.0,...,0,0,0,1,0,1,0,0,0,0


## Dropping reviews with null comments

In [4]:
# Flag the reviews whose comment is missing, collect their index labels,
# and drop those rows by label.
missing_comment_mask = reviews['comments'].isnull()
reviews_null_indexes = reviews.index[missing_comment_mask.values].tolist()
reviews = reviews.drop(labels=reviews_null_indexes, axis=0)

### Adding polarity feature to reviews dataframe

In [5]:
def comment_polarity(df):
    """Add a ``polarity`` column holding the TextBlob sentiment polarity of each comment.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``comments`` column of text. Every value is passed to
        ``TextBlob`` as-is, so missing comments must be removed beforehand.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, modified in place, with a new float
        ``polarity`` column (one score per row, in row order).
    """
    # Score the comments of the frame that was passed in, not the notebook-level
    # `reviews` global, so the function works for any frame.
    df["polarity"] = [
        float(TextBlob(comment).sentiment.polarity) for comment in df["comments"]
    ]
    return df

# Score every review comment; comment_polarity adds the "polarity" column to
# the frame it is given and returns that same frame.
reviews = comment_polarity(reviews)

In [6]:
# Keep only listing_id and the polarity scores, then take the average polarity
# of each listing_id. Selecting the two needed columns explicitly (instead of
# dropping a fixed list of unwanted ones) keeps any other numeric column of
# `reviews` from being averaged and merged into the listings.
polarity_df = (
    reviews[['listing_id', 'polarity']]
    .groupby('listing_id', as_index=False)
    .mean()
)

# Rename the key column (listing_id) to match the one in listings_v2 (id).
# rename(..., inplace=True) returns None, so the renamed frame is reassigned
# rather than binding the (None) result to a second name.
polarity_df = polarity_df.rename(columns={'listing_id': 'id'})

# Add the polarity feature to listings_v2. The left join keeps every listing;
# listings without any review get NaN polarity.
listings_v3 = pd.merge(listings_v2, polarity_df, how='left', on='id')

# Remove the id column from the dataset now that the join is done.
listings_v3 = listings_v3.drop(columns='id')

In [8]:
# Summarise the merged frame: column names, non-null counts and dtypes.
listings_v3.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 20613 entries, 0 to 20612
Data columns (total 50 columns):
host_is_superhost                               20588 non-null float64
neighbourhood                                   20613 non-null object
latitude                                        20613 non-null float64
longitude                                       20613 non-null float64
property_type                                   20613 non-null object
room_type                                       20613 non-null object
accommodates                                    20613 non-null int64
bathrooms                                       20533 non-null float64
bedrooms                                        20553 non-null float64
beds                                            20384 non-null float64
bed_type                                        20613 non-null object
price                                           20613 non-null int32
security_deposit                               

## Saving listings_v3

In [10]:
# Persist listings_v3 as a pickle file named "listings_v3" in the working
# directory. The context manager flushes and closes the handle even if
# pickle.dump raises, so no open file handle is left behind.
with open("listings_v3", 'wb') as fh:
    pickle.dump(listings_v3, fh)