In [1]:
import geopandas as gpd
import pandas as pd 

In [2]:
# Load the Airbnb reviews table (one row per review) from the dataset folder.
reviews_csv = '../../dataset/airbnb/reviews_complete.csv'
reviews = pd.read_csv(reviews_csv)

In [3]:
# Preview the first rows of the raw reviews table.
reviews.head()

Unnamed: 0,listing_id,id,date,reviewer_id,reviewer_name,comments
0,13131,32583962,2015-05-19,10395756,Juan,A unique place to say the least. Such a pleas...
1,13131,79179418,2016-06-11,33583317,Annie,We had a fabulous time in the green windmill f...
2,13131,97734954,2016-08-28,5834919,Katherine,FANTASTIC!!! Quintessential Airbnb experience....
3,13131,108353272,2016-10-15,49771730,Chin Wai,The owner greet us once we had arrived and ext...
4,13131,204651070,2017-10-19,112577092,Erin,Nikos’s place is fantastic! The quality of the...


In [4]:
# Count the rows whose reviewer_id already appeared in an earlier row,
# i.e. reviews written by someone who has reviewed before.
reviews['reviewer_id'].duplicated().sum()

43677

In [5]:
# Open the listings as a GeoDataFrame.
# NOTE(review): this path is the same reviews CSV that was loaded into
# `reviews`, yet the code below reads `longitude`/`latitude` columns and the
# next cell's output shows listing attributes (name, host_id, price, ...).
# The path most likely should point at the listings CSV -- confirm and correct.
listings = gpd.read_file('../../dataset/airbnb/reviews_complete.csv',crs='EPSG:4326')
# Build point geometries from the longitude/latitude columns and declare them
# as EPSG:4326 (lon/lat) coordinates.
listings = gpd.GeoDataFrame(listings,crs='EPSG:4326',geometry=gpd.points_from_xy(listings.longitude, listings.latitude))
# Keep only the features that intersect the box x (longitude) 25.3-25.5,
# y (latitude) 36.3-36.5.
listings = listings.cx[25.3:25.5, 36.3:36.5]

In [6]:
# Preview the listings that remain after the bounding-box selection.
listings.head()

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365,geometry
0,13131,Green Windmill,50838,Nikos,,Θήρας (Santorini),36.45351,25.43316,Entire home/apt,280,2,7,2019-07-05,0.13,3,202,POINT (25.43316 36.45351)
1,13443,Lilac Windmill Villa,50838,Nikos,,Θήρας (Santorini),36.45304,25.43263,Entire home/apt,252,2,10,2018-04-09,0.17,3,209,POINT (25.43263 36.45304)
10,48289,cavehouse with caldera sunsetview,219945,Ioanna,,Θήρας (Santorini),36.46203,25.37248,Entire home/apt,170,2,176,2019-09-23,1.94,3,147,POINT (25.37248 36.46203)
15,78178,Apartments in Firostefani- sunrise,51279,Travel & Tourism Greece,,Θήρας (Santorini),36.42499,25.42974,Private room,110,2,56,2019-06-12,0.56,54,13,POINT (25.42974 36.42499)
16,78182,Apartments in Firostefani- Garden,51279,Travel & Tourism Greece,,Θήρας (Santorini),36.42536,25.42928,Private room,135,2,53,2019-08-30,0.53,54,10,POINT (25.42928 36.42536)


In [7]:
# Put both join keys on a common integer dtype before merging.
listings = listings.astype({'id': int})
reviews = reviews.astype({'listing_id': int})

# Join every listing with its reviews (default inner join). Both tables have an
# `id` column, so the result carries `id_x` (listing id) and `id_y` (review id).
listings = listings.merge(reviews, left_on='id', right_on='listing_id')

In [8]:
# Keep only the rows of reviewers who wrote more than one review.
reviews_per_reviewer = listings.groupby('reviewer_id')['reviewer_id'].transform('size')
returning_visitors = listings[reviews_per_reviewer > 1]

In [9]:
# Preview the listing/review rows of reviewers with more than one review.
returning_visitors.head()   

Unnamed: 0,id_x,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,...,reviews_per_month,calculated_host_listings_count,availability_365,geometry,listing_id,id_y,date,reviewer_id,reviewer_name,comments
1,13131,Green Windmill,50838,Nikos,,Θήρας (Santorini),36.45351,25.43316,Entire home/apt,280,...,0.13,3,202,POINT (25.43316 36.45351),13131,79179418,2016-06-11,33583317,Annie,We had a fabulous time in the green windmill f...
11,13443,Lilac Windmill Villa,50838,Nikos,,Θήρας (Santorini),36.45304,25.43263,Entire home/apt,252,...,0.17,3,209,POINT (25.43263 36.45304),13443,53345473,2015-11-08,13071990,Igor,Definitely a wonderful stay at the lilac windm...
31,48289,cavehouse with caldera sunsetview,219945,Ioanna,,Θήρας (Santorini),36.46203,25.37248,Entire home/apt,170,...,1.94,3,147,POINT (25.37248 36.46203),48289,7818122,2013-10-04,8884745,Ming,We spent two wonderful days in Ioanna 's apart...
38,48289,cavehouse with caldera sunsetview,219945,Ioanna,,Θήρας (Santorini),36.46203,25.37248,Entire home/apt,170,...,1.94,3,147,POINT (25.37248 36.46203),48289,16194360,2014-07-23,645767,Cédric,The reservation was canceled 18 days before ar...
72,48289,cavehouse with caldera sunsetview,219945,Ioanna,,Θήρας (Santorini),36.46203,25.37248,Entire home/apt,170,...,1.94,3,147,POINT (25.37248 36.46203),48289,74153524,2016-05-13,28483361,Chen,Mr. Fotinos' cave house is clean and decorated...


In [10]:
# Put each reviewer's rows next to each other, oldest review first.
# `date` is still an ISO 'YYYY-MM-DD' string at this point, which sorts in date order.
sort_keys = ['reviewer_id', 'date']
returning_visitors = returning_visitors.sort_values(by=sort_keys)

In [11]:
# Preview after sorting: rows of the same reviewer are adjacent, earliest review first.
returning_visitors.head()

Unnamed: 0,id_x,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,...,reviews_per_month,calculated_host_listings_count,availability_365,geometry,listing_id,id_y,date,reviewer_id,reviewer_name,comments
33775,6002904,LA MER CAVE HOUSE-VOLCANO VIEW,3867239,Maria,,Θήρας (Santorini),36.4614,25.37409,Entire home/apt,180,...,3.85,28,346,POINT (25.37409 36.46140),6002904,103587213,2016-09-23,3840,Christine,the host is very responsive via airbnb app and...
62211,13635039,ARGITHEA APARTMENT,3867239,Maria,,Θήρας (Santorini),36.46072,25.37461,Entire home/apt,220,...,3.65,28,324,POINT (25.37461 36.46072),13635039,103722162,2016-09-24,3840,Christine,the host was very responsive via airbnb app be...
1658,163832,"""Gravitate"" caldera luxury view!",665681,Sensyo Home,,Θήρας (Santorini),36.42604,25.42784,Entire home/apt,130,...,2.31,4,182,POINT (25.42784 36.42604),163832,99771161,2016-09-05,15176,Charlie,"""Gravitate"" was the first place we landed at t..."
28219,4562213,Nicoletta Ammoudi Suite B,5044938,"Nicoletta, Christina, Alexandra",,Θήρας (Santorini),36.46148,25.37092,Entire home/apt,130,...,2.9,18,148,POINT (25.37092 36.46148),4562213,100357406,2016-09-08,15176,Charlie,"As you can see in the photos, the location is ..."
78958,20144115,Santa Rinoula Suites 1,136239977,Dimitrios,,Θήρας (Santorini),36.37492,25.42409,Private room,535,...,2.24,3,4,POINT (25.42409 36.37492),20144115,318702058,2018-09-04,44934,Darren,Dimitrios and Eirini’s place was beyond our ex...


In [12]:
# Parse the review dates into real datetimes.
returning_visitors = returning_visitors.assign(
    date=lambda frame: pd.to_datetime(frame['date'])
)

# Gap between each review and the same reviewer's previous review.
# The frame is already ordered by reviewer and date, so `diff` compares
# consecutive reviews; a reviewer's earliest review gets NaT.
review_gaps = returning_visitors.groupby('reviewer_id')['date'].diff()
returning_visitors = returning_visitors.assign(time_between_reviews=review_gaps)

In [13]:
# Preview the new `time_between_reviews` column; NaT marks a reviewer's
# earliest review, which has no previous review to diff against.
returning_visitors.head()

Unnamed: 0,id_x,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,...,calculated_host_listings_count,availability_365,geometry,listing_id,id_y,date,reviewer_id,reviewer_name,comments,time_between_reviews
33775,6002904,LA MER CAVE HOUSE-VOLCANO VIEW,3867239,Maria,,Θήρας (Santorini),36.4614,25.37409,Entire home/apt,180,...,28,346,POINT (25.37409 36.46140),6002904,103587213,2016-09-23,3840,Christine,the host is very responsive via airbnb app and...,NaT
62211,13635039,ARGITHEA APARTMENT,3867239,Maria,,Θήρας (Santorini),36.46072,25.37461,Entire home/apt,220,...,28,324,POINT (25.37461 36.46072),13635039,103722162,2016-09-24,3840,Christine,the host was very responsive via airbnb app be...,1 days
1658,163832,"""Gravitate"" caldera luxury view!",665681,Sensyo Home,,Θήρας (Santorini),36.42604,25.42784,Entire home/apt,130,...,4,182,POINT (25.42784 36.42604),163832,99771161,2016-09-05,15176,Charlie,"""Gravitate"" was the first place we landed at t...",NaT
28219,4562213,Nicoletta Ammoudi Suite B,5044938,"Nicoletta, Christina, Alexandra",,Θήρας (Santorini),36.46148,25.37092,Entire home/apt,130,...,18,148,POINT (25.37092 36.46148),4562213,100357406,2016-09-08,15176,Charlie,"As you can see in the photos, the location is ...",3 days
78958,20144115,Santa Rinoula Suites 1,136239977,Dimitrios,,Θήρας (Santorini),36.37492,25.42409,Private room,535,...,3,4,POINT (25.42409 36.37492),20144115,318702058,2018-09-04,44934,Darren,Dimitrios and Eirini’s place was beyond our ex...,NaT


In [14]:
# Keep only the reviews posted more than 365 days after the same reviewer's
# previous review. A reviewer's earliest review has a NaT gap, so it is
# dropped by this comparison as well.
# NOTE: the variable name says "one_month" although the threshold is one year;
# the name is kept because later cells refer to it.
MIN_RETURN_GAP = pd.Timedelta(days=365)

returning_visitors_one_month = returning_visitors[returning_visitors['time_between_reviews'] > MIN_RETURN_GAP]

In [15]:
# Preview the reviews that follow a gap of more than 365 days.
returning_visitors_one_month.head()

Unnamed: 0,id_x,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,...,calculated_host_listings_count,availability_365,geometry,listing_id,id_y,date,reviewer_id,reviewer_name,comments,time_between_reviews
59176,13235351,Offer Apartment Santorini,434714,Kelenis Apartments,,Θήρας (Santorini),36.41201,25.44525,Hotel room,55,...,5,0,POINT (25.44525 36.41201),13235351,330081994,2018-09-30,446104,IAN And GLYNIS,We stayed two nights at Kelenis - this place i...,1465 days
39691,6860527,VILLA MATILDE-VOLCANO VIEW,3867239,Maria,,Θήρας (Santorini),36.46134,25.37315,Entire home/apt,380,...,28,320,POINT (25.37315 36.46134),6860527,202812735,2017-10-13,558095,Eric,This was out second stay at one of Maria's San...,1494 days
57684,13035809,The 'Top Caldera view' villa in Oia,3197377,Fanis&Tina,,Θήρας (Santorini),36.4616,25.37408,Entire home/apt,700,...,13,216,POINT (25.37408 36.46160),13035809,268210374,2018-05-24,738047,Tanya,Fanis & Tina's place is breath taking. Amidst ...,1029 days
13017,972466,"Villa Fegari, Thea apartment",4173986,Yiannis,,Θήρας (Santorini),36.42533,25.42872,Hotel room,280,...,13,240,POINT (25.42872 36.42533),972466,67881522,2016-04-01,803005,Brandon,Yiannis was the best host! Easy to communicate...,1642 days
82390,22706829,INO HOTEL A08,162972715,Dorina,,Θήρας (Santorini),36.37764,25.48551,Hotel room,60,...,20,235,POINT (25.48551 36.37764),22706829,531270917,2019-09-17,944962,Foris,The best place to stay in Santorini! sparkling...,1495 days


In [16]:
# Restrict `returning_visitors` to the reviewers that have at least one
# long-gap review, keeping all of their reviews (including the earliest one).
long_gap_reviewers = returning_visitors_one_month['reviewer_id']
is_long_gap_reviewer = returning_visitors['reviewer_id'].isin(long_gap_reviewers)

# .copy() makes the result an independent frame, so adding columns to it in
# later cells does not raise SettingWithCopyWarning.
returning_visitors = returning_visitors[is_long_gap_reviewer].copy()

In [17]:
# Preview the retained reviewers: their earliest review (NaT gap) appears
# again next to the later, long-gap review.
returning_visitors.head()

Unnamed: 0,id_x,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,...,calculated_host_listings_count,availability_365,geometry,listing_id,id_y,date,reviewer_id,reviewer_name,comments,time_between_reviews
8739,775712,Double Studio in Karterados village,4091774,Helen,,Θήρας (Santorini),36.41179,25.44721,Entire home/apt,80,...,10,164,POINT (25.44721 36.41179),775712,20224739,2014-09-26,446104,IAN And GLYNIS,Helen was very welcoming and helpful with her ...,NaT
59176,13235351,Offer Apartment Santorini,434714,Kelenis Apartments,,Θήρας (Santorini),36.41201,25.44525,Hotel room,55,...,5,0,POINT (25.44525 36.41201),13235351,330081994,2018-09-30,446104,IAN And GLYNIS,We stayed two nights at Kelenis - this place i...,1465 days
6456,739427,VILLA KALLISTI-VOLCANO VIEW,3867239,Maria,,Θήρας (Santorini),36.46086,25.37466,Entire home/apt,250,...,28,330,POINT (25.37466 36.46086),739427,7204639,2013-09-10,558095,Eric,Wow!\r\n\r\nVilla Kallisti is exactly what you...,NaT
39691,6860527,VILLA MATILDE-VOLCANO VIEW,3867239,Maria,,Θήρας (Santorini),36.46134,25.37315,Entire home/apt,380,...,28,320,POINT (25.37315 36.46134),6860527,202812735,2017-10-13,558095,Eric,This was out second stay at one of Maria's San...,1494 days
23716,3080829,"FAMILY FRIENDLY, BOUTIQUE HOTEL",4906441,Maria,,Θήρας (Santorini),36.35967,25.40123,Entire home/apt,38,...,24,151,POINT (25.40123 36.35967),3080829,40346459,2015-07-30,738047,Tanya,The Rodakas Family Hotel is perfect! It looks ...,NaT


In [25]:
# Distinct reviewer ids, in order of first appearance.
reviewer_id = returning_visitors['reviewer_id'].drop_duplicates().to_numpy()
print(reviewer_id.size)

217


In [19]:
import folium

In [24]:
# Marker colours, one per plotted reviewer.
colors = ['red', 'blue', 'green', 'purple', 'orange', 'darkred', 'lightred', 'beige', 'darkblue', 'darkgreen']

# Centre the map on the mean coordinates of all returning-visitor listings.
m = folium.Map(location=[returning_visitors.geometry.y.mean(), returning_visitors.geometry.x.mean()], zoom_start=12)

# Draw the listings of the first ten reviewers, one colour per reviewer.
# zip() stops at the shorter input, so at most len(colors) reviewers are drawn.
for rid, color in zip(reviewer_id[0:10], colors):
    # Icon shared by all markers of this reviewer.
    icon = folium.Icon(color=color, icon='user')

    reviewer_listings = returning_visitors[returning_visitors['reviewer_id'] == rid][['id_x', 'geometry', 'reviewer_id']]

    # explore() adds the markers to the existing map and returns it.
    m = reviewer_listings.explore(marker_type='marker', marker_kwds={'icon': icon}, m=m)

m


In [36]:
# Distance between consecutive listings reviewed by the same reviewer, computed
# with geopandas only (no geopy). Rows are already ordered by reviewer and date;
# a reviewer's first review has no predecessor to measure against.
# NOTE(review): the geometries are in EPSG:4326, so these distances are in
# degrees, not metres. Reproject with `to_crs` before reading them as physical
# distances; the thresholds applied in later cells assume degrees.
# group_keys=False keeps the original row index on the result, so the column
# assignment still aligns on pandas >= 2.0, where apply() would otherwise
# prepend the group key to the index.
returning_visitors['distance'] = (
    returning_visitors
    .groupby('reviewer_id', group_keys=False)['geometry']
    .apply(lambda geoms: geoms.distance(geoms.shift()))
)


  returning_visitors['distance'] = returning_visitors.groupby('reviewer_id')['geometry'].apply(lambda x: x.distance(x.shift()))


In [47]:
# Range of the consecutive-listing distances: maximum first, then minimum.
consecutive_distance = returning_visitors['distance']
print(consecutive_distance.max(), consecutive_distance.min())

0.14747739657317205 0.0


In [56]:
import matplotlib.pyplot as plt

In [90]:
import random

In [134]:
# Reviews that lie more than 0.1 away from the same reviewer's previous listing.
# `distance` is computed on EPSG:4326 geometries, so the unit is degrees.
different_behavior = returning_visitors[returning_visitors['distance'] > 0.1]

print(different_behavior['reviewer_id'].nunique())

# Marker colours, one per plotted reviewer.
colors = ['red', 'blue', 'green', 'purple', 'orange', 'darkred', 'lightred', 'beige', 'darkblue', 'darkgreen']

# Centre the map on the mean coordinates of all returning-visitor listings.
m = folium.Map(location=[returning_visitors.geometry.y.mean(), returning_visitors.geometry.x.mean()], zoom_start=12)
reviewer_id_different = different_behavior['reviewer_id'].unique()

# Draw every listing reviewed by a small sample (positions 3-5) of these
# reviewers, one colour per reviewer.
for rid, color in zip(reviewer_id_different[3:6], colors):
    # Icon shared by all markers of this reviewer.
    icon = folium.Icon(color=color, icon='user')

    reviewer_listings = returning_visitors[returning_visitors['reviewer_id'] == rid][['id_x', 'geometry', 'reviewer_id']]

    # explore() adds the markers to the existing map and returns it.
    m = reviewer_listings.explore(marker_type='marker', marker_kwds={'icon': icon}, m=m)

m

19


In [132]:
# Reviews that lie less than 0.001 away from the same reviewer's previous listing.
# `distance` is computed on EPSG:4326 geometries, so the unit is degrees.
similar_behavior = returning_visitors[returning_visitors['distance'] < 0.001]

print(similar_behavior['reviewer_id'].nunique())

# Marker colours, one per plotted reviewer.
colors = ['red', 'blue', 'green', 'purple', 'orange', 'darkred', 'lightred', 'beige', 'darkblue', 'darkgreen']

# Centre the map on the mean coordinates of all returning-visitor listings.
m = folium.Map(location=[returning_visitors.geometry.y.mean(), returning_visitors.geometry.x.mean()], zoom_start=12)
reviewer_id_similar = similar_behavior['reviewer_id'].unique()

# Draw every listing reviewed by a small sample (positions 15-19) of these
# reviewers, one colour per reviewer.
for rid, color in zip(reviewer_id_similar[15:20], colors):
    # Icon shared by all markers of this reviewer.
    icon = folium.Icon(color=color, icon='user')

    reviewer_listings = returning_visitors[returning_visitors['reviewer_id'] == rid][['id_x', 'geometry', 'reviewer_id']]

    # explore() adds the markers to the existing map and returns it.
    m = reviewer_listings.explore(marker_type='marker', marker_kwds={'icon': icon}, m=m)

m

42


In [135]:
# A reviewer can appear in both groups (one short hop and one long hop).
# Such reviewers count as "different", so remove them from the similar group.
reviewer_id_similar = similar_behavior['reviewer_id'].unique()
reviewer_id_different = different_behavior['reviewer_id'].unique()

# Set lookup instead of scanning the NumPy array once per candidate id.
different_ids = set(reviewer_id_different)
reviewer_id_similar = [rid for rid in reviewer_id_similar if rid not in different_ids]

# Number of reviewers with different behavior, then with similar behavior only.
print(len(reviewer_id_different), len(reviewer_id_similar))

19 40
