In [126]:
from pyspark.sql import SparkSession
from pyspark.sql import functions as F
from pyspark.sql.window import Window
from pyspark.sql.functions import when,col,concat,lit

In [127]:
spark = SparkSession.builder.appName('Host Popularity Rental Prices').getOrCreate()

In [128]:
df_input = spark.read.options(header='True' , InferSchema='True', quote= '"',escape= '"').csv('airbnb_host_searches.csv')
#df_input.show()

In [129]:
df_pop_rating = df_input.withColumn("host_pop_rating", when(df_input.number_of_reviews == 0,"New").when(df_input.number_of_reviews.between(0,5),"New").when(df_input.number_of_reviews.between(0,5),"Rising").when(df_input.number_of_reviews.between(6,15),"Trending Up").when(df_input.number_of_reviews.between(16,40),"Popular").when(df_input.number_of_reviews > 40 ,"Hot").otherwise(""))
df_pop_rating = df_pop_rating.withColumn("host_id", concat(col("price"), lit("-"), col("room_type"), lit("-"), col("host_since"), col("zipcode"), col("number_of_reviews"))).select('host_id','id','host_pop_rating','price')

In [130]:
df_stats_avg = df_pop_rating.groupBy('host_pop_rating').avg('price').select(col("host_pop_rating").alias("host_pop_rating_avg"),col("avg(price)").alias("avg_price"))
df_stats_max = df_pop_rating.groupBy('host_pop_rating').max('price').select(col("host_pop_rating").alias("host_pop_rating_max"),col("max(price)").alias("max_price"))
df_stats_min = df_pop_rating.groupBy('host_pop_rating').min('price').select(col("host_pop_rating").alias("host_pop_rating_min"),col("min(price)").alias("min_price"))

In [131]:
df_join1 = df_stats_avg.join(df_stats_max, df_stats_avg.host_pop_rating_avg == df_stats_max.host_pop_rating_max ,'inner')
df_output = df_join1.join(df_stats_min, df_join1.host_pop_rating_avg == df_stats_min.host_pop_rating_min ,'inner').select(col("host_pop_rating_avg").alias("host_pop_rating"),col("min_price"),col("avg_price"),col("max_price"))
df_output.show()

+---------------+---------+------------------+---------+
|host_pop_rating|min_price|         avg_price|max_price|
+---------------+---------+------------------+---------+
|    Trending Up|   361.09| 476.2771794871794|   685.65|
|        Popular|   270.81|           472.815|   667.83|
|            Hot|   340.12| 467.4673913043479|   633.51|
|            New|   313.55|511.34202531645565|   741.76|
+---------------+---------+------------------+---------+

