In [1]:
import polars as pl
import plotly.express as px
import os
import glob

In [2]:
def airbnb_per_host(file,ptype,neighbourhood):
    """Share of available listings grouped by how many listings their host has.

    Parameters
    ----------
    file
        Path of the listings CSV passed to ``pl.read_csv``.
    ptype
        Value that ``property_type`` must equal for a listing to be kept.
    neighbourhood
        When truthy, listings are additionally restricted to rows whose
        ``neighbourhood_group_cleansed`` equals this value; when falsy the
        column is not consulted at all.

    Returns
    -------
    polars.DataFrame
        One row per host-size bucket with the columns ``binned_counts``
        (bucket label), ``count`` (number of listings whose host falls in
        that bucket) and ``percentage`` (``count`` as a percentage of all
        kept listings).
    """
    listings = pl.read_csv(file)

    # Assemble the row predicate once; only listings flagged as available
    # ("t") of the requested property type are kept.
    keep = pl.col('property_type') == ptype
    if neighbourhood:
        keep = keep & (pl.col('neighbourhood_group_cleansed') == neighbourhood)
    keep = keep & (pl.col('has_availability') == "t")
    listings = listings.filter(keep)

    # Number of kept listings per host.
    per_host = listings['host_id'].value_counts().sort('count')

    # Right-closed buckets: <=1, (1,2], (2,5], (5,10], (10,100], >100.
    bin_edges = [1, 2, 5, 10, 100]
    bin_names = ['1', '2', '+2', '+5', '+10', '+100']
    bucket = pl.col("count").cut(breaks=bin_edges, labels=bin_names, left_closed=False)
    per_host = per_host.with_columns(bucket.alias("binned_counts"))

    # Attach each listing's host bucket, then count listings (not hosts) per bucket.
    listings = listings.join(per_host, on='host_id', how='left')
    shares = listings['binned_counts'].value_counts().sort('binned_counts')

    n_listings = shares['count'].sum()
    as_percent = (pl.col('count') / n_listings) * 100
    return shares.with_columns(as_percent.alias('percentage'))

In [None]:
# glob does not guarantee any ordering, so sort for a reproducible scan order.
city_files = sorted(glob.glob("*.csv.gz"))

# Start from an empty, explicitly typed frame so the combined result has the
# expected schema even when no file matches the pattern.
city_frames = [
    pl.DataFrame({
        "binned_counts": pl.Series(dtype=pl.Categorical),
        "count": pl.Series(dtype=pl.UInt32),
        "percentage": pl.Series(dtype=pl.Float64),
        "city": pl.Series(dtype=pl.String)
    })
]

for file in city_files:
    # The city name is the part of the file name before the first underscore.
    file_name = os.path.basename(file)
    city = file_name.split('_')[0]
    print('Scanning started for --->',city)

    data = airbnb_per_host(file,'Entire rental unit',None)
    data = data.with_columns(pl.lit(city.capitalize()).alias("city"))
    city_frames.append(data)

# Concatenate once at the end instead of re-copying the growing frame on
# every iteration.
df_combined = pl.concat(city_frames, how="vertical")

print('Finished scanning of ' +str(len(city_files)) +' cities')

Scanning started for ---> amsterdam




Scanning started for ---> athens
Scanning started for ---> barcelona
Scanning started for ---> copenhagen
Scanning started for ---> crete
Scanning started for ---> edinburgh
Scanning started for ---> florence
Scanning started for ---> lisbon
Scanning started for ---> london
Scanning started for ---> malaga


In [None]:
# Put the "1" and "2" buckets on the negative side of the x-axis so they are
# drawn to the left of zero in the diverging bar chart.
# -abs() is used instead of `* -1` so that re-running this cell is harmless:
# multiplying by -1 a second time would flip the bars back to positive.
is_one_or_two = (pl.col("binned_counts") == "1") | (pl.col("binned_counts") == "2")
df_combined = df_combined.with_columns(
    pl.when(is_one_or_two)
    .then(-pl.col("percentage").abs())
    .otherwise(pl.col("percentage"))
    .alias("percentage")
)
df_combined

In [None]:
# Per-city totals for the two sides of the chart: the "1"/"2" buckets (left)
# and every other bucket (right). For each side we keep the rounded absolute
# total (label text) and the signed total pushed 2.5 units outwards (label x).
in_left_buckets = (pl.col("binned_counts") == "1") | (pl.col("binned_counts") == "2")
in_right_buckets = (pl.col("binned_counts") != "1") & (pl.col("binned_counts") != "2")

list_city = df_combined['city'].unique()
left_count, right_count, left_x, right_x = [], [], [], []

for city in list_city:
    print(city)
    city_rows = df_combined.filter(pl.col('city') == city)
    left_total = city_rows.filter(in_left_buckets)['percentage'].sum()
    right_total = city_rows.filter(in_right_buckets)['percentage'].sum()

    left_count.append(abs(round(left_total)))
    left_x.append(left_total - 2.5)
    right_count.append(abs(round(right_total)))
    right_x.append(right_total + 2.5)


graph_counts = pl.DataFrame(
    data={
        'left_text': left_count,
        'left_x': left_x,
        'right_text': right_count,
        'right_x': right_x,
        'city': list_city,
    }
)
graph_counts

In [None]:
# Bucket colours, in the order "2", "1", "+2", "+5", "+10", "+100" used by the
# bar chart. Defined here because the legend annotations below need it; without
# it this cell raises NameError on a fresh kernel. Keep in sync with the
# palette passed to px.bar.
palette=["#537c78","#7ba591","#cc222b","#f15b4c","#faa41b","#ffd45b"]

# One label per side of every city's bar. Rows are read by column name so the
# code does not depend on the column order of graph_counts.
annotations=[]
for row in graph_counts.iter_rows(named=True):
    annotations.append(dict(x=row['left_x'], y=row['city'], text=row['left_text'],
                            showarrow=False,font=dict(color="grey",weight='bold')))
    annotations.append(dict(x=row['right_x'], y=row['city'], text=row['right_text'],
                            showarrow=False,font=dict(color="grey",weight='bold')))

# Hand-placed bucket labels with arrows, coloured like their bars.
legend_annotation_text=["1","2","+2","+5","+10","+100"]
legend_annotation_pos=[-16,-3,5,15,40,65]
annotations_legend=[
    dict(x=x, y=14.5, text=text, showarrow=True, ax=0, ay=-20,arrowhead=2,font=dict(color=palettecolor,weight='bold'))
    for x, text,palettecolor in zip(legend_annotation_pos, legend_annotation_text,palette)
]

annotations.extend(annotations_legend)

# Free-floating captions.
text_annotation=['Number of properties','Cities with strict Airbnb Regulations']
text_annotation_xpos=[-40,-75]
text_annotation_ypos=[15,5.75]
annotations_text=[
    dict(x=x,y=y,text=text,showarrow=False,font=dict(color="grey",weight='bold'))
    for x,y,text in zip(text_annotation_xpos,text_annotation_ypos,text_annotation)
]
annotations.extend(annotations_text)

# Headings above the left and right halves of the chart.
subtitle_annotation=['Individual','Professional']
subtitle_annotation_xpos=[-50,50]
subtitle_annotation_ypos=[16,16]
subtitle_colors=['#7ba591','#faa41b']
annotations_subtitle=[
    dict(x=x,y=y,text=text,showarrow=False,font=dict(color=color,size=16))
    for x,y,text,color in zip(subtitle_annotation_xpos,subtitle_annotation_ypos,subtitle_annotation,subtitle_colors)
]
annotations.extend(annotations_subtitle)

In [None]:
# Cities ordered by the rounded left-hand total (ascending). Converted to a
# plain list because plotly's category_orders is documented as taking lists.
city_order=graph_counts.sort('left_text')['city'].to_list()
# One colour per host-size bucket, in the bucket order given to px.bar.
palette=["#537c78","#7ba591","#cc222b","#f15b4c","#faa41b","#ffd45b"]

In [None]:
# Horizontal stacked bars: one row per city, one coloured segment per bucket.
bucket_order = ["2", "1", "+2", "+5", "+10", "+100"]
fig = px.bar(
    df_combined,
    x="percentage",
    y="city",
    orientation='h',
    color='binned_counts',
    color_discrete_sequence=palette,
    category_orders={"binned_counts": bucket_order, 'city': city_order},
)

chart_title = dict(
    text="Who is your Airbnb host?<br><sup>% of airbnb listings in European cities owned by single/multiproperty hosts</sup>",
    font=dict(size=30),
)
legend_style = dict(
    orientation='h',
    x=0.5,
    y=-0.085,
    xanchor='center',
    yanchor='bottom',
    title="Number of properties per host",
)
fig.update_layout(
    height=700,
    width=1100,
    template='plotly_white',
    annotations=annotations,
    bargap=0.3,
    margin_b=10,
    margin_t=70,
    xaxis_title="",
    yaxis_title="",
    title=chart_title,
    font=dict(family="Franklin Gothic"),
    legend=legend_style,
)

# Tick labels show magnitudes only, so both directions read as positive shares.
tick_positions = [-75, -50, -25, 0, 25, 50, 75, 100]
tick_labels = ["0" if pos == 0 else f"{abs(pos)}%" for pos in tick_positions]
fig.update_xaxes(
    tickvals=tick_positions,
    ticktext=tick_labels,
    side='bottom',
    griddash='dash',
)

fig.update_yaxes(
    anchor='free',
    shift=10,
    tickfont=dict(size=18, weight='normal'),
)

# Overlay shapes, added in this order:
#   1. dotted horizontal rule at y=5.5
#   2. light grey band covering y=-0.5..5.5 (presumably the rows captioned
#      'Cities with strict Airbnb Regulations' -- confirm against the data)
#   3. solid vertical rule at x=0 separating the two sides
overlay_shapes = [
    dict(type="line", x0=-95, x1=90, y0=5.5, y1=5.5,
         line=dict(color="lightgrey", width=2, dash='dot')),
    dict(type="rect", x0=-95, x1=90, y0=-0.5, y1=5.5,
         fillcolor="rgba(211, 211, 211, 0.2)", line=dict(width=0)),
    dict(type="line", x0=0, x1=0, y0=-0.5, y1=15.25,
         line=dict(color="grey", width=2, dash='solid')),
]
for shape_spec in overlay_shapes:
    fig.add_shape(**shape_spec)

fig.show()