In [13]:
import altair as alt
import pandas as pd
import panel as pn
from datetime import date

pn.extension()

df_listings = pd.read_csv("listings (1).csv")
columns_needed = [
    "price", "longitude", "latitude", "name", 
    "availability_365", "host_name", "calculated_host_listings_count", 
    "neighbourhood_group", "last_review"
]

df_selected = df_listings[columns_needed].copy()


df_selected.dropna(inplace=True)


df_selected['last_review'] = pd.to_datetime(df_selected['last_review'], errors='coerce')


df_selected = df_selected[
    (df_selected["longitude"].between(-74.5, -73.5)) & 
    (df_selected["latitude"].between(40, 41))
]


if 'room_type' not in df_selected.columns:
    df_selected['room_type'] = df_listings['room_type']


min_date, max_date = df_selected['last_review'].min().date(), df_selected['last_review'].max().date()


start_date_picker = pn.widgets.DatePicker(name="Start Date", value=min_date, start=min_date, end=max_date)
end_date_picker = pn.widgets.DatePicker(name="End Date", value=max_date, start=min_date, end=max_date)
show_top_only_checkbox = pn.widgets.Checkbox(name="Show only areas with the highest average price", value=False)

neighbourhood_group_selector = pn.widgets.Select(name="Neighbourhood Group",
                                                 options=['All'] + sorted(df_selected['neighbourhood_group'].unique().tolist()),
                                                 value='All')

room_type_selector = pn.widgets.Select(name="Room Type",
                                       options=['All'] + sorted(df_selected['room_type'].unique().tolist()),
                                       value='Private room')


def update_heatmap(start_date, end_date, show_top_only):
    start_date, end_date = pd.to_datetime(start_date), pd.to_datetime(end_date)
    df_filtered = df_selected[
        (df_selected['last_review'] >= start_date) & (df_selected['last_review'] <= end_date)
    ]

    if df_filtered.empty:
        return pn.pane.Markdown("### No data available for selected time range")

    
    top_group = df_filtered.groupby('neighbourhood_group')['price'].mean().idxmax()
    if show_top_only:
        df_filtered = df_filtered[df_filtered['neighbourhood_group'] == top_group]

    if len(df_filtered) > 5000:
        df_filtered = df_filtered.sample(5000, random_state=42)

    
    heatmap = alt.Chart(df_filtered).mark_rect(opacity=0.4).encode(
        alt.X('longitude:Q', bin=alt.Bin(maxbins=60),title='longitude'),
        alt.Y('latitude:Q', bin=alt.Bin(maxbins=60),title='latitude'),
        alt.Color('count():Q', scale=alt.Scale(scheme='reds'), title='Density')
    )

    
    scatter = alt.Chart(df_filtered).transform_calculate(
        price_range="datum.price <= 200 ? '0-200' : \
                    datum.price <= 400 ? '200-400' : \
                    datum.price <= 600 ? '400-600' : \
                    datum.price <= 800 ? '600-800' : \
                    datum.price <= 1000 ? '800-1000' : '1000+'"
    ).mark_circle(opacity=0.7).encode(
        x=alt.X('longitude:Q', title='longitude'),
        y=alt.Y('latitude:Q', title='latitude'),
        size=alt.Size('price_range:N', scale=alt.Scale(
            domain=['0-200', '200-400', '400-600', '600-800', '800-1000', '1000+'],
            range=[50, 100, 150, 200, 250, 350]
        ), title='Price Range'),
        color=alt.Color('neighbourhood_group:N', scale=alt.Scale(scheme='category20b'), title='Neighbourhood Group'),
        tooltip=['name:N', 'price:Q', 'neighbourhood_group:N', 'price_range:N']
    )

    return pn.pane.Vega(heatmap + scatter)


def update_price_distribution(neighbourhood_group, room_type):
    df_filtered = df_selected.copy()

    if neighbourhood_group != 'All':
        df_filtered = df_filtered[df_filtered['neighbourhood_group'] == neighbourhood_group]

    if room_type != 'All':
        df_filtered = df_filtered[df_filtered['room_type'] == room_type]

    if df_filtered.empty:
        return pn.pane.Markdown("### No data available for selected filters")

    bins = [0, 200, 400, 600, 800, 1000, float('inf')]
    labels = ['0-200', '200-400', '400-600', '600-800', '800-1000', '1000+']
    df_filtered['price_range'] = pd.cut(df_filtered['price'], bins=bins, labels=labels, right=False)

    price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
    total_counts = price_counts.groupby('neighbourhood_group')['count'].transform('sum')
    price_counts['percentage'] = price_counts['count'] / total_counts * 100

    chart = alt.Chart(price_counts).mark_bar().encode(
        x=alt.X('price_range:N', title='Price Range',
               sort=['0-200', '200-400', '400-600', '600-800', '800-1000', '1000+']),
        y=alt.Y('percentage:Q', title='Percentage (%)'),
        color=alt.Color('price_range:N', scale=alt.Scale(scheme='tableau10'),
                       sort=['0-200', '200-400', '400-600', '600-800', '800-1000', '1000+']),
        column=alt.Column('neighbourhood_group:N', title='Neighbourhood Group'),
        tooltip=['neighbourhood_group', 'price_range', alt.Tooltip('percentage:Q', format=".2f")]
    ).properties(
        width=150,
        height=300,
        title=f'Price Distribution for {room_type} in {neighbourhood_group}'
    )

    return chart


heatmap_ui = pn.Column(
    "## Property heat and price scatter plot",
    pn.Row(start_date_picker, end_date_picker, show_top_only_checkbox),
    pn.bind(update_heatmap, start_date_picker, end_date_picker, show_top_only_checkbox)
)

price_distribution_ui = pn.Column(
    "## Room type price range distribution",
    pn.Row(neighbourhood_group_selector, room_type_selector),
    pn.bind(update_price_distribution, neighbourhood_group_selector, room_type_selector)
)

tasks_html = pn.pane.HTML("""
    <h3>Tasks</h3>
    <ol>
    <li><b>Find the neighbourhood_group with the highest average housing price in the past year (from January 1, 2024, to December 31, 2024).</b><br></li>

    <li><b>Find the proportion of “Private room” listings priced between 0-200 in the highest-priced neighbourhood_group of 2024.</b><br></li>
    </ol>
    If you want dynamic interaction please run ipynb file!!!! 
""")

final_ui = pn.Column(
    "# Airbnb comprehensive analysis visualization dashboard",
    tasks_html,
    heatmap_ui,
    pn.layout.Divider(),
    price_distribution_ui,
    
)

final_ui.servable().show()
final_ui.save("System_B for task 1&2.html", embed=True)

  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')


Launching server at http://localhost:65433
 10%|████▍                                       | 6/60 [00:00<00:02, 20.44it/s]

  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')


 27%|███████████▍                               | 16/60 [00:00<00:01, 33.00it/s]

  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_co

 43%|██████████████████▋                        | 26/60 [00:00<00:00, 39.87it/s]

  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_co

 60%|█████████████████████████▊                 | 36/60 [00:01<00:00, 31.11it/s]

  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_co

 77%|████████████████████████████████▉          | 46/60 [00:01<00:00, 36.55it/s]

  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_co

 93%|████████████████████████████████████████▏  | 56/60 [00:01<00:00, 41.11it/s]

  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_co

                                                                                

  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
  price_counts = df_filtered.groupby(['neighbourhood_group', 'price_range']).size().reset_index(name='count')
