# Exploratory Data Analysis (EDA)

In [None]:
# Preview the first five rows to check the column names and value formats.
listing.head(n=5)

Unnamed: 0,name,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,total_reviews,reviews_per_month,host_listings_count,availability_365,number_of_reviews_ltm,has_license
1,Only 2 stops to Manhattan studio,Allen & Irina,Brooklyn,Williamsburg,40.70935,-73.95342,Entire home/apt,96.0,30,194,1.01,1,173,3,0
2,Uptown Sanctuary w/ Private Bath (Month to Month),Kahshanna,Manhattan,East Harlem,40.80107,-73.94255,Private room,59.0,30,1,0.03,2,83,0,0
3,UES Beautiful Blue Room,Cyn,Manhattan,East Harlem,40.78778,-73.94759,Private room,67.0,30,251,1.34,1,264,5,0
5,"Perfect for Your Parents, With Garden & Patio",Jane,Brooklyn,Fort Greene,40.69194,-73.97389,Private room,195.0,2,398,2.16,2,190,36,1
6,Sunny Williamsburg Loft with Sauna,Chaya,Brooklyn,Williamsburg,40.718807,-73.956177,Entire home/apt,290.0,30,13,0.07,1,0,1,0


In [None]:

import plotly.express as px
# Mean listing price per neighbourhood group, kept as a flat two-column frame.
df_grouped = (
    listing
    .groupby('neighbourhood_group', as_index=False)['price']
    .mean()
)

# One bar per group, coloured by group. The layout tweak is chained directly
# onto the figure because update_layout returns the same Figure object.
fig = px.bar(
    data_frame=df_grouped,
    x='neighbourhood_group',
    y='price',
    color='neighbourhood_group',
    title='Average Price by Neighbourhood Group',
    labels=dict(price='Average Price ($)', neighbourhood_group='Neighbourhood Group'),
).update_layout(
    template='plotly_white',
    xaxis_title='Neighbourhood Group',
    yaxis_title='Average Price ($)',
)
fig.show()


From the plot we can conclude that Manhattan is the most expensive neighbourhood group on average and the Bronx is the least expensive.

In [None]:
# Average price per room type (room_type stays an ordinary column).
df_room_type = (
    listing[['room_type', 'price']]
    .groupby('room_type', as_index=False)
    .mean()
)

# Display names shared by the hover labels and the axis titles.
room_type_labels = {'price': 'Average Price ($)', 'room_type': 'Room Type'}

fig = px.bar(
    df_room_type,
    x='room_type',
    y='price',
    color='room_type',
    title='Average Price by Room Type',
    labels=room_type_labels,
)
fig.update_layout(
    template='plotly_white',
    xaxis_title=room_type_labels['room_type'],
    yaxis_title=room_type_labels['price'],
)
fig.show()

Hotel rooms have the highest average price and shared rooms the lowest.

In [None]:
# Average price by license status

import plotly.express as px

# Mean price for each value of the has_license flag.
df_license = listing.groupby('has_license', as_index=False)['price'].mean()

# has_license is a numeric 0/1 flag in the preview rows. Plotly Express treats
# a numeric column as continuous, which produces a gradient colorbar and a
# numeric x axis with fractional ticks. Plot string labels instead so the flag
# is handled as a category. df_license itself is left untouched for later cells.
license_labels = {0: 'No', 1: 'Yes'}
license_text = (
    df_license['has_license']
    .map(license_labels)
    # Any value outside 0/1 keeps its own string form rather than becoming NaN.
    .fillna(df_license['has_license'].astype(str))
)
df_license_plot = df_license.assign(has_license=license_text)

fig = px.bar(
    df_license_plot,
    x='has_license',
    y='price',
    color='has_license',
    title='Average Price by License Status',
    labels={'price': 'Average Price ($)', 'has_license': 'Has License'}
)
fig.update_layout(
    xaxis_title='Has License',
    yaxis_title='Average Price ($)',
    template='plotly_white'
)
fig.show()

Listings with a license tend to have a higher average price than listings without one.

In [None]:
# Box plot of raw listing prices, one box per neighbourhood group, so the
# spread and outliers are visible and not only the group means.
box_layout = dict(
    template='plotly_white',
    xaxis_title='Neighbourhood Group',
    yaxis_title='Price ($)',
)

fig = px.box(
    data_frame=listing,
    x='neighbourhood_group',
    y='price',
    color='neighbourhood_group',
    title='Price Distribution by Neighbourhood Group',
    labels=dict(price='Price ($)', neighbourhood_group='Neighbourhood Group'),
)
fig.update_layout(**box_layout)
fig.show()


In [None]:
# Work on an independent deep copy so derived columns never touch `listing`.
listing1 = listing.copy(deep=True)

In [None]:
import pandas as pd
import plotly.express as px

# Step 1: Create estimated 'booked_days' column.
# Derive it from listing1 itself (not from `listing`) so the result does not
# depend on the two frames still sharing the same index.
# NOTE(review): 365 - availability_365 also counts days the host blocked, so
# this is presumably an upper bound on real bookings - confirm.
listing1['booked_days'] = 365 - listing1['availability_365']

# Step 2: Average booked days per room type
df_bookings = listing1.groupby('room_type', as_index=False)['booked_days'].mean()

# Step 3: Plot one bar per room type, coloured by room type
fig = px.bar(
    df_bookings,
    x='room_type',
    y='booked_days',
    color='room_type',
    title='Average Booked Days by Room Type (Estimated)',
    labels={
        'room_type': 'Room Type',
        'booked_days': 'Estimated Booked Days'
    }
)

fig.update_layout(
    xaxis_title='Room Type',
    yaxis_title='Avg. Booked Days (per Year)',
    template='plotly_white'
)

fig.show()


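Private rooms have the highest average number of estimated booked days and hotel rooms the lowest; this could be because hotel rooms are the most expensive room type. Note that booked days are estimated as 365 minus `availability_365`, so days blocked by the host are also counted as booked.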

In [None]:
import plotly.express as px

# Group by neighbourhood group and calculate the average total reviews
grouped_review = listing1.groupby('neighbourhood_group', as_index=False)['total_reviews'].mean()

# Sort ascending so the group with the most reviews ends up at the top of the
# horizontal bar chart (Plotly draws the first row at the bottom).
grouped_review = grouped_review.sort_values(by='total_reviews', ascending=True)

# Create horizontal bar chart.
# The labels keys must match the plotted column names: the y column is
# 'neighbourhood_group', not 'neighbourhood', otherwise the hover label falls
# back to the raw column name.
fig = px.bar(
    grouped_review,
    x='total_reviews',
    y='neighbourhood_group',
    orientation='h',
    title='📊 Average Total Reviews by Neighbourhood Group',
    color='total_reviews',
    color_continuous_scale='viridis',
    labels={'total_reviews': 'Average Total Reviews', 'neighbourhood_group': 'Neighbourhood Group'}
)

# Update layout for better spacing and readability
fig.update_layout(
    template='plotly_white',
    xaxis_title='Average Total Reviews',
    yaxis_title='Neighbourhood Group',
    title_x=0.5,  # centre the title
    margin=dict(l=100, r=30, t=80, b=40),  # extra left margin for the group names
    height=600
)

fig.show()
