In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

# Allow pandas to render up to 50 columns before it truncates wide frames.
pd.options.display.max_columns = 50

# Read the restaurants CSV hosted on GitHub into `df`, then show it as the cell output.
restaurants_csv_url = 'https://github.com/bdi475/datasets/raw/main/cu-restaurants-20240409.csv'
df = pd.read_csv(restaurants_csv_url)
df

In [None]:
# Cap the number of DataFrame columns pandas will display at 50.
pd.options.display.max_columns = 50

In [None]:
# Load the GitHub-hosted restaurants CSV into `df` and display it as the cell output.
dataset_url = 'https://github.com/bdi475/datasets/raw/main/cu-restaurants-20240409.csv'
df = pd.read_csv(dataset_url)
df

## Challenge 1: Restaurants by Price Range

In [None]:
# Pie chart showing the share of rows in each price range.
# Rows with a missing `range` value are excluded before plotting.
price_range_colors = {
    "$": "#C5E1A5",
    "$$": "#AED581",
    "$$$": "#7CB342",
}
pie_title = (
    '<b>Restaurants by Price Range</b>'
    '<br><span style="color: #aaa;">CU has few options for fine dining</span>'
)
priced_restaurants = df.dropna(subset=['range'])

fig = px.pie(
    priced_restaurants,
    names='range',
    color='range',
    color_discrete_map=price_range_colors,
    labels={'range': 'Price Range'},
    title=pie_title,
    template='simple_white',
    height=500,
)

# Each slice carries its own percentage and label outside the pie, so the legend is hidden.
fig.update_traces(showlegend=False, textposition='outside', textinfo='percent+label')
fig.update_layout(font={'family': 'Helvetica, Inter, Arial, sans-serif'})

fig.show()


## Challenge 2: Reviews breakdown by ZIP Code

In [None]:
# Sum the per-score review counts within each ZIP code.
# The result is wide: one row per ZIP, one column per review score.
score_columns = [
    'reviews_per_score_1',
    'reviews_per_score_2',
    'reviews_per_score_3',
    'reviews_per_score_4',
    'reviews_per_score_5',
]

df_zip_wide = (
    df
    .groupby(['zip'], as_index=False)
    .agg(dict.fromkeys(score_columns, 'sum'))
)

display(df_zip_wide)

In [None]:
# Reshape the per-ZIP totals from wide to long format: one row per (zip, review_rating)
# pair, with the review count and that rating's share of the ZIP code's reviews.
# Built as a single non-mutating chain so re-running the cell always starts from
# `df_zip_wide` and no intermediate frame is modified in place.
score_prefix = 'reviews_per_score_'
score_columns = [
    'reviews_per_score_1',
    'reviews_per_score_2',
    'reviews_per_score_3',
    'reviews_per_score_4',
    'reviews_per_score_5',
]

df_ratings_by_zip = (
    pd.melt(
        df_zip_wide,
        id_vars='zip',
        value_vars=score_columns,
        var_name='review_rating',
        value_name='num_reviews',
    )
    .assign(
        # Store ZIP codes as strings so they are handled as labels.
        zip=lambda long_df: long_df['zip'].astype(str),
        # Strip the column-name prefix so only the score ('1'..'5') remains.
        # regex=False makes the literal match explicit; the default for `regex`
        # differs between pandas versions.
        review_rating=lambda long_df: long_df['review_rating'].str.replace(
            score_prefix, '', regex=False
        ),
        # Fraction of the ZIP code's total reviews that received this rating.
        percentage=lambda long_df: (
            long_df['num_reviews']
            / long_df.groupby('zip')['num_reviews'].transform('sum')
        ),
    )
    .sort_values(['zip', 'review_rating'])
)

df_ratings_by_zip

In [None]:
# Bar chart of review counts per ZIP code, coloured by review rating.
# ZIP codes go on the y-axis and review counts on the x-axis.
rating_colors = {
    "1": "#EF5350",
    "2": "#EF9A9A",
    "3": "#FDD835",
    "4": "#9CCC65",
    "5": "#689F38",
}
column_labels = {
    'review_rating': 'Review Rating',
    'zip': 'ZIP Code',
    'percentage': 'Percentage',
    'num_reviews': 'Number of reviews',
}
chart_title = (
    '<b>Total number of reviews by zip code</b>'
    '<br><span style="color: #ccc;">61820 has the largest number of reviews</span>'
)

fig = px.bar(
    df_ratings_by_zip,
    y='zip',
    x='num_reviews',
    color='review_rating',
    color_discrete_map=rating_colors,
    labels=column_labels,
    title=chart_title,
    height=500,
    template='simple_white',
)

fig.update_layout(font={'family': 'Helvetica, Inter, Arial, sans-serif'})
# Order the ZIP code categories by their total bar length.
fig.update_yaxes(categoryorder='total ascending')

fig.show()

## Challenge 3: Reviews breakdown by ZIP Code (100% Stacked Bar Chart)

In [None]:
def _star_label(row):
    """Build one bar label: a star per rating point, then the percentage to one decimal."""
    stars = '⭐' * int(row['review_rating'])
    share = '{0:.1f}%'.format(row['percentage'] * 100)
    return f"{stars} {share}"


# Bar chart of each rating's share of reviews per ZIP code, coloured by review rating.
rating_colors = {
    "1": "#EF5350",
    "2": "#EF9A9A",
    "3": "#FDD835",
    "4": "#9CCC65",
    "5": "#689F38",
}
chart_title = (
    '<b>Review rating breakdown by zip code</b>'
    '<br><span style="color: #aaa">61820 has the highest proportion of 5 star reviews</span>'
)
bar_labels = df_ratings_by_zip.apply(_star_label, axis=1)

fig = px.bar(
    df_ratings_by_zip,
    x='zip',
    y='percentage',
    color='review_rating',
    color_discrete_map=rating_colors,
    labels={
        'review_rating': 'Review Rating',
        'zip': 'Zip',
        'percentage': 'Percentage',
    },
    title=chart_title,
    text=bar_labels,
    height=650,
    template='simple_white',
)

fig.update_layout(
    font={'family': 'Helvetica, Inter, Arial, sans-serif'},
    # Show the y-axis ticks as whole-number percentages.
    yaxis={'tickformat': ',.0%'},
    # Hide any bar label that would need a font smaller than 10 to fit.
    uniformtext={'minsize': 10, 'mode': 'hide'},
)
fig.update_traces(textfont_color='white')

fig.show()