In [42]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from pathlib import Path
import numpy as np

In [2]:
# Load the WebMD review sheet; the path is relative to the current working directory.
df = pd.read_csv(
    Path('csv-files/Kidney Stone Reviews - Master Sheet - WebMD.csv')
)
print(f"Successfully loaded {len(df)} rows of data")

Successfully loaded 1572 rows of data


In [44]:
# Shared layout settings reused by the figures below: figure size, margins,
# and title font size.
PLOT_PARAMS = dict(
    width=400,
    height=200,
    margin={'l': 50, 'r': 50, 't': 50, 'b': 50},
    title_font_size=14,
)

# One small bar chart per medicine showing how many reviews fall in each
# whole-star rating band.

# Histogram edges for the rating bands. np.histogram treats every bin as
# half-open except the last, so the extra 5.1 edge gives a rating of exactly
# 5.0 its own bin instead of merging it into the 4-5 band.
RATING_BINS = [1.0, 2.0, 3.0, 4.0, 5.0, 5.1]
RATING_POSITIONS = [1, 2, 3, 4, 5]  # x position (and tick) of each band's bar
RATING_LABELS = ["1-1.9", "2-2.9", "3-3.9", "4-4.9", "5"]  # hover text per band

# groupby splits the frame once instead of re-filtering it for every medicine.
# sort=False keeps the order in which medicines first appear, and rows with a
# missing 'Medicine' are skipped (filtering with `== medicine` on a NaN label
# matched nothing and drew an all-zero chart titled "nan").
for medicine, medicine_data in df.groupby('Medicine', sort=False):
    counts, _ = np.histogram(medicine_data['Overall Rating'], bins=RATING_BINS)

    fig = go.Figure(data=[
        go.Bar(
            x=RATING_POSITIONS,  # Center bars on the integer ratings
            y=counts,
            width=0.8,  # Bar width
            hovertemplate=(
                "Overall Rating: %{customdata}<br>" +
                "Count: %{y}" +
                "<extra></extra>"  # empty <extra> hides the secondary hover box
            ),
            customdata=RATING_LABELS
        )
    ])

    fig.update_layout(
        title=f'Distribution of Ratings: {medicine}',
        xaxis_title='Rating',
        yaxis_title='Count',
        template='plotly_white',
        width=PLOT_PARAMS['width'],
        height=PLOT_PARAMS['height'],
        bargap=0.1,
        margin=PLOT_PARAMS['margin'],
        title_font_size=PLOT_PARAMS['title_font_size'],
        xaxis=dict(
            tickmode='array',
            tickvals=RATING_POSITIONS,
            ticktext=['1', '2', '3', '4', '5'],
            range=[0.5, 5.5]  # half a unit of padding around the outer bars
        )
    )

    fig.show()

In [57]:
# Build a long-format table with one row per (medicine, rating band) holding
# the share of that medicine's reviews that fall in the band.

# Histogram edges for the rating bands. np.histogram treats every bin as
# half-open except the last, so the extra 5.1 edge gives a rating of exactly
# 5.0 its own bin.
RATING_BINS = [1.0, 2.0, 3.0, 4.0, 5.0, 5.1]
RATING_POSITIONS = [1, 2, 3, 4, 5]
RATING_LABELS = ["1-1.9", "2-2.9", "3-3.9", "4-4.9", "5"]  # rating ranges for hover

all_data = []
# groupby yields the medicines in sorted order and skips rows whose 'Medicine'
# is missing; sorted(df['Medicine'].unique()) raised TypeError as soon as a NaN
# was mixed in with the string names.
for medicine, medicine_data in df.groupby('Medicine'):
    counts, _ = np.histogram(medicine_data['Overall Rating'], bins=RATING_BINS)

    # Normalize the counts to percentages.
    # NOTE(review): the denominator is every review row for the medicine, so
    # rows whose rating is missing or outside 1-5 make the five percentages
    # sum to less than 100 - confirm this is the intended definition.
    total_reviews = len(medicine_data)
    percentages = (counts / total_reviews) * 100

    ratings_df = pd.DataFrame({
        'Rating': RATING_POSITIONS,
        'Percentage': percentages,
        'Medicine': medicine,
        'RatingRange': RATING_LABELS
    })
    all_data.append(ratings_df)

# ignore_index gives the combined frame a unique 0..n-1 index instead of
# repeating 0-4 once per medicine.
plot_df = pd.concat(all_data, ignore_index=True)

# Grouped bar chart comparing the rating distribution (in percent) of every
# medicine side by side, drawn at twice the size of the per-medicine charts.
fig = px.bar(
    plot_df,
    x='Rating',
    y='Percentage',
    color='Medicine',
    barmode='group',
    # Let Plotly Express attach the hover columns to each trace itself, so the
    # rows always line up with the bars regardless of trace order.
    custom_data=['Medicine', 'RatingRange'],
    width=PLOT_PARAMS['width'] * 2,
    height=PLOT_PARAMS['height'] * 2,
)

fig.update_layout(
    title='Distribution of Ratings by Medicine (%)',
    xaxis_title='Rating',
    yaxis_title='Percentage of Reviews',
    template='plotly_white',
    bargap=0.15,
    bargroupgap=0.1,
    margin=PLOT_PARAMS['margin'],
    title_font_size=PLOT_PARAMS['title_font_size'],
    xaxis=dict(
        tickmode='array',
        tickvals=[1, 2, 3, 4, 5],
        ticktext=['1', '2', '3', '4', '5'],
        range=[0.5, 5.5]
    ),
    legend=dict(
        orientation='h',
        yanchor='top',
        y=-0.25,  # Move legend below x-axis title
        xanchor='center',
        x=0.5,
        title=None
    )
)

# Every trace carries its own customdata (see custom_data above), so a single
# template applies to all of them; customdata[0] is the medicine name and
# customdata[1] the rating range.
fig.update_traces(
    hovertemplate=(
        "%{customdata[0]}<br>" +
        "Rating: %{customdata[1]}<br>" +
        "Percentage: %{y:.1f}%" +
        "<extra></extra>"
    )
)

fig.show()