In [41]:
import pandas as pd

# Directory holding the processed RateBeer parquet files.
# (An alternative location, '../data/processed/', was previously used here.)
FOLDER = '../data/RateBeer/processed/'


# Read each table from its parquet file inside FOLDER
df_beers = pd.read_parquet(f'{FOLDER}beers.pq')
df_breweries = pd.read_parquet(f'{FOLDER}breweries.pq')
df_users = pd.read_parquet(f'{FOLDER}users.pq')
df_ratings_no_text = pd.read_parquet(f'{FOLDER}ratings_no_text.pq')


In [42]:
# Show the column index of every loaded table, one line per table
column_report = {
    "Beers columns: ": df_beers.columns,
    "Breweries columns: ": df_breweries.columns,
    "Users columns: ": df_users.columns,
    "Ratings columns: ": df_ratings_no_text.columns,
}
for heading, columns in column_report.items():
    print(heading, columns)

Beers columns:  Index(['beer_id', 'beer_name', 'brewery_id', 'brewery_name', 'style', 'abv'], dtype='object')
Breweries columns:  Index(['brewery_id', 'brewery_name', 'location_brewery'], dtype='object')
Users columns:  Index(['user_id', 'user_name', 'location_user', 'joined'], dtype='object')
Ratings columns:  Index(['date', 'beer_id', 'user_id', 'brewery_id', 'abv', 'style', 'rating',
       'palate', 'taste', 'appearance', 'aroma', 'overall', 'brewery_name',
       'location_brewery', 'user_name', 'location_user', 'joined'],
      dtype='object')


In [43]:
# No merge with the users/breweries tables is needed here: the ratings table
# already carries `location_user` and `location_brewery` (see the column
# listing printed above).

# Parse the rating dates, then keep only ratings from 2002 onwards
rating_dates = pd.to_datetime(df_ratings_no_text['date'])
df_ratings_no_text['date'] = rating_dates
df_ratings_no_text = df_ratings_no_text.loc[rating_dates.dt.year.ge(2002)]


In [44]:
# Column listing of each table; every heading is followed by its index on the next line
print("Beer Columns:", df_beers.columns, sep="\n")
print("\nBrewery Columns:", df_breweries.columns, sep="\n")
print("\nUsers Columns:", df_users.columns, sep="\n")
print("\nRatings Columns:", df_ratings_no_text.columns, sep="\n")

Beer Columns:
Index(['beer_id', 'beer_name', 'brewery_id', 'brewery_name', 'style', 'abv'], dtype='object')

Brewery Columns:
Index(['brewery_id', 'brewery_name', 'location_brewery'], dtype='object')

Users Columns:
Index(['user_id', 'user_name', 'location_user', 'joined'], dtype='object')

Ratings Columns:
Index(['date', 'beer_id', 'user_id', 'brewery_id', 'abv', 'style', 'rating',
       'palate', 'taste', 'appearance', 'aroma', 'overall', 'brewery_name',
       'location_brewery', 'user_name', 'location_user', 'joined'],
      dtype='object')


In [45]:
# Keep only brewery locations whose rating count is strictly above the threshold
MIN_RATINGS_THRESHOLD = 1
number_of_ratings_per_location = df_ratings_no_text['location_brewery'].value_counts()
is_popular = number_of_ratings_per_location.gt(MIN_RATINGS_THRESHOLD)
popular_locations = number_of_ratings_per_location.loc[is_popular].index

# Restrict the ratings to those locations ...
in_popular_location = df_ratings_no_text['location_brewery'].isin(popular_locations)
df_ratings_no_text = df_ratings_no_text.loc[in_popular_location]

# ... and keep only the beers and breweries that still have at least one rating
rated_beer_ids = df_ratings_no_text['beer_id'].unique()
rated_brewery_ids = df_ratings_no_text['brewery_id'].unique()
df_beers = df_beers.loc[df_beers['beer_id'].isin(rated_beer_ids)]
df_breweries = df_breweries.loc[df_breweries['brewery_id'].isin(rated_brewery_ids)]


In [46]:
# Define a mapping of U.S. states to abbreviations
state_abbrev = {
    "Alabama": "AL", "Alaska": "AK", "Arizona": "AZ", "Arkansas": "AR",
    "California": "CA", "Colorado": "CO", "Connecticut": "CT", "Delaware": "DE",
    "Florida": "FL", "Georgia": "GA", "Hawaii": "HI", "Idaho": "ID",
    "Illinois": "IL", "Indiana": "IN", "Iowa": "IA", "Kansas": "KS",
    "Kentucky": "KY", "Louisiana": "LA", "Maine": "ME", "Maryland": "MD",
    "Massachusetts": "MA", "Michigan": "MI", "Minnesota": "MN", "Mississippi": "MS",
    "Missouri": "MO", "Montana": "MT", "Nebraska": "NE", "Nevada": "NV",
    "New Hampshire": "NH", "New Jersey": "NJ", "New Mexico": "NM", "New York": "NY",
    "North Carolina": "NC", "North Dakota": "ND", "Ohio": "OH", "Oklahoma": "OK",
    "Oregon": "OR", "Pennsylvania": "PA", "Rhode Island": "RI", "South Carolina": "SC",
    "South Dakota": "SD", "Tennessee": "TN", "Texas": "TX", "Utah": "UT",
    "Vermont": "VT", "Virginia": "VA", "Washington": "WA", "West Virginia": "WV",
    "Wisconsin": "WI", "Wyoming": "WY"
}

# Add state and country information to the datasets
def add_location_info(df, location_column):
    """Return a copy of ``df`` extended with US-state and country columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing the location strings. It is not modified.
    location_column : str
        Name of the column holding the location text. A row counts as a US
        row when its text contains "United States"; the state name is the
        second piece of the text when split on ", ".

    Returns
    -------
    pandas.DataFrame
        A new frame with four additional columns:

        - ``is_us``: boolean flag, False for missing locations.
        - ``state_name``: state name for US rows, NaN otherwise.
        - ``state_abbrev``: abbreviation looked up in ``state_abbrev``; NaN
          for non-US rows and for names absent from the mapping.
        - ``country``: the original location text for non-US rows, NaN for
          US rows.
    """
    locations = df[location_column]

    # na=False keeps the flag strictly boolean: a missing location would
    # otherwise yield NaN and make the `~is_us` negation below fail.
    is_us = locations.str.contains("United States", na=False, regex=False)
    state_name = locations.where(is_us).str.split(", ").str[1]

    # assign() builds a new frame, so the caller's table (often a filtered
    # slice of another frame) is never written to in place.
    return df.assign(
        is_us=is_us,
        state_name=state_name,
        state_abbrev=state_name.map(state_abbrev),
        country=locations.where(~is_us),
    )

# Derive the US-state / country columns for ratings, breweries and users
df_ratings_no_text = df_ratings_no_text.pipe(add_location_info, 'location_brewery')
df_breweries = df_breweries.pipe(add_location_info, 'location_brewery')
df_users = df_users.pipe(add_location_info, 'location_user')

# Beers have no location of their own: attach the brewery's location first
# (inner join on brewery_id), then derive the same columns from it
brewery_locations = df_breweries[['brewery_id', 'location_brewery']]
df_beers = (
    df_beers
    .merge(brewery_locations, on='brewery_id')
    .pipe(add_location_info, 'location_brewery')
)


In [47]:
def _count_by_location(df, count_column):
    """Count the rows of ``df`` per country (non-US rows) and per US state.

    Relies on the ``is_us``, ``country`` and ``state_abbrev`` columns of ``df``.
    Returns ``(per_country, per_state)``; each is a frame with the grouping
    column plus the row count stored under ``count_column``.
    """
    us_rows = df['is_us']
    per_country = df[~us_rows].groupby('country').size().reset_index(name=count_column)
    per_state = df[us_rows].groupby('state_abbrev').size().reset_index(name=count_column)
    return per_country, per_state


# Ratings, breweries, users and beers per country and per US state
ratings_per_country, ratings_per_state = _count_by_location(df_ratings_no_text, 'nbr_ratings')
breweries_per_country, breweries_per_state = _count_by_location(df_breweries, 'nbr_breweries')
users_per_country, users_per_state = _count_by_location(df_users, 'nbr_users')
beers_per_country, beers_per_state = _count_by_location(df_beers, 'nbr_beers')


In [48]:
import plotly.graph_objects as go

# Hover text shared by every trace: location name followed by its count
map_hover = "<b>%{location}</b><br>Count: %{z}<extra></extra>"

# One entry per statistic:
# (dropdown label / colorbar title, colorscale, per-country table, per-state table, count column)
map_layers = [
    ("Ratings", "Blues", ratings_per_country, ratings_per_state, "nbr_ratings"),
    ("Breweries", "Reds", breweries_per_country, breweries_per_state, "nbr_breweries"),
    ("Users", "Greens", users_per_country, users_per_state, "nbr_users"),
    ("Beers", "Oranges", beers_per_country, beers_per_state, "nbr_beers"),
]

# Initialize figure
fig = go.Figure()

# Each statistic contributes two traces, in this order: countries, then US states.
# Only the first statistic (Ratings) is visible when the figure opens.
for layer_idx, (label, colorscale, per_country, per_state, count_col) in enumerate(map_layers):
    initially_shown = layer_idx == 0

    # Country trace: its own colorbar is suppressed
    fig.add_trace(go.Choropleth(
        locations=per_country['country'],
        locationmode="country names",
        z=per_country[count_col],
        colorscale=colorscale,
        showscale=False,
        hovertemplate=map_hover,
        visible=initially_shown,
    ))

    # US-state trace: carries the colorbar shown for this statistic
    fig.add_trace(go.Choropleth(
        locations=per_state['state_abbrev'],
        locationmode="USA-states",
        z=per_state[count_col],
        colorscale=colorscale,
        colorbar={"title": label, "tickformat": ".1s", "len": 0.5},
        hovertemplate=map_hover,
        visible=initially_shown,
    ))

# Dropdown menu: each button shows exactly the two traces of its statistic
map_buttons = [
    {
        "args": [{"visible": [trace_idx // 2 == layer_idx
                              for trace_idx in range(2 * len(map_layers))]}],
        "label": layer[0],
        "method": "update",
    }
    for layer_idx, layer in enumerate(map_layers)
]

fig.update_layout(
    updatemenus=[
        {
            "buttons": map_buttons,
            "direction": "down",
            "x": 0.1,
            "y": 1,  # Dropdown below title
            "xanchor": "left",
            "yanchor": "top",
        }
    ],
    title={
        "text": "Beer Statistics by Country and US State",
        "x": 0.5,
        "xanchor": "center",
    },
    geo={
        "showframe": False,
        "showcoastlines": True,
        "projection_type": "equirectangular",
    },
    height=600,
    width=800,
)

# Show the plot
fig.show()


In [49]:
# Hover text shared by every globe trace: location name followed by its count
globe_hover = "<b>%{location}</b><br>Count: %{z}<extra></extra>"

# One entry per statistic:
# (dropdown label / colorbar title, colorscale, per-country table, per-state table, count column)
globe_layers = [
    ("Ratings", "Blues", ratings_per_country, ratings_per_state, "nbr_ratings"),
    ("Breweries", "Reds", breweries_per_country, breweries_per_state, "nbr_breweries"),
    ("Users", "Greens", users_per_country, users_per_state, "nbr_users"),
    ("Beers", "Oranges", beers_per_country, beers_per_state, "nbr_beers"),
]

# Build the traces first: for every statistic a country trace followed by a
# US-state trace. Only the first statistic (Ratings) starts out visible.
globe_traces = []
for layer_pos, (label, colorscale, per_country, per_state, count_col) in enumerate(globe_layers):
    starts_visible = layer_pos == 0
    globe_traces.append(go.Choropleth(
        locations=per_country['country'],
        locationmode="country names",
        z=per_country[count_col],
        colorscale=colorscale,
        showscale=False,  # no colorbar for the country trace
        hovertemplate=globe_hover,
        visible=starts_visible,
    ))
    globe_traces.append(go.Choropleth(
        locations=per_state['state_abbrev'],
        locationmode="USA-states",
        z=per_state[count_col],
        colorscale=colorscale,
        colorbar={"title": label, "tickformat": ".1s", "len": 0.5},
        hovertemplate=globe_hover,
        visible=starts_visible,
    ))

# Initialize 3D globe figure
fig_globe = go.Figure(data=globe_traces)

# Dropdown menu: each button turns on only the two traces of its statistic
globe_buttons = []
for layer_pos, layer in enumerate(globe_layers):
    visibility = [trace_pos // 2 == layer_pos for trace_pos in range(len(globe_traces))]
    globe_buttons.append({
        "args": [{"visible": visibility}],
        "label": layer[0],
        "method": "update",
    })

fig_globe.update_layout(
    updatemenus=[
        {
            "buttons": globe_buttons,
            "direction": "down",
            "x": 0.1,
            "y": 1,
            "xanchor": "left",
            "yanchor": "top",
        }
    ],
    title={
        "text": "Beer Statistics on a 3D Globe",
        "x": 0.5,
        "xanchor": "center",
    },
    # Orthographic projection renders the map as a globe
    geo={
        "showframe": False,
        "showcoastlines": True,
        "projection_type": "orthographic",
        "showcountries": True,
        "showland": True,
        "landcolor": "rgb(217, 217, 217)",
        "lakecolor": "rgb(255, 255, 255)",
        "oceancolor": "rgb(204, 229, 255)",
        "projection_rotation": {"lat": 10, "lon": 20},
    },
    height=600,
    width=800,
)

# Show the plot
fig_globe.show()


In [50]:
# Save the plots to file
from plotly.io import write_html

# Save flat map to an HTML file
flat_map_file = "../docs/plots/flat_map_rb.html"
write_html(fig, flat_map_file)
# Report the path that was actually written
print(f"Flat map saved as '{flat_map_file}'.")

# Save the globe map to an HTML file
output_file = "../docs/plots/globe_map_rb.html"

# Add JavaScript for auto-rotation and user interaction.
# The script addresses the plot through the div id 'globe-map', which is the
# id given to the figure's div when the HTML is rendered below.
# NOTE(review): the script starts at angle 0 and sets only `lon` in the
# rotation object, while the figure layout uses lat=10, lon=20 - confirm the
# resulting initial view is the intended one.
rotation_script = """
<script>
    let angle = 0; // Current rotation angle
    const rotationSpeed = 0.2; // Rotation speed in degrees per interval
    let isDragging = false; // Track if the user is dragging
    let lastMouseX = null; // Track the last mouse position
    let rotationInterval = null; // Interval for automatic rotation

    // Function to rotate the globe automatically
    function rotateGlobe() {
        if (!isDragging) {
            angle = (angle + rotationSpeed) % 360; // Increment angle
            Plotly.relayout('globe-map', {
                'geo.projection.rotation': {
                    lon: angle
                }
            });
        }
    }

    // Function to start automatic rotation
    function startAutoRotation() {
        if (!rotationInterval) {
            rotationInterval = setInterval(rotateGlobe, 30); // Adjust interval time for smoother rotation
        }
    }

    // Function to stop automatic rotation
    function stopAutoRotation() {
        if (rotationInterval) {
            clearInterval(rotationInterval);
            rotationInterval = null;
        }
    }

    // Mouse down event: Start dragging
    document.addEventListener('mousedown', (event) => {
        if (event.target.closest('#globe-map')) {
            isDragging = true;
            lastMouseX = event.clientX; // Store the initial mouse position
            stopAutoRotation(); // Stop auto rotation during dragging
        }
    });

    // Mouse move event: Rotate globe manually
    document.addEventListener('mousemove', (event) => {
        if (isDragging && lastMouseX !== null) {
            const deltaX = event.clientX - lastMouseX; // Calculate the horizontal mouse movement
            angle = (angle - deltaX * 0.5) % 360; // Adjust angle based on mouse movement
            lastMouseX = event.clientX; // Update the last mouse position
            Plotly.relayout('globe-map', {
                'geo.projection.rotation': {
                    lon: angle
                }
            });
        }
    });

    // Mouse up event: Stop dragging and resume auto rotation
    document.addEventListener('mouseup', () => {
        if (isDragging) {
            isDragging = false;
            lastMouseX = null;
            startAutoRotation(); // Resume auto rotation after dragging
        }
    });

    // Start automatic rotation when the page loads
    document.addEventListener("DOMContentLoaded", function () {
        startAutoRotation();
    });
</script>
"""

# Render the Plotly figure to a full HTML document in memory, so the page can
# be completed before anything is written to disk
html_content = fig_globe.to_html(full_html=True, div_id="globe-map")

# Insert the JavaScript before the closing </body> tag. Only the final tag is
# targeted, so any earlier occurrence of the text "</body>" inside the
# embedded scripts is left alone.
before_body_end, body_end_tag, after_body_end = html_content.rpartition("</body>")
if body_end_tag:
    html_content = before_body_end + rotation_script + body_end_tag + after_body_end
else:
    # No closing tag found: append the script rather than silently dropping it
    html_content = html_content + rotation_script

# Write the page once, explicitly as UTF-8 (the platform default encoding
# may differ)
with open(output_file, 'w', encoding='utf-8') as html_file:
    html_file.write(html_content)

print(f"3D globe map saved with interactive rotation at {output_file}")


Flat map saved as 'flat_map.html'.
3D globe map saved with interactive rotation at ../docs/plots/globe_map_rb.html
