Do neighbouring states rate a state's beers more favourably than non-neighbouring states do?
Perhaps map beer appreciation geographically? E.g. for Californian beer, there is a concentration of enjoyment in California compared to other states.


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

In [None]:


# Assuming 'ratings' is your DataFrame with user_state, beer_state, and rating
# And 'neighbours_df' is your DataFrame containing the list of neighbouring states

#Calculate and return the average ratings for in state, its neighbours, and non-neighbors
def calculate_state_ratings(state, ratings, neighbours_df):
    """Average the ratings given to one state's beers, split by reviewer location.

    Only rows of ``ratings`` whose ``beer_state`` equals ``state`` are
    considered. They are split by ``user_state`` into three groups: reviewers
    from ``state`` itself, reviewers from one of its neighbouring states, and
    everyone else.

    Args:
        state: Value matched against ``ratings['beer_state']``,
            ``ratings['user_state']`` and ``neighbours_df['state']``.
        ratings: DataFrame with ``beer_state``, ``user_state`` and ``rating``
            columns.
        neighbours_df: DataFrame with a ``state`` column and a ``neighbours``
            column; the ``neighbours`` cell of the first matching row is
            passed to ``Series.isin``, so it must be list-like.

    Returns:
        Tuple ``(in_state_avg, neighbours_avg, non_neighbours_avg)``. Each
        entry is the mean rating of its group, or ``None`` when the group
        contains no ratings.

    Raises:
        IndexError: If ``state`` has no row in ``neighbours_df``.
    """
    def _mean_or_none(values):
        # An empty group is reported as None rather than the NaN that
        # Series.mean() would produce.
        return values.mean() if not values.empty else None

    # Neighbour list of the first row matching the requested state
    neighbours = neighbours_df.loc[neighbours_df['state'] == state, 'neighbours'].values[0]

    # Restrict to beers originating from the requested state
    state_beers = ratings[ratings['beer_state'] == state]
    reviewers = state_beers['user_state']

    # Compute each mask once and reuse it for the three groups
    from_home = reviewers == state
    from_neighbour = reviewers.isin(neighbours)

    in_state_ratings = state_beers.loc[from_home, 'rating']
    neighbours_ratings = state_beers.loc[from_neighbour, 'rating']
    non_neighbours_ratings = state_beers.loc[~from_neighbour & ~from_home, 'rating']

    in_state_avg = _mean_or_none(in_state_ratings)
    neighbours_avg = _mean_or_none(neighbours_ratings)
    non_neighbours_avg = _mean_or_none(non_neighbours_ratings)

    # The original returned the undefined name `state_avg` here (NameError)
    return in_state_avg, neighbours_avg, non_neighbours_avg

# Every state that appears as a reviewer's home state in US_ratings
states = US_ratings['user_state'].unique()

# One row per state: the state name followed by its three averages
result_columns = ('state', 'state_avg', 'neighbours_avg', 'non_neighbours_avg')
ratings_results = []
for state in states:
    averages = calculate_state_ratings(state, US_ratings, neighbours_df)
    ratings_results.append(dict(zip(result_columns, (state, *averages))))

ratings_df = pd.DataFrame(ratings_results)

print(ratings_df.head(3))

plt.figure(figsize=(12, 8))

# Width of a single bar; each state gets three bars side by side
bar = 0.25
index = np.arange(len(ratings_df))

# (column, legend label, colour) for each group, in left-to-right order
bar_groups = [
    ('state_avg', 'In state Ratings', 'blue'),
    ('neighbours_avg', 'Neighbour Ratings', 'red'),
    ('non_neighbours_avg', 'Non-Neighbour Ratings', 'green'),
]
for position, (column, legend_label, colour) in enumerate(bar_groups):
    # Shift each group right by one bar width so the bars do not overlap
    plt.bar(index + position * bar, ratings_df[column], bar, label=legend_label, color=colour)

# Centre the state label under the middle bar of its group
plt.xticks(index + bar, ratings_df['state'], rotation=90)
plt.xlabel('State')
plt.ylabel('Average Rating')
plt.title('Average ratings of beers originating from given state: In state, Neighbours, Non-Neighbours')
plt.legend()
plt.tight_layout()
plt.show()
