# BEERS TIMELINE ANALYSIS

In [1]:
import pandas as pd
import numpy as np
import importlib
import matplotlib.pyplot as plt
import src.utils.utilities_beers_analysis as utils_beers
import seaborn as sns
import warnings
import plotly.express as px
warnings.filterwarnings('ignore')
from collections import Counter

# Import data

In [2]:
# Root folders of the raw and the pre-processed datasets (relative to the working directory).
original_PATH = 'data/Original'
processed_PATH = 'data/Processed'

# --- BeerAdvocate (BA): processed beers, breweries, users and ratings ---
beer_BA = pd.read_csv(processed_PATH + '/BeerAdvocate/beers_processed_dual.csv')
breweries_BA = pd.read_csv(processed_PATH + '/BeerAdvocate/breweries_processed.csv')
users_BA = pd.read_csv(processed_PATH + '/BeerAdvocate/users_processed.csv')
ratings_BA = pd.read_csv(processed_PATH + '/BeerAdvocate/ratings_processed.csv')

# --- RateBeer (RB): same four tables, same file layout ---
beer_RB = pd.read_csv(processed_PATH + '/RateBeer/beers_processed_dual.csv')
breweries_RB = pd.read_csv(processed_PATH + '/RateBeer/breweries_processed.csv')
users_RB = pd.read_csv(processed_PATH + '/RateBeer/users_processed.csv')
ratings_RB = pd.read_csv(processed_PATH + '/RateBeer/ratings_processed.csv')

Ensuring data consistency: the BeerAdvocate and RateBeer beer tables should expose the same columns, in the same order.

In [3]:
# Show each site's beer-table schema, then compare the two column indexes
# position by position (one boolean per column).
for beer_table in (beer_BA, beer_RB):
    print(beer_table.columns)
print(beer_BA.columns == beer_RB.columns)

Index(['beer_id', 'beer_name', 'brewery_id', 'brewery_name', 'style', 'abv',
       'nbr_ratings', 'avg', 'Style_score', 'Overall_score',
       'nbr_matched_valid_ratings', 'location', 'location_region', 'US', 'UK',
       'Canada'],
      dtype='object')
Index(['beer_id', 'beer_name', 'brewery_id', 'brewery_name', 'style', 'abv',
       'nbr_ratings', 'avg', 'Style_score', 'Overall_score',
       'nbr_matched_valid_ratings', 'location', 'location_region', 'US', 'UK',
       'Canada'],
      dtype='object')
[ True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True]


# Initial distribution analysis

In [4]:
# US map for the BeerAdvocate beer table. The helper comes from
# src.utils.utilities_beers_analysis; presumably it aggregates beers per US
# state before plotting — TODO confirm against the helper.
utils_beers.plot_US_map_data(beer_BA)

In [5]:
# Same US map, this time for the RateBeer beer table, so the two sites can be
# compared side by side.
utils_beers.plot_US_map_data(beer_RB)

In [6]:
# World map for the BeerAdvocate beer table. The helper comes from
# src.utils.utilities_beers_analysis; presumably it aggregates beers per
# country before plotting — TODO confirm against the helper.
utils_beers.plot_world_map_data(beer_BA)

In [7]:
# Same world map, this time for the RateBeer beer table.
utils_beers.plot_world_map_data(beer_RB)

# Initial timeline analysis

## Timelines of beers rated 

### US

In [8]:
# List the columns of the BeerAdvocate ratings table; the timeline cells below
# rely on `year`, `US_beer`, `location_region_beer` and `location_beer`.
ratings_BA.columns

Index(['beer_name', 'beer_id', 'brewery_name', 'brewery_id', 'style', 'abv',
       'date', 'user_name', 'user_id', 'appearance', 'aroma', 'palate',
       'taste', 'overall', 'rating', 'text', 'location_beer',
       'location_region_beer', 'US_beer', 'UK_beer', 'Canada_beer',
       'location_user', 'location_region_user', 'US_user', 'UK_user',
       'Canada_user', 'year'],
      dtype='object')

In [9]:
def _ratings_per_year(ratings, location_col='location_region_beer'):
    """Count ratings per (year, location) and pivot them into a year x location table.

    Parameters
    ----------
    ratings : pd.DataFrame
        Ratings table containing a `year` column and the column named by
        `location_col`.
    location_col : str, default 'location_region_beer'
        Column whose distinct values become the columns of the result.

    Returns
    -------
    pd.DataFrame
        Indexed by `year`, one column per `location_col` value. Cells hold the
        number of ratings as integers, with 0 where a (year, location) pair has
        no ratings.
    """
    counts = (
        ratings
        .groupby(['year', location_col])
        .size()
        .reset_index(name='num_ratings')
    )
    per_year = counts.pivot_table(
        index='year',
        columns=location_col,
        values='num_ratings',
        aggfunc='sum',
    )
    # (year, location) pairs that never occur come out of the pivot as NaN;
    # treat them as zero ratings so the table can be stored as integers.
    return per_year.fillna(0).astype(int)


# Restrict each site's ratings to beers flagged as US (`US_beer` is used as a
# boolean mask), then build the per-year, per-region count tables.
ratings_BA_US = ratings_BA[ratings_BA['US_beer']]
ratings_BA_US_loc_year = _ratings_per_year(ratings_BA_US)

ratings_RB_US = ratings_RB[ratings_RB['US_beer']]
ratings_RB_US_loc_year = _ratings_per_year(ratings_RB_US)

# Preview the RateBeer table. Only the last expression of a cell is rendered,
# so the earlier mid-cell `.head()` on the BeerAdvocate table was a no-op and
# has been removed.
ratings_RB_US_loc_year.head()

location_region_beer,Colorado,Florida
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2012.0,0,5
2013.0,5,16
2014.0,4,17
2015.0,3,15
2016.0,7,23


In [10]:
# Year-by-year US maps, one per site. Each input is the year x region table of
# rating counts built in the previous cell (BeerAdvocate first, then RateBeer).
# NOTE(review): if the helper returns a figure instead of showing it itself,
# only the second call's result is rendered by the notebook — confirm.
utils_beers.plot_US_map_data_by_year(ratings_BA_US_loc_year)
utils_beers.plot_US_map_data_by_year(ratings_RB_US_loc_year)


### World

In [11]:
def _yearly_counts_by_beer_location(ratings):
    """Return a year x `location_beer` table holding the number of ratings per cell."""
    pair_counts = (
        ratings
        .groupby(['year', 'location_beer'])
        .size()
        .reset_index(name='num_ratings')
    )
    wide = pair_counts.pivot_table(
        index='year',
        columns='location_beer',
        values='num_ratings',
        aggfunc='sum',
    )
    # Missing (year, location) pairs become NaN in the pivot: store them as 0
    # and cast the whole table to integers.
    return wide.fillna(0).astype(int)


# Same aggregation for both sites, over every beer location (no US filter here).
ratings_BA_loc_year = _yearly_counts_by_beer_location(ratings_BA)
ratings_RB_loc_year = _yearly_counts_by_beer_location(ratings_RB)

In [12]:
# Year-by-year world maps, one per site. Each input is the year x location
# table of rating counts built in the previous cell (BeerAdvocate first, then
# RateBeer).
# NOTE(review): if the helper returns a figure instead of showing it itself,
# only the second call's result is rendered by the notebook — confirm.
utils_beers.plot_world_map_data_by_year(ratings_BA_loc_year)
utils_beers.plot_world_map_data_by_year(ratings_RB_loc_year)