# GeoCache: *Wine Spectator*'s Top 100 Wines, 1988-2020
The lists are available online at *Wine Spectator*'s [Top 100 Lists website](https://top100.winespectator.com/lists/).

## File Setup

In [143]:
# import and initialize main python libraries
import numpy as np
import pandas as pd
import shapefile as shp
import matplotlib.pyplot as plt
import bar_chart_race as bcr
import seaborn as sns

# import libraries for file navigation
import os
import shutil
import glob
from pandas_ods_reader import read_ods

# import other packages
from scipy import stats
from sklearn import linear_model

# import geo packages
import geopandas as gpd
import descartes
from shapely.geometry import Point, Polygon

# import Geopy packages
import geopy
from geopy.geocoders import Nominatim

In [144]:
# initialize vizualization set
sns.set(style="whitegrid", palette="colorblind", color_codes=True)
sns.mpl.rc("figure", figsize=(10, 6))

# Jupyter Notebook
%matplotlib inline

## Dataframe Exploration

In [145]:
# Load the three source tables in one pass.
# Note: the CSV files must be saved in UTF-8 format to preserve special characters.
df_Wine, df_GeoCache, df_Flag = (
    pd.read_csv(csv_path)
    for csv_path in ('./Wine_Hier00.csv', './GeoCache.csv', './emoji_list.csv')
)

In [146]:
# The wine CSV retains a blank row at the end of the dataset, which would
# otherwise cause data type errors. Remove it only when it is actually blank,
# so that re-running this cell (or loading a cleaned CSV) cannot silently
# delete a real wine record.

# maximum number of trailing rows to inspect and drop
n = 1

# A trailing row is treated as blank when it has none of the fields that the
# country/year analysis relies on.
last_rows = df_Wine.tail(n)
is_blank = last_rows[['Review_Year', 'Address', 'Score']].isna().all(axis = 1)

df_Wine.drop(last_rows.index[is_blank], inplace = True)

In [147]:
df_Wine.shape

(3300, 25)

In [148]:
df_GeoCache.shape

(1224, 8)

In [149]:
df_Flag.shape

(19, 3)

In [150]:
df_GeoCache.sample(10)

Unnamed: 0,Geography,Hierarchy,Address,loc,point,lat,long,altitude
316,St. Helena,Hierarchy_00,USA,United States,"(39.7837304, -100.4458825, 0.0)",39.78373,-100.445882,0.0
10,Hunter Valley,Hierarchy_00,Australia,Australia,"(-24.7761086, 134.755, 0.0)",-24.776109,134.755,0.0
353,Vinsobres,Hierarchy_00,France,France,"(46.603354, 1.8883335, 0.0)",46.603354,1.888333,0.0
1011,Chassagne-Montrachet Les Chenevottes,Hierarchy_03,"Chassagne-Montrachet, Côte de Beaune, Burgundy...",,,,,
546,Emilia IGT,Hierarchy_01,"Emilia-Romagna, Italy","Emilia-Romagna, Italia","(44.525696, 11.039437, 0.0)",44.525696,11.039437,0.0
163,IGP Île de Beauté,Hierarchy_00,France,France,"(46.603354, 1.8883335, 0.0)",46.603354,1.888333,0.0
733,Niagara Peninsula,Hierarchy_02,"Niagara Peninsula, Ontario, Canada","Niagara Escarpment, Northern Bruce Peninsula, ...","(45.1515368, -81.3355078, 0.0)",45.151537,-81.335508,0.0
173,Santorini,Hierarchy_00,Greece,Ελλάδα,"(38.9953683, 21.9877132, 0.0)",38.995368,21.987713,0.0
384,Western Australia,Hierarchy_01,"Western Australia, Australia","Western Australia, Australia","(-25.2303005, 121.0187246, 0.0)",-25.230301,121.018725,0.0
253,Alentejo,Hierarchy_00,Portugal,Portugal,"(40.0332629, -7.8896263, 0.0)",40.033263,-7.889626,0.0


In [151]:
df_Flag

Unnamed: 0,Address,Flag_and_Country,Flag
0,Argentina,🇦🇷 Argentina,🇦🇷
1,Australia,🇦🇺 Australia,🇦🇺
2,Austria,🇦🇹 Austria,🇦🇹
3,Canada,🇨🇦 Canada,🇨🇦
4,Chile,🇨🇱 Chile,🇨🇱
5,France,🇫🇷 France,🇫🇷
6,Germany,🇩🇪 Germany,🇩🇪
7,Greece,🇬🇷 Greece,🇬🇷
8,Hungary,🇭🇺 Hungary,🇭🇺
9,Israel,🇮🇱 Israel,🇮🇱


## Year over Year Analysis - Totals by Country

In [152]:
# Count the Wine Spectator scores for every (Review_Year, Address) pair and
# flatten the result into the columns Review_Year, Address, count.
df_WineByCountry = (
    df_Wine
    .groupby(['Review_Year', 'Address'])['Score']
    .count()
    .reset_index(name = 'count')
)

In [153]:
# GeoCache without the Geography column ...
df_GeoHierarchy = df_GeoCache.drop(columns = ['Geography'])

# ... narrowed to the Hierarchy_00 rows, keeping only unique rows.
df_GeoHier00 = (
    df_GeoHierarchy
    .loc[df_GeoHierarchy['Hierarchy'] == 'Hierarchy_00']
    .drop_duplicates()
)

In [154]:
# Left-join the Hierarchy_00 GeoCache columns and the flag columns onto the
# per-country counts (matching on Address), then order each Review_Year from
# the highest count to the lowest.
df_WineByCountry = (
    df_WineByCountry
    .merge(df_GeoHier00, how = 'left', on = 'Address')
    .merge(df_Flag, how = 'left', on = 'Address')
    .sort_values(by = ['Review_Year', 'count'], ascending = [True, False])
)

In [155]:
# Cast Review_Year to an integer so the year is represented as a whole number.
df_WineByCountry = df_WineByCountry.astype({'Review_Year': int})

## Bar Chart Race: Count of Scores by Country by Year

In [156]:
# Reshape for the bar chart race: one row per Review_Year, one column per
# Address, cell value = count. Combinations with no wines come out of the
# pivot as NaN, so they are replaced with 0 to facilitate graphing.
df_BRC_Wine = (
    df_WineByCountry
    .pivot(index = 'Review_Year', columns = 'Address', values = 'count')
    .fillna(0)
)

df_BRC_Wine

Address,Argentina,Australia,Austria,Canada,Chile,France,Germany,Greece,Hungary,Israel,Italy,Lebanon,New Zealand,Portugal,South Africa,Spain,USA,Ukraine,Uruguay
Review_Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
1988,0.0,1.0,0.0,0.0,0.0,45.0,0.0,0.0,0.0,0.0,15.0,1.0,1.0,0.0,0.0,3.0,34.0,0.0,0.0
1989,0.0,4.0,0.0,0.0,0.0,25.0,0.0,0.0,0.0,0.0,8.0,0.0,1.0,0.0,0.0,1.0,61.0,0.0,0.0
1990,0.0,2.0,0.0,0.0,0.0,24.0,3.0,0.0,0.0,0.0,15.0,0.0,0.0,0.0,0.0,5.0,51.0,0.0,0.0
1991,0.0,2.0,0.0,0.0,0.0,39.0,6.0,0.0,0.0,0.0,13.0,0.0,1.0,1.0,0.0,1.0,37.0,0.0,0.0
1992,0.0,4.0,0.0,0.0,1.0,42.0,0.0,0.0,0.0,0.0,11.0,0.0,1.0,0.0,0.0,5.0,36.0,0.0,0.0
1993,0.0,1.0,0.0,0.0,0.0,44.0,3.0,0.0,0.0,0.0,14.0,0.0,0.0,0.0,0.0,0.0,38.0,0.0,0.0
1994,0.0,5.0,0.0,0.0,0.0,16.0,1.0,0.0,0.0,0.0,14.0,0.0,2.0,5.0,0.0,3.0,54.0,0.0,0.0
1995,1.0,8.0,0.0,0.0,0.0,15.0,0.0,0.0,0.0,0.0,11.0,0.0,1.0,3.0,2.0,3.0,56.0,0.0,0.0
1996,0.0,16.0,0.0,0.0,2.0,16.0,0.0,0.0,0.0,0.0,6.0,0.0,1.0,1.0,1.0,1.0,56.0,0.0,0.0
1997,0.0,5.0,0.0,0.0,1.0,28.0,2.0,0.0,0.0,0.0,4.0,0.0,2.0,4.0,1.0,5.0,48.0,0.0,0.0


In [157]:
# Bar chart race of the yearly counts by country, shown in the Jupyter Notebook
# (no output file is given).
bcr.bar_chart_race(
    # data and output target
    df=df_BRC_Wine,
    filename=None,
    writer=None,
    fig=None,

    # bar layout and ordering
    orientation='h',
    sort='desc',
    fixed_order=True,
    fixed_max=True,
    bar_size=0.95,
    scale='linear',

    # animation timing
    steps_per_period=10,
    period_length=1000,
    interpolate_period=False,

    # text and labels
    title='Representation in Wine Spectator Top 100 Wines\nCount of Scores by Country by Year',
    title_size='',
    period_fmt='Review Year - {x:.0f}',
    label_bars=True,

    # figure appearance
    figsize=(6, 4),
    dpi=144,
    cmap='dark12',
    filter_column_colors=True,
)

In [158]:
# bar chart race - save the animation to an MP4 file
# (same chart settings as the in-notebook version; only `filename` differs)

# Make sure the output folder exists so that saving does not fail when the
# notebook is run from a fresh checkout.
os.makedirs('./images', exist_ok = True)

bcr.bar_chart_race(
    df = df_BRC_Wine,
    filename = './images/BRC_Count_Country_Year.mp4',
    orientation = 'h',
    sort = 'desc',
    #n_bars = 4,
    fixed_order = True,
    fixed_max = True,
    steps_per_period = 10,
    interpolate_period = False,
    label_bars = True,
    bar_size = 0.95,
    #period_label = {'x': 0.99, 'y': 0.25, 'ha': 'right', 'va': 'center'},
    period_fmt = 'Review Year - {x:.0f}',
    period_length = 1000,
    figsize = (6, 4),
    dpi = 144,
    cmap = 'dark12',
    title = 'Representation in Wine Spectator Top 100 Wines\nCount of Scores by Country by Year',
    title_size = '',
    #bar_label_size = 7,
    #tick_label_size = 7,
    scale = 'linear',
    writer = None,
    fig = None,
    #bar_kwargs = {'alpha': 0.7},
    filter_column_colors = True
)