# GeoCache: *Wine Spectator*'s Top 100 Wines, 1988-2020
The lists are available online at *Wine Spectator*'s [Top 100 Lists website](https://top100.winespectator.com/lists/).

## File Setup

In [1]:
# import and initialize main python libraries
import numpy as np
import pandas as pd
import shapefile as shp
import matplotlib.pyplot as plt
import seaborn as sns

# import libraries for file navigation
import os
import shutil
import glob
from pandas_ods_reader import read_ods

# import other packages
from scipy import stats
from sklearn import linear_model

# import geo packages
import geopandas as gpd
import descartes
from shapely.geometry import Point, Polygon

# import Geopy packages
import geopy
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

In [2]:
# initialize vizualization set
sns.set(style="whitegrid", palette="colorblind", color_codes=True)
sns.mpl.rc("figure", figsize=(10, 6))

# Jupyter Notebook
%matplotlib inline

## Dataframe Exploration

In [3]:
# Load the three input tables from the notebook's working directory.
# Note: save CSV files in UTF-8 format to preserve special characters.
df_Wine, df_GeoCache, df_GeoList = (
    pd.read_csv(csv_path)
    for csv_path in ('./CSV_Wines.csv', './CSV_GeoCache.csv', './CSV_GeoList.csv')
)

In [4]:
df_Wine.shape

(3301, 18)

In [5]:
df_Wine.dtypes

Review_Year           float64
Rank                   object
Vintage                object
Score                 float64
Price                  object
Winemaker              object
Wine                   object
Wine_Style             object
Grape_Blend            object
Blend_List             object
Geography              object
Cases_Made            float64
Cases_Imported        float64
Reviewer               object
Drink_now             float64
Best_Drink_from       float64
Best_Drink_Through    float64
Review                 object
dtype: object

In [6]:
df_GeoCache.shape

(1224, 3)

In [7]:
df_GeoList.shape

(445, 1)

In [8]:
df_Wine.sample(10)

Unnamed: 0,Review_Year,Rank,Vintage,Score,Price,Winemaker,Wine,Wine_Style,Grape_Blend,Blend_List,Geography,Cases_Made,Cases_Imported,Reviewer,Drink_now,Best_Drink_from,Best_Drink_Through,Review
2711,1993.0,12,1990,96.0,32,Château Le Gay,Pomerol,Red,Pomerol,,Pomerol,2000.0,,JS,,2000.0,,"An absolutely massive Bordeaux, with tons of e..."
2454,1996.0,55,1992,92.0,29,Trimbach,Riesling Alsace Cuvée Frédéric Émile,White,Riesling,,Alsace,3000.0,,,,,,Sophisticated and elegant. This is firmly stru...
3107,1989.0,8,1986,93.0,95,Château Mouton-Rothschild,Pauillac,Red,Blend,Bordeaux Blend Red,Pauillac,,,,,1999.0,2010.0,Defines Mouton's greatness with incredibly ric...
1670,2004.0,71,2001,91.0,22,Warwick,Three Cape Ladies Simonsberg-Stellenbosch,Red,Blend,"Cabernet Sauvignon, Pinotage and Merlot",Stellenbosch,6000.0,,JM,1.0,2004.0,2006.0,"Suave wine, with black and red currant fruit o..."
824,2012.0,25,2009,96.0,119,Fontodi,Colli della Toscana Centrale Flaccianello,Red,Sangiovese,,Toscana,5000.0,,BS,,2015.0,2032.0,The first impression of this red is purity and...
581,2015.0,82,2011,91.0,23,Viña Polkura,Syrah Marchigue,Red,Shiraz | Syrah,,Colchagua Valley,3500.0,,KM,1.0,2015.0,2020.0,"A ripe, dense and rich red, with concentrated ..."
646,2014.0,47,2011,95.0,105,Luce della Vite,Toscana Luce,Red,Blend,Merlot and Sangiovese,Toscana,,1600.0,BS,,2017.0,2035.0,"Polished and complex, featuring ripe black che..."
663,2014.0,64,2013,90.0,15,Cune,Rioja White Monopole,White,Rioja,,Rioja,,4000.0,TM,1.0,2014.0,2017.0,"This focused white shows bright, juicy flavors..."
1862,2002.0,63,2001,90.0,15,Jackson,Sauvignon Blanc Marlborough,White,Sauvignon Blanc,,Marlborough,10000.0,,HS,1.0,2002.0,,A particularly generous New Zealand Sauvignon ...
2077,2000.0,78,1997,91.0,40,Chimney Rock,Cabernet Sauvignon Napa Valley,Red,Cabernet Sauvignon,,Napa Valley,12017.0,,JL,1.0,2000.0,2008.0,"Rich and layered, with ripe blackberry, cherry..."


In [9]:
df_GeoCache.sample(10)

Unnamed: 0,Geography,Hierarchy,Address
17,Coonawarra,Hierarchy_00,Australia
234,Vino Nobile di Montepulciano,Hierarchy_00,Italy
765,Corton Le Corton,Hierarchy_02,"Côte de Beaune, Burgundy, France"
780,Puligny-Montrachet Sous le Puits,Hierarchy_02,"Côte de Beaune, Burgundy, France"
194,Friuli-Venezia-Giulia,Hierarchy_00,Italy
749,St.-Estèphe,Hierarchy_02,"Médoc, Bordeaux, France"
1102,Mendocino County,Hierarchy_03,"Mendocino County, North Coast, California, USA"
10,Hunter Valley,Hierarchy_00,Australia
477,Mâcon-La Roche Vineuse,Hierarchy_01,"Burgundy, France"
1012,Chassagne-Montrachet Les Grandes Ruchottes,Hierarchy_03,"Chassagne-Montrachet, Côte de Beaune, Burgundy..."


In [10]:
df_GeoList.sample(10)

Unnamed: 0,Address
181,"Hunter Valley, New South Wales, Australia"
262,"Naoussa, Macedonia, Greece, Greece"
193,"Knights Valley, Sonoma County, North Coast, Ca..."
168,"Graves, Bordeaux, France"
116,"Corton Grand Cru, Côte de Beaune, Burgundy, Fr..."
331,"Rosso di Montalcino, Tuscany, Italy"
146,"El Dorado, Sierra Foothills, California, USA"
6,"Alentejo, Portugal"
191,"Kamptal, Austria"
348,"Santa Barbara County, Central Coast, Californi..."


### Geocode the Address dataframe
Reference: [Python’s geocoding — Convert a list of addresses into a map](https://towardsdatascience.com/pythons-geocoding-convert-a-list-of-addresses-into-a-map-f522ef513fd6)

In [11]:
# Initialize Nominatim into geolocator variable.
# The same client, identified by the 'wine app' user agent, is reused for
# every geocoding request made in the cells below.
geolocator = Nominatim(user_agent='wine app')

In [12]:
geolocator.geocode('Castilla y León, Spain').raw

{'place_id': 258252333,
 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright',
 'osm_type': 'relation',
 'osm_id': 349041,
 'boundingbox': ['40.0824504', '43.2382034', '-7.077073', '-1.7753716'],
 'lat': '41.8037172',
 'lon': '-4.7471726',
 'display_name': 'Castilla y León, España',
 'class': 'boundary',
 'type': 'administrative',
 'importance': 0.9625997816800999,
 'icon': 'https://nominatim.openstreetmap.org/ui/mapicons//poi_boundary_administrative.p.20.png'}

In [13]:
geolocator.geocode('Castilla y León, Spain').point

Point(41.8037172, -4.7471726, 0.0)

In [14]:
# Geocode every Address in the GeoList dataframe with Nominatim.
# The public Nominatim service allows at most one request per second, so the
# lookups go through geopy's RateLimiter instead of being sent back-to-back.
# RateLimiter also retries failed requests and, with its default settings,
# yields None once the retries are exhausted. Addresses with no match are
# None as well; the following cells already handle a missing location.
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)
df_GeoList['loc'] = df_GeoList['Address'].apply(geocode)

In [15]:
# Extract the coordinates from each geocoding result as a plain tuple;
# addresses that could not be geocoded (falsy result) stay None.
def _point_or_none(loc):
    """Return ``tuple(loc.point)`` for a truthy geocode result, otherwise None."""
    if not loc:
        return None
    return tuple(loc.point)

df_GeoList['point'] = df_GeoList['loc'].apply(_point_or_none)

In [16]:
# Split the .point column into separate columns for lat, long, and altitude
# Each 'point' entry is either a 3-item tuple or None (see the cell that
# builds it); in the displayed result the None rows come out as NaN in all
# three new columns. Passing index=df_GeoList.index keeps the new values
# aligned with the existing rows.
df_GeoList[['lat', 'long', 'altitude']] = pd.DataFrame(df_GeoList['point'].to_list(), index=df_GeoList.index)

In [17]:
df_GeoList

Unnamed: 0,Address,loc,point,lat,long,altitude
0,"Abruzzo, Italy","(Abruzzo, Italia, (42.227681, 13.854983))","(42.227681, 13.854983, 0.0)",42.227681,13.854983,0.0
1,"Adelaide Hills, South Australia, Australia","(Adelaide Hills Council, South Australia, Aust...","(-34.901351649999995, 138.8293202817461, 0.0)",-34.901352,138.829320,0.0
2,"Aegean Islands, Greece","(Aegean, Σάμη - Αγία Ευφημία, Καραβόμυλος, Δήμ...","(38.2504094, 20.6304217, 0.0)",38.250409,20.630422,0.0
3,"Aglianico del Vulture, Basilicata, Italy",,,,,
4,"Agrelo, Mendoza, Argentina","(Agrelo, Distrito Agrelo, Departamento Luján d...","(-33.1184629, -68.8859261, 0.0)",-33.118463,-68.885926,0.0
5,"Alba, Piedmont | Piemonte, Italy",,,,,
6,"Alentejo, Portugal","(Alentejo, Portugal, (38.0551003, -7.8605799))","(38.0551003, -7.8605799, 0.0)",38.055100,-7.860580,0.0
7,"Alexander Valley, Sonoma County, North Coast, ...",,,,,
8,"Alicante, Valencia, Spain","(Alacant / Alicante, l'Alacantí, Alacant / Ali...","(38.353738, -0.4901846, 0.0)",38.353738,-0.490185,0.0
9,"Almansa, Castilla La Mancha, Spain","(Almansa, Albacete, Castilla-La Mancha, 02640,...","(38.8682065, -1.0978627, 0.0)",38.868206,-1.097863,0.0


### Append geography details to the GeoCache dataframe
Merge the geocoded addresses into the GeoCache dataframe to see how well the geography data is populated at each hierarchy level.

In [18]:
# Left-join the geocoding columns from df_GeoList onto df_GeoCache by Address,
# keeping every df_GeoCache row even when its Address has no match.
# NOTE(review): the result overwrites df_GeoCache, so re-running this cell
# without reloading the CSV merges the geocoding columns a second time.
df_GeoCache = df_GeoCache.merge(df_GeoList, how='left', on='Address')

In [19]:
# Persist the enriched GeoCache table to CSV, omitting the row index.
df_GeoCache.to_csv('./GeoCache.csv', index=False)

### Append Hierarchy 00 details to the df_Wine dataset

In [20]:
# Keep only the Hierarchy_00 rows of df_GeoCache.
is_hierarchy_00 = df_GeoCache['Hierarchy'] == 'Hierarchy_00'
df_GeoCache00 = df_GeoCache[is_hierarchy_00]

# Preview ten random rows of the filtered frame.
df_GeoCache00.sample(10)

Unnamed: 0,Geography,Hierarchy,Address,loc,point,lat,long,altitude
33,Kremstal,Hierarchy_00,Austria,"(Österreich, (47.2000338, 13.199959))","(47.2000338, 13.199959, 0.0)",47.200034,13.199959,0.0
60,Moulis-en-Médoc,Hierarchy_00,France,"(France, (46.603354, 1.8883335))","(46.603354, 1.8883335, 0.0)",46.603354,1.888334,0.0
71,Bourgogne,Hierarchy_00,France,"(France, (46.603354, 1.8883335))","(46.603354, 1.8883335, 0.0)",46.603354,1.888334,0.0
35,Wachau,Hierarchy_00,Austria,"(Österreich, (47.2000338, 13.199959))","(47.2000338, 13.199959, 0.0)",47.200034,13.199959,0.0
99,Bonnes Mares,Hierarchy_00,France,"(France, (46.603354, 1.8883335))","(46.603354, 1.8883335, 0.0)",46.603354,1.888334,0.0
281,Cava,Hierarchy_00,Spain,"(España, (39.3260685, -4.8379791))","(39.3260685, -4.8379791, 0.0)",39.326068,-4.837979,0.0
46,Rapel Valley,Hierarchy_00,Chile,"(Chile, (-31.7613365, -71.3187697))","(-31.7613365, -71.3187697, 0.0)",-31.761336,-71.31877,0.0
205,Barbera d'Asti,Hierarchy_00,Italy,"(Italia, (42.6384261, 12.674297))","(42.6384261, 12.674297, 0.0)",42.638426,12.674297,0.0
314,Rutherford,Hierarchy_00,USA,"(United States, (39.7837304, -100.4458825))","(39.7837304, -100.4458825, 0.0)",39.78373,-100.445882,0.0
65,Pessac-Léognan,Hierarchy_00,France,"(France, (46.603354, 1.8883335))","(46.603354, 1.8883335, 0.0)",46.603354,1.888334,0.0


In [21]:
# Attach the Hierarchy_00 geography columns to every wine by Geography;
# the left join keeps wines whose Geography has no Hierarchy_00 entry.
df_Wine00 = df_Wine.merge(df_GeoCache00, how='left', on='Geography')

# Preview ten random rows of the merged frame.
df_Wine00.sample(10)

Unnamed: 0,Review_Year,Rank,Vintage,Score,Price,Winemaker,Wine,Wine_Style,Grape_Blend,Blend_List,...,Best_Drink_from,Best_Drink_Through,Review,Hierarchy,Address,loc,point,lat,long,altitude
3121,1989.0,19,1986,95.0,29,Château La Dominique,St.-Emilion,Red,Blend,Bordeaux Blend Red,...,1996.0,,"Powerful, magnificent, complex and rich, with ...",Hierarchy_00,France,"(France, (46.603354, 1.8883335))","(46.603354, 1.8883335, 0.0)",46.603354,1.888334,0.0
3141,1989.0,39,1987,95.0,40,Diamond Creek,Cabernet Sauvignon Napa Valley Volcanic Hill,Red,Cabernet Sauvignon,,...,1998.0,2002.0,"Incredibly intense and concentrated, deep and ...",Hierarchy_00,USA,"(United States, (39.7837304, -100.4458825))","(39.7837304, -100.4458825, 0.0)",39.78373,-100.445882,0.0
1737,2003.0,36,2001,91.0,17,Yangarra Park,Shiraz McLaren Vale Appellation Series,Red,Shiraz | Syrah,,...,2003.0,2012.0,"Rich and plump, a flood of fruit, with powerfu...",Hierarchy_00,Australia,"(Australia, (-24.7761086, 134.755))","(-24.7761086, 134.755, 0.0)",-24.776109,134.755,0.0
2238,1998.0,14,1996,92.0,14,Meridian,Chardonnay Edna Valley Coastal Reserve,White,Chardonnay,,...,,,"Beautifully crafted, this California white is ...",Hierarchy_00,USA,"(United States, (39.7837304, -100.4458825))","(39.7837304, -100.4458825, 0.0)",39.78373,-100.445882,0.0
705,2013.0,4,2010,95.0,92,Hewitt,Cabernet Sauvignon Rutherford,Red,Cabernet Sauvignon,,...,2013.0,2026.0,"Deliciously pure, rich and deep, featuring tie...",Hierarchy_00,USA,"(United States, (39.7837304, -100.4458825))","(39.7837304, -100.4458825, 0.0)",39.78373,-100.445882,0.0
1102,2009.0,1,2005,95.0,27,Columbia Crest,Cabernet Sauvignon Columbia Valley Reserve,Red,Cabernet Sauvignon,,...,2010.0,2015.0,"Ripe in flavor, with a vivid array of black cu...",Hierarchy_00,USA,"(United States, (39.7837304, -100.4458825))","(39.7837304, -100.4458825, 0.0)",39.78373,-100.445882,0.0
1840,2002.0,39,1998,95.0,80,Paolo Scavino,Barolo Carobric,Red,Blend,Nebbiolo,...,2005.0,,"Very well-done. Bright fruit, with hints of dr...",Hierarchy_00,Italy,"(Italia, (42.6384261, 12.674297))","(42.6384261, 12.674297, 0.0)",42.638426,12.674297,0.0
2755,1993.0,53,1991,92.0,14,Byron,Chardonnay Santa Barbara County,White,Chardonnay,,...,1993.0,,"Packs in lots of flavor, with ripe, spicy hone...",Hierarchy_00,USA,"(United States, (39.7837304, -100.4458825))","(39.7837304, -100.4458825, 0.0)",39.78373,-100.445882,0.0
1115,2009.0,14,2007,94.0,60,Two Hands,Shiraz Barossa Valley Bella's Garden,Red,Shiraz | Syrah,,...,2010.0,2017.0,"Ripe, fleshy and generous, but not nearly over...",Hierarchy_00,Australia,"(Australia, (-24.7761086, 134.755))","(-24.7761086, 134.755, 0.0)",-24.776109,134.755,0.0
1158,2009.0,57,2006,91.0,20,Viña Santa Rita,Cabernet Sauvignon Maipo Valley Medalla Real S...,Red,Cabernet Sauvignon,,...,2010.0,2012.0,"A ripe, concentrated Chilean red, with a broad...",Hierarchy_00,Chile,"(Chile, (-31.7613365, -71.3187697))","(-31.7613365, -71.3187697, 0.0)",-31.761336,-71.31877,0.0


### Append Hierarchy 01 details to the df_Wine dataset

In [22]:
# Keep only the Hierarchy_01 rows of df_GeoCache.
is_hierarchy_01 = df_GeoCache['Hierarchy'] == 'Hierarchy_01'
df_GeoCache01 = df_GeoCache[is_hierarchy_01]

# Preview ten random rows of the filtered frame.
df_GeoCache01.sample(10)

Unnamed: 0,Geography,Hierarchy,Address,loc,point,lat,long,altitude
657,Santa Barbara County,Hierarchy_01,"California, USA","(California, United States, (36.7014631, -118....","(36.7014631, -118.755997, 0.0)",36.701463,-118.755997,0.0
676,Knights Valley,Hierarchy_01,"California, USA","(California, United States, (36.7014631, -118....","(36.7014631, -118.755997, 0.0)",36.701463,-118.755997,0.0
455,Musigny,Hierarchy_01,"Burgundy, France","(Bourgogne, France métropolitaine, France, (47...","(47.27808725, 4.222486304306048, 0.0)",47.278087,4.222486,0.0
460,Gevrey-Chambertin,Hierarchy_01,"Burgundy, France","(Bourgogne, France métropolitaine, France, (47...","(47.27808725, 4.222486304306048, 0.0)",47.278087,4.222486,0.0
430,Chassagne-Montrachet En Remilly,Hierarchy_01,"Burgundy, France","(Bourgogne, France métropolitaine, France, (47...","(47.27808725, 4.222486304306048, 0.0)",47.278087,4.222486,0.0
481,Arbois Pupillin,Hierarchy_01,"Jura, France","(Jura, Bourgogne-Franche-Comté, France métropo...","(46.783362499999996, 5.783285726354901, 0.0)",46.783362,5.783286,0.0
546,Emilia IGT,Hierarchy_01,"Emilia-Romagna, Italy","(Emilia-Romagna, Italia, (44.525696, 11.039437))","(44.525696, 11.039437, 0.0)",44.525696,11.039437,0.0
389,Niederösterreich,Hierarchy_01,"Niederösterreich, Austria","(Niederösterreich, Österreich, (48.2817813, 15...","(48.2817813, 15.7632457, 0.0)",48.281781,15.763246,0.0
369,Barossa Valley,Hierarchy_01,"South Australia, Australia","(South Australia, Australia, (-30.5343665, 135...","(-30.5343665, 135.6301212, 0.0)",-30.534367,135.630121,0.0
556,Molise,Hierarchy_01,"Molise, Italy","(Molise, Italia, (41.684672, 14.595614))","(41.684672, 14.595614, 0.0)",41.684672,14.595614,0.0


In [23]:
# Attach the Hierarchy_01 geography columns to every wine by Geography;
# the left join keeps wines whose Geography has no Hierarchy_01 entry.
df_Wine01 = df_Wine.merge(df_GeoCache01, how='left', on='Geography')

# Preview ten random rows of the merged frame.
df_Wine01.sample(10)

Unnamed: 0,Review_Year,Rank,Vintage,Score,Price,Winemaker,Wine,Wine_Style,Grape_Blend,Blend_List,...,Best_Drink_from,Best_Drink_Through,Review,Hierarchy,Address,loc,point,lat,long,altitude
873,2012.0,72,2010,93.0,35,Turley,Zinfandel Paso Robles Pesenti Vineyard,Red,Zinfandel,,...,2012.0,2020.0,That rare Zin that shows both richness and a s...,Hierarchy_01,"California, USA","(California, United States, (36.7014631, -118....","(36.7014631, -118.755997, 0.0)",36.701463,-118.755997,0.0
2947,1991.0,45,1989,94.0,45,Philippe Pichon,Condrieu,Dessert & Fortified,Viognier,,...,,,"Ripe, rich and seductive, a plush pillow of a ...",Hierarchy_01,"Rhône, France","(Rhône, Circonscription départementale du Rhôn...","(45.8802348, 4.564533629559522, 0.0)",45.880235,4.564534,0.0
2646,1994.0,44,1992,91.0,12,Wynns Coonawarra Estate,Chardonnay Coonawarra,White,Chardonnay,,...,1994.0,1997.0,"Ripe and distinctive, expanding and accelerati...",Hierarchy_01,"South Australia, Australia","(South Australia, Australia, (-30.5343665, 135...","(-30.5343665, 135.6301212, 0.0)",-30.534367,135.630121,0.0
1285,2008.0,84,2004,91.0,39,Stefano Farina,Barolo,Red,Blend,Nebbiolo,...,2011.0,,"Plum skin on the nose, with a hint of licorice...",Hierarchy_01,"Piedmont | Piemonte, Italy","(Piedmont Properties, 78, SP50, San Marzano Ol...","(44.7605629, 8.2998538, 0.0)",44.760563,8.299854,0.0
1542,2005.0,41,2004,91.0,11,St.-Urbans-Hof,Riesling QbA Mosel-Saar-Ruwer,White,Riesling,,...,2005.0,2010.0,"Very seductive from the smoky, mineral-tinged ...",Hierarchy_01,"Mosel, Germany","(Mosel, Lützel, Koblenz, Rheinland-Pfalz, 5607...","(50.3659752, 7.5858251, 0.0)",50.365975,7.585825,0.0
1605,2004.0,4,2000,98.0,105,Paolo Scavino,Barolo Bric dël Fiasc,Red,Blend,Nebbiolo,...,2010.0,,"Very, very ripe fruit, with strawberries and p...",Hierarchy_01,"Piedmont | Piemonte, Italy","(Piedmont Properties, 78, SP50, San Marzano Ol...","(44.7605629, 8.2998538, 0.0)",44.760563,8.299854,0.0
3144,1989.0,42,1985,95.0,55,Opus One,Napa Valley,Red,Blend,Bordeaux Blend Red,...,1994.0,1998.0,"Full throttle Cabernet, intense, deep, rich, a...",Hierarchy_01,"California, USA","(California, United States, (36.7014631, -118....","(36.7014631, -118.755997, 0.0)",36.701463,-118.755997,0.0
2958,1991.0,56,1988,93.0,18,Revere,Chardonnay Napa Valley Reserve,White,Chardonnay,,...,1995.0,,"Layers of smoke, vanilla, spice and pear flavo...",Hierarchy_01,"California, USA","(California, United States, (36.7014631, -118....","(36.7014631, -118.755997, 0.0)",36.701463,-118.755997,0.0
699,2014.0,98,2012,90.0,19,Domaine Terlato & Chapoutier,Shiraz-Viognier Victoria,Red,Blend,Shiraz-Viognier,...,2016.0,2020.0,"Vibrant, expressive and distinctive, with citr...",Hierarchy_01,"Victoria, Australia","(Victoria, Australia, (-36.5986096, 144.6780052))","(-36.5986096, 144.6780052, 0.0)",-36.59861,144.678005,0.0
1855,2002.0,54,2001,91.0,26,Cloudy Bay,Sauvignon Blanc Marlborough,White,Sauvignon Blanc,,...,2002.0,,"Lively, jazzy style absolutely brims with racy...",Hierarchy_01,"Marlborough, New Zealand","(Marlborough, New Zealand / Aotearoa, (-41.474...","(-41.47447475, 173.8330262577606, 0.0)",-41.474475,173.833026,0.0


### Save files for use in other notebooks

In [24]:
# Remove duplicates by row position: 2017 (46), 2015 (73), 1995 (94).
# The labels to drop are looked up on each frame *before* anything is removed
# from it. Previously df_Wine01 was filtered with labels read from df_Wine00
# after its three rows had already been dropped, which shifted the positions
# and removed the wrong rows from df_Wine01.
# NOTE(review): assumes the duplicates sit at the same positions in both
# merged frames, as the original cell did - confirm against the data.
# NOTE(review): not idempotent - re-running this cell drops three more rows.
DUPLICATE_ROW_POSITIONS = [2596, 574, 346]
df_Wine00 = df_Wine00.drop(df_Wine00.index[DUPLICATE_ROW_POSITIONS])
df_Wine01 = df_Wine01.drop(df_Wine01.index[DUPLICATE_ROW_POSITIONS])

In [25]:
df_Wine00.shape

(3301, 25)

In [26]:
df_Wine01.shape

(3301, 25)

In [27]:
# Write both merged datasets to CSV, omitting the row index.
for frame, csv_path in ((df_Wine00, './Wine_Hier00.csv'), (df_Wine01, './Wine_Hier01.csv')):
    frame.to_csv(csv_path, index=False)