# GeoCache: *Wine Spectator*'s Top 100 Wines, 1988-2020
The lists are available online at *Wine Spectator*'s [Top 100 Lists website](https://top100.winespectator.com/lists/).

## File Setup

In [1]:
# import and initialize main python libraries
import numpy as np
import pandas as pd
import shapefile as shp
import matplotlib.pyplot as plt
import seaborn as sns

# import libraries for file navigation
import os
import shutil
import glob
from pandas_ods_reader import read_ods

# import other packages
from scipy import stats
from sklearn import linear_model

# import geo packages
import geopandas as gpd
import descartes
from shapely.geometry import Point, Polygon

# import Geopy packages
import geopy
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

In [2]:
# Initialize the visualization settings: seaborn theme first, then the
# default matplotlib figure size.
sns.set(
    style="whitegrid",
    palette="colorblind",
    color_codes=True,
)
plt.rc("figure", figsize=(10, 6))

# Jupyter Notebook
%matplotlib inline

## Dataframe Exploration

In [3]:
# Note: save CSV files in UTF-8 format to preserve special characters.
# The three input tables are read in the order wines, geo cache, geo list.
df_Wine, df_GeoCache, df_GeoList = (
    pd.read_csv(csv_path)
    for csv_path in ('./CSV_Wines.csv', './CSV_GeoCache.csv', './CSV_GeoList.csv')
)

In [4]:
# Number of rows and columns in the wine table.
df_Wine.shape

(3301, 18)

In [5]:
# Column dtypes as inferred by read_csv (no dtypes were specified on load).
# In the recorded output Rank, Vintage and Price come back as object, not numeric.
df_Wine.dtypes

Review_Year           float64
Rank                   object
Vintage                object
Score                 float64
Price                  object
Winemaker              object
Wine                   object
Wine_Style             object
Grape_Blend            object
Blend_List             object
Geography              object
Cases_Made            float64
Cases_Imported        float64
Reviewer               object
Drink_now             float64
Best_Drink_from       float64
Best_Drink_Through    float64
Review                 object
dtype: object

In [6]:
# Number of rows and columns in the geography cache table.
df_GeoCache.shape

(1224, 3)

In [7]:
# Number of rows and columns in the address list table.
df_GeoList.shape

(445, 1)

In [8]:
# Show 10 randomly chosen wine rows; no random_state is passed, so the rows differ between runs.
df_Wine.sample(10)

Unnamed: 0,Review_Year,Rank,Vintage,Score,Price,Winemaker,Wine,Wine_Style,Grape_Blend,Blend_List,Geography,Cases_Made,Cases_Imported,Reviewer,Drink_now,Best_Drink_from,Best_Drink_Through,Review
1984,2001.0,85,1999,90.0,23,Fattoria di Felsina,Chianti Classico Berardenga,Red,Chianti,,Chianti,14165.0,,JS,,2002.0,2005.0,"Lots of dried cherry and crushed black fruit, ..."
2475,1996.0,76,1994,90.0,12,J. Lohr,Chardonnay Monterey Riverstone,White,Chardonnay,,Monterey,86000.0,,,,,,"A complex array of elegant pear, spice, honey ..."
496,2016.0,97,2013,91.0,34,Le Macchiole,Bolgheri,Red,Blend,"Merlot, Cabernet Franc and Syrah",Bolgheri,,900.0,BS,,2018.0,2024.0,"Sleek and elegant, with concentrated flavors o..."
37,2020.0,38,2017,94.0,35,Bodegas Juan Gil,Jumilla Blue Label,Red,Blend,"Monastrell, Cabernet Sauvignon and Syrah.",Jumilla,2500.0,1250.0,TM,1.0,2020.0,2032.0,"This brawny red shows a savory character, with..."
485,2016.0,86,2012,93.0,36,Vigneti del Vulture,Aglianico del Vulture Piano del Cerro,Red,Aglianico,,Aglianico del Vulture,5000.0,,AN,1.0,2016.0,2027.0,A dense and tarry red that glides on the silky...
2995,1991.0,96,1982,90.0,14,Lar de Lares,Tierra de Barros Gran Reserva,Red,Tempranillo,,Ribera del Guadiana,3500.0,,,,,,"Distinctive, rich, mature and elegant, with pl..."
1685,2004.0,86,2002,90.0,12,La Valentina,Montepulciano d'Abruzzo,Red,Montepulciano,,Abruzzo,11600.0,,JS,1.0,2004.0,2008.0,Plenty of soft and spicy character with hints ...
1025,2010.0,26,2005,94.0,35,Bodegas Resalte de Peñafiel,Ribera del Duero Crianza,Red,Tempranillo,,Ribera del Duero,5000.0,,TM,1.0,2010.0,2016.0,"Fresh and focused, this firm red delivers ripe..."
2519,1995.0,20,1992,93.0,30,Anderson's Conn Valley,Cabernet Sauvignon Napa Valley Estate Reserve,Red,Cabernet Sauvignon,,Napa Valley,4243.0,,,,1999.0,,"A beautifully crafted red with rich, complex c..."
2179,1999.0,80,1997,90.0,18,The Hess Collection,Chardonnay Napa Valley,White,Chardonnay,,Napa Valley,30000.0,,JL,1.0,1999.0,2003.0,"Bright and inviting, the lively intensity of t..."


In [9]:
# Show 10 randomly chosen geography cache rows (Geography, Hierarchy, Address).
df_GeoCache.sample(10)

Unnamed: 0,Geography,Hierarchy,Address
482,Côtes du Jura,Hierarchy_01,"Jura, France"
176,Macedonia – Greece,Hierarchy_00,Greece
1109,Spring Mountain District,Hierarchy_03,"Napa County, North Coast, California, USA"
903,Valpolicella Ripasso,Hierarchy_02,"Valpolicella, Veneto, Italy"
710,Agrelo,Hierarchy_02,"Agrelo, Mendoza, Argentina"
1040,Gevrey-Chambertin,Hierarchy_03,"Gevrey-Chambertin, Côte de Nuits, Burgundy, Fr..."
792,Marsannay,Hierarchy_02,"Côte de Nuits, Burgundy, France"
943,Santa Ynez Valley,Hierarchy_02,"Central Coast, California, USA"
74,Beaune,Hierarchy_00,France
794,Clos de Tart,Hierarchy_02,"Côte de Nuits, Burgundy, France"


In [10]:
# Show 10 randomly chosen addresses from the list that will be geocoded.
df_GeoList.sample(10)

Unnamed: 0,Address
379,"St.-Chinian, Languedoc-Roussillon, France"
133,"Côtes du Roussillon-Villages, Languedoc-Roussi..."
88,"Chassagne-Montrachet Morgeot, Chassagne-Montra..."
320,"Rhodes, Aegean Islands, Greece"
134,"Crémant de Bourgogne, Bourgogne, Burgundy, France"
149,"Eola-Amity Hills, Willamette Valley, Oregon, USA"
68,"Carneros, Napa Valley, Napa County, North Coas..."
6,"Alentejo, Portugal"
145,"Edna Valley, San Luis Obispo County, Central C..."
344,"San Rafael, Mendoza, Argentina"


### Geocode the Address dataframe
Reference: [Python’s geocoding — Convert a list of addresses into a map](https://towardsdatascience.com/pythons-geocoding-convert-a-list-of-addresses-into-a-map-f522ef513fd6)

In [11]:
# Initialize Nominatim into geolocator variable.
# geopy's default request timeout is 1 second, which is easily exceeded when
# the service is busy and surfaces as GeocoderTimedOut part-way through a long
# batch; allow each request up to 10 seconds instead.
geolocator = Nominatim(user_agent='wine app', timeout=10)

In [12]:
# Geocode a single address and display the `.raw` attribute of the result,
# to inspect what the service returns.
geolocator.geocode('Castilla y León, Spain').raw

{'place_id': 258252333,
 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright',
 'osm_type': 'relation',
 'osm_id': 349041,
 'boundingbox': ['40.0824504', '43.2382034', '-7.077073', '-1.7753716'],
 'lat': '41.8037172',
 'lon': '-4.7471726',
 'display_name': 'Castilla y León, España',
 'class': 'boundary',
 'type': 'administrative',
 'importance': 0.9625997816800999,
 'icon': 'https://nominatim.openstreetmap.org/ui/mapicons//poi_boundary_administrative.p.20.png'}

In [13]:
# Same lookup, displaying `.point` — the attribute that is later unpacked
# into the lat / long / altitude columns.
geolocator.geocode('Castilla y León, Spain').point

Point(41.8037172, -4.7471726, 0.0)

In [14]:
# Apply geolocator to the Address column in the GeoList dataframe.
# Nominatim's usage policy allows at most one request per second, so the
# geocoder is wrapped in geopy's RateLimiter rather than called back-to-back.
# With RateLimiter's defaults a lookup that still fails after its retries
# returns None instead of raising; the next cell already handles None.
geocode_limited = RateLimiter(geolocator.geocode, min_delay_seconds=1)
df_GeoList['loc'] = df_GeoList['Address'].apply(geocode_limited)

In [15]:
def _point_or_none(location):
    """Return the geocode result's .point as a tuple, or None when there is no result."""
    if not location:
        return None
    return tuple(location.point)


# Get .point containing lat/long from Geocode response, if not none.
df_GeoList['point'] = df_GeoList['loc'].apply(_point_or_none)

In [16]:
# Split the .point column into separate columns for lat, long, and altitude.
# Addresses that failed to geocode have point == None. Substitute an all-NaN
# triple so every row carries three values: if the first entry is None, pandas
# does not treat the list as rows of records and the three-column assignment
# below fails.
missing_point = (np.nan, np.nan, np.nan)
point_rows = [
    pt if isinstance(pt, tuple) else missing_point
    for pt in df_GeoList['point']
]
# columns=range(3) keeps the default 0/1/2 labels and also yields three
# (empty) columns when the frame has no rows.
df_GeoList[['lat', 'long', 'altitude']] = pd.DataFrame(
    point_rows, index=df_GeoList.index, columns=range(3)
)

In [17]:
# Preview the first rows of the geocoded list instead of rendering the whole
# frame; rows with empty loc/point/lat/long are addresses that were not found.
df_GeoList.head(10)

Unnamed: 0,Address,loc,point,lat,long,altitude
0,"Abruzzo, Italy","(Abruzzo, Italia, (42.227681, 13.854983))","(42.227681, 13.854983, 0.0)",42.227681,13.854983,0.0
1,"Adelaide Hills, South Australia, Australia","(Adelaide Hills Council, South Australia, Aust...","(-34.901351649999995, 138.8293202817461, 0.0)",-34.901352,138.829320,0.0
2,"Aegean Islands, Greece","(Aegean, Σάμη - Αγία Ευφημία, Καραβόμυλος, Δήμ...","(38.2504094, 20.6304217, 0.0)",38.250409,20.630422,0.0
3,"Aglianico del Vulture, Basilicata, Italy",,,,,
4,"Agrelo, Mendoza, Argentina","(Agrelo, Distrito Agrelo, Departamento Luján d...","(-33.1184629, -68.8859261, 0.0)",-33.118463,-68.885926,0.0
5,"Alba, Piedmont | Piemonte, Italy",,,,,
6,"Alentejo, Portugal","(Alentejo, Portugal, (38.0551003, -7.8605799))","(38.0551003, -7.8605799, 0.0)",38.055100,-7.860580,0.0
7,"Alexander Valley, Sonoma County, North Coast, ...",,,,,
8,"Alicante, Valencia, Spain","(Alacant / Alicante, l'Alacantí, Alacant / Ali...","(38.353738, -0.4901846, 0.0)",38.353738,-0.490185,0.0
9,"Almansa, Castilla La Mancha, Spain","(Almansa, Albacete, Castilla-La Mancha, 02640,...","(38.8682065, -1.0978627, 0.0)",38.868206,-1.097863,0.0


### Append geography details to the GeoCache dataframe
Determine how well populated geography is at different hierarchy levels.

In [18]:
# Attach the geocoding columns to every GeoCache row with a matching Address;
# the left join keeps GeoCache rows whose Address has no match.
df_GeoCache = df_GeoCache.merge(df_GeoList, how='left', on='Address')

In [19]:
# Write the enriched GeoCache table to disk without the index column.
df_GeoCache.to_csv('./GeoCache.csv', index=False)

### Append Hierarchy 00 details to the df_Wine dataset

In [20]:
# Keep only the GeoCache rows tagged Hierarchy_00.
is_hier00 = df_GeoCache['Hierarchy'] == 'Hierarchy_00'
df_GeoCache00 = df_GeoCache.loc[is_hier00]

df_GeoCache00.sample(10)

Unnamed: 0,Geography,Hierarchy,Address,loc,point,lat,long,altitude
225,Chianti,Hierarchy_00,Italy,"(Italia, (42.6384261, 12.674297))","(42.6384261, 12.674297, 0.0)",42.638426,12.674297,0.0
348,Côtes de Provence,Hierarchy_00,France,"(France, (46.603354, 1.8883335))","(46.603354, 1.8883335, 0.0)",46.603354,1.888334,0.0
70,Crémant de Bourgogne,Hierarchy_00,France,"(France, (46.603354, 1.8883335))","(46.603354, 1.8883335, 0.0)",46.603354,1.888334,0.0
321,Knights Valley,Hierarchy_00,USA,"(United States, (39.7837304, -100.4458825))","(39.7837304, -100.4458825, 0.0)",39.78373,-100.445882,0.0
344,Walla Walla Valley,Hierarchy_00,USA,"(United States, (39.7837304, -100.4458825))","(39.7837304, -100.4458825, 0.0)",39.78373,-100.445882,0.0
76,Chassagne-Montrachet Les Chenevottes,Hierarchy_00,France,"(France, (46.603354, 1.8883335))","(46.603354, 1.8883335, 0.0)",46.603354,1.888334,0.0
248,Gisborne,Hierarchy_00,New Zealand,"(New Zealand / Aotearoa, (-41.5000831, 172.834...","(-41.5000831, 172.8344077, 0.0)",-41.500083,172.834408,0.0
210,Piemonte,Hierarchy_00,Italy,"(Italia, (42.6384261, 12.674297))","(42.6384261, 12.674297, 0.0)",42.638426,12.674297,0.0
186,Greco di Tufo,Hierarchy_00,Italy,"(Italia, (42.6384261, 12.674297))","(42.6384261, 12.674297, 0.0)",42.638426,12.674297,0.0
34,Niederösterreich,Hierarchy_00,Austria,"(Österreich, (47.2000338, 13.199959))","(47.2000338, 13.199959, 0.0)",47.200034,13.199959,0.0


In [21]:
# Left-join the Hierarchy_00 geography onto the wines by Geography. Every wine
# row is kept; a Geography that appears more than once in df_GeoCache00
# produces one output row per match.
df_Wine00 = df_Wine.merge(df_GeoCache00, how='left', on='Geography')

df_Wine00.sample(10)

Unnamed: 0,Review_Year,Rank,Vintage,Score,Price,Winemaker,Wine,Wine_Style,Grape_Blend,Blend_List,...,Best_Drink_from,Best_Drink_Through,Review,Hierarchy,Address,loc,point,lat,long,altitude
1529,2005.0,28,2002,96.0,65,Leeuwin,Chardonnay Margaret River Art Series,White,Chardonnay,,...,2005.0,2020.0,A wine of marvelous richness and unexpected su...,Hierarchy_00,Australia,"(Australia, (-24.7761086, 134.755))","(-24.7761086, 134.755, 0.0)",-24.776109,134.755,0.0
667,2014.0,66,2012,91.0,17,Fowles,Shiraz Victoria Are You Game?,Red,Shiraz | Syrah,,...,2014.0,2018.0,"Vivid, peppery and appealing, with cherry and ...",Hierarchy_00,Australia,"(Australia, (-24.7761086, 134.755))","(-24.7761086, 134.755, 0.0)",-24.776109,134.755,0.0
2197,1999.0,96,1996,90.0,25,Château d'Issan,Margaux,Red,Blend,Bordeaux Blend Red,...,2000.0,,"Plenty of blackberry, currant and cherry chara...",Hierarchy_00,France,"(France, (46.603354, 1.8883335))","(46.603354, 1.8883335, 0.0)",46.603354,1.888334,0.0
2630,1994.0,28,1990,93.0,45,Paolo Scavino,Barolo Bric dël Fiasc,Red,Blend,Nebbiolo,...,2000.0,,"Tannic, flavorful and complex, cascading its l...",Hierarchy_00,Italy,"(Italia, (42.6384261, 12.674297))","(42.6384261, 12.674297, 0.0)",42.638426,12.674297,0.0
784,2013.0,83,2010,96.0,130,Caymus,Cabernet Sauvignon Napa Valley Special Selection,Red,Cabernet Sauvignon,,...,2013.0,2025.0,"A seductive style that's openly fruity, showin...",Hierarchy_00,USA,"(United States, (39.7837304, -100.4458825))","(39.7837304, -100.4458825, 0.0)",39.78373,-100.445882,0.0
1968,2001.0,67,1999,92.0,37,Shafer,Chardonnay Napa Valley Carneros Red Shoulder R...,White,Chardonnay,,...,2001.0,2005.0,"Sleek, rich and elegant, this Chardonnay offer...",Hierarchy_00,USA,"(United States, (39.7837304, -100.4458825))","(39.7837304, -100.4458825, 0.0)",39.78373,-100.445882,0.0
1807,2002.0,6,1999,94.0,50,Duckhorn,Cabernet Sauvignon Napa Valley,Red,Cabernet Sauvignon,,...,2004.0,2012.0,"Dusty berry, currant, anise and cedary oak aro...",Hierarchy_00,USA,"(United States, (39.7837304, -100.4458825))","(39.7837304, -100.4458825, 0.0)",39.78373,-100.445882,0.0
140,2019.0,41,2017,93.0,24,Carol Shelton,Coquille Blanc Paso Robles,White,Blend,"Grenache Blanc, Roussanne, Viognier and Marsan...",...,2019.0,2024.0,"Fresh and spicy pear, lime and green apple fla...",Hierarchy_00,USA,"(United States, (39.7837304, -100.4458825))","(39.7837304, -100.4458825, 0.0)",39.78373,-100.445882,0.0
1464,2006.0,63,2003,92.0,38,Château Prieuré-Lichine,Margaux,Red,Blend,Bordeaux Blend Red,...,2011.0,,Aromas of blackberry and smoke with hints of t...,Hierarchy_00,France,"(France, (46.603354, 1.8883335))","(46.603354, 1.8883335, 0.0)",46.603354,1.888334,0.0
1978,2001.0,77,1997,93.0,40,Trimbach,Riesling Alsace Cuvée Frédéric Émile,White,Riesling,,...,2001.0,2008.0,"Ultraseductive, this Riesling offers suave not...",Hierarchy_00,France,"(France, (46.603354, 1.8883335))","(46.603354, 1.8883335, 0.0)",46.603354,1.888334,0.0


### Append Hierarchy 01 details to the df_Wine dataset

In [22]:
# Keep only the GeoCache rows tagged Hierarchy_01.
is_hier01 = df_GeoCache['Hierarchy'] == 'Hierarchy_01'
df_GeoCache01 = df_GeoCache.loc[is_hier01]

df_GeoCache01.sample(10)

Unnamed: 0,Geography,Hierarchy,Address,loc,point,lat,long,altitude
495,Pouilly-Fumé,Hierarchy_01,"Loire, France","(Loire, Auvergne-Rhône-Alpes, France métropoli...","(45.75385355, 4.045473682551104, 0.0)",45.753854,4.045474,0.0
651,Arroyo Grande Valley,Hierarchy_01,"California, USA","(California, United States, (36.7014631, -118....","(36.7014631, -118.755997, 0.0)",36.701463,-118.755997,0.0
484,Côtes du Roussillon-Villages,Hierarchy_01,"Languedoc-Roussillon, France","(Languedoc-Roussillon, France métropolitaine, ...","(43.65420305, 3.674669940206605, 0.0)",43.654203,3.67467,0.0
658,Central Coast,Hierarchy_01,"California, USA","(California, United States, (36.7014631, -118....","(36.7014631, -118.755997, 0.0)",36.701463,-118.755997,0.0
388,Kremstal,Hierarchy_01,"Kremstal, Austria","(Inzersdorf im Kremstal, Bezirk Kirchdorf, Obe...","(47.9263917, 14.0780469, 0.0)",47.926392,14.078047,0.0
692,Ribbon Ridge,Hierarchy_01,"Oregon, USA","(Oregon, United States, (43.9792797, -120.7372...","(43.9792797, -120.737257, 0.0)",43.97928,-120.737257,0.0
358,San Rafael,Hierarchy_01,"Mendoza, Argentina","(Mendoza, Argentina, (-34.787093049999996, -68...","(-34.787093049999996, -68.43818677312292, 0.0)",-34.787093,-68.438187,0.0
448,Puligny-Montrachet Les Folatières,Hierarchy_01,"Burgundy, France","(Bourgogne, France métropolitaine, France, (47...","(47.27808725, 4.222486304306048, 0.0)",47.278087,4.222486,0.0
482,Côtes du Jura,Hierarchy_01,"Jura, France","(Jura, Bourgogne-Franche-Comté, France métropo...","(46.783362499999996, 5.783285726354901, 0.0)",46.783362,5.783286,0.0
551,Sebino IGT,Hierarchy_01,"Lombardy, Italy","(Lombardia, Italia, (45.5703694, 9.7732524))","(45.5703694, 9.7732524, 0.0)",45.570369,9.773252,0.0


In [23]:
# Left-join the Hierarchy_01 geography onto the wines by Geography. Every wine
# row is kept; a Geography that appears more than once in df_GeoCache01
# produces one output row per match.
df_Wine01 = df_Wine.merge(df_GeoCache01, how='left', on='Geography')

df_Wine01.sample(10)

Unnamed: 0,Review_Year,Rank,Vintage,Score,Price,Winemaker,Wine,Wine_Style,Grape_Blend,Blend_List,...,Best_Drink_from,Best_Drink_Through,Review,Hierarchy,Address,loc,point,lat,long,altitude
1509,2005.0,8,1999,96.0,125,Castello Banfi,Brunello di Montalcino Poggio all'Oro Riserva,Red,Brunello di Montalcino,,...,2005.0,2015.0,Intense aromas of smoke and coffee open to lay...,Hierarchy_01,"Tuscany, Italy","(Toscana, Italia, (43.4586541, 11.1389204))","(43.4586541, 11.1389204, 0.0)",43.458654,11.13892,0.0
2004,2000.0,3,1997,97.0,60,Whitehall Lane,Cabernet Sauvignon Napa Valley Reserve,Red,Cabernet Sauvignon,,...,2002.0,2012.0,"Deliciously ripe, rich and deeply flavored, wi...",Hierarchy_01,"California, USA","(California, United States, (36.7014631, -118....","(36.7014631, -118.755997, 0.0)",36.701463,-118.755997,0.0
2016,2000.0,15,1998,95.0,29,Domaine Santa Duc,Gigondas,Red,Blend,Southern Rhone Red Blend,...,2003.0,2020.0,"Distinguished, deep and complex Rhône, offerin...",Hierarchy_01,"Rhône, France","(Rhône, Circonscription départementale du Rhôn...","(45.8802348, 4.564533629559522, 0.0)",45.880235,4.564534,0.0
892,2012.0,91,2010,93.0,58,Kosta Browne,Pinot Noir Russian River Valley,Red,Pinot Noir,,...,2013.0,2023.0,"Rich and expressive, with bold layers of ripe ...",Hierarchy_01,"California, USA","(California, United States, (36.7014631, -118....","(36.7014631, -118.755997, 0.0)",36.701463,-118.755997,0.0
1524,2005.0,23,1999,97.0,99,Marchesi de' Frescobaldi,Brunello di Montalcino Castelgiocondo Ripe al ...,Red,Brunello di Montalcino,,...,2006.0,,Very structured. Superattractive aromas of plu...,Hierarchy_01,"Tuscany, Italy","(Toscana, Italia, (43.4586541, 11.1389204))","(43.4586541, 11.1389204, 0.0)",43.458654,11.13892,0.0
1804,2002.0,3,1997,94.0,50,Castello Banfi,Brunello di Montalcino,Red,Brunello di Montalcino,,...,2003.0,,"A Brunello for everyone. Solid and focused, wi...",Hierarchy_01,"Tuscany, Italy","(Toscana, Italia, (43.4586541, 11.1389204))","(43.4586541, 11.1389204, 0.0)",43.458654,11.13892,0.0
863,2012.0,62,2009,91.0,19,Viña Ninquén,Syrah Colchagua Valley Antu,Red,Shiraz | Syrah,,...,2012.0,2015.0,"This dark, ripe red delivers jammy layered fla...",Hierarchy_01,"Colchagua Valley, Chile","(Colchagua, Palmilla, Provincia de Colchagua, ...","(-34.548228, -71.4013194, 0.0)",-34.548228,-71.401319,0.0
679,2014.0,78,2012,90.0,20,Acrobat,Pinot Noir Oregon,Red,Pinot Noir,,...,2014.0,2016.0,The crisp tannins and sleek structure give the...,Hierarchy_01,"Oregon, USA","(Oregon, United States, (43.9792797, -120.7372...","(43.9792797, -120.737257, 0.0)",43.97928,-120.737257,0.0
2045,2000.0,44,1997,95.0,70,Etude,Cabernet Sauvignon Napa Valley,Red,Cabernet Sauvignon,,...,2002.0,2016.0,"Great structure, deliciously rich and complex,...",Hierarchy_01,"California, USA","(California, United States, (36.7014631, -118....","(36.7014631, -118.755997, 0.0)",36.701463,-118.755997,0.0
2847,1992.0,45,1990,92.0,16,Ravenswood,Zinfandel Napa Valley Dickerson,Red,Zinfandel,,...,1992.0,1997.0,"A delicious young Zin that is ripe, rich tanni...",Hierarchy_01,"California, USA","(California, United States, (36.7014631, -118....","(36.7014631, -118.755997, 0.0)",36.701463,-118.755997,0.0


### Save files for use in other notebooks

In [24]:
# Remove duplicates by index: 2017 (46), 2015 (73), 1995 (94)
# The merged frames carry the default 0..n-1 index produced by pd.merge, so
# these row labels are the same rows the positional lookup index[346],
# index[574] and index[2596] addressed.
duplicate_row_labels = [346, 574, 2596]

# Drop the same explicit labels from each frame. Resolving the labels for
# df_Wine01 through df_Wine00 *after* df_Wine00 has been shortened shifts every
# position (346 -> 347, 574 -> 576, 2596 -> 2599) and removes the wrong rows.
# A label-based drop also raises KeyError if the rows are already gone, so
# re-running this cell cannot silently delete three more rows.
# NOTE(review): assumes the duplicated rows sit at the same positions in
# df_Wine01 as in df_Wine00 — confirm.
df_Wine00 = df_Wine00.drop(index=duplicate_row_labels)
df_Wine01 = df_Wine01.drop(index=duplicate_row_labels)

In [25]:
# Shape of the Hierarchy_00 wine table after the duplicate rows were dropped.
df_Wine00.shape

(3301, 25)

In [26]:
# Shape of the Hierarchy_01 wine table after the duplicate rows were dropped.
df_Wine01.shape

(3301, 25)

In [27]:
# Write both merged wine tables to disk, without the index column.
for frame, csv_path in (
    (df_Wine00, './Wine_Hier00.csv'),
    (df_Wine01, './Wine_Hier01.csv'),
):
    frame.to_csv(csv_path, index=False)