# GeoCache: *Wine Spectator*'s Top 100 Wines, 1988-2020
The lists are available online at *Wine Spectator*'s [Top 100 Lists website](https://top100.winespectator.com/lists/).

## File Setup

In [1]:
# import and initialize main python libraries
import numpy as np
import pandas as pd
import shapefile as shp
import matplotlib.pyplot as plt
import seaborn as sns

# import libraries for file navigation
import os
import shutil
import glob
from pandas_ods_reader import read_ods

# import other packages
from scipy import stats
from sklearn import linear_model

# import geo packages
import geopandas as gpd
import descartes
from shapely.geometry import Point, Polygon

# import Geopy packages
import geopy
from geopy.extra.rate_limiter import RateLimiter
from geopy.geocoders import Nominatim

In [2]:
# initialize vizualization set
sns.set(style="whitegrid", palette="colorblind", color_codes=True)
sns.mpl.rc("figure", figsize=(10, 6))

# Jupyter Notebook
%matplotlib inline

## Dataframe Exploration

In [3]:
# Load the three input tables from the notebook's working directory.
# Note: the CSV files must be saved in UTF-8 format so that special
# characters are preserved.
df_Wine = pd.read_csv("./CSV_Wines.csv")
df_GeoCache = pd.read_csv("./CSV_GeoCache.csv")
df_GeoList = pd.read_csv("./CSV_GeoList.csv")

In [4]:
# Dimensions of the wine table loaded from CSV_Wines.csv, as (rows, columns).
df_Wine.shape

(3301, 18)

In [5]:
# Column dtypes as inferred by read_csv (no dtypes were specified at load time).
df_Wine.dtypes

Review_Year           float64
Rank                   object
Vintage                object
Score                 float64
Price                  object
Winemaker              object
Wine                   object
Wine_Style             object
Grape_Blend            object
Blend_List             object
Geography              object
Cases_Made            float64
Cases_Imported        float64
Reviewer               object
Drink_now             float64
Best_Drink_from       float64
Best_Drink_Through    float64
Review                 object
dtype: object

In [6]:
# Dimensions of the table loaded from CSV_GeoCache.csv, as (rows, columns).
df_GeoCache.shape

(1226, 3)

In [7]:
# Dimensions of the table loaded from CSV_GeoList.csv, as (rows, columns).
df_GeoList.shape

(448, 1)

In [8]:
# Show 10 randomly chosen rows; no random_state is set, so the rows differ on every run.
df_Wine.sample(10)

Unnamed: 0,Review_Year,Rank,Vintage,Score,Price,Winemaker,Wine,Wine_Style,Grape_Blend,Blend_List,Geography,Cases_Made,Cases_Imported,Reviewer,Drink_now,Best_Drink_from,Best_Drink_Through,Review
2931,1991.0,32,1988,94.0,27,Château Pavie-Decesse,St.-Emilion,Red,Blend,Bordeaux Blend Red,St.-Emilion,4500.0,,,,1996.0,2000.0,Brilliantly concentrated from beginning to end...
1228,2008.0,29,2005,96.0,100,A. Clape,Cornas,Red,Shiraz | Syrah,,Cornas,1330.0,,JM,,2012.0,2028.0,"A dense, racy red, with a terrific beam of ras..."
1030,2010.0,31,2004,96.0,95,Terralsole,Brunello di Montalcino Riserva,Red,Brunello di Montalcino,,Brunello di Montalcino,1450.0,,JS,,2011.0,,"This changes all the time as you taste it, wit..."
220,2018.0,21,2016,93.0,18,Scott Base,Pinot Noir Central Otago,Red,Pinot Noir,,Central Otago,5000.0,,MW,1.0,2018.0,2028.0,"Smooth, silky raspberry coulis and maraschino ..."
2760,1993.0,61,1990,91.0,9,Michele Chiarlo,Barbera d'Asti,Red,Barbera,,Barbera d'Asti,11000.0,,,1.0,1993.0,1996.0,"Peppery, spicy notes invigorate this smooth-te..."
2055,2000.0,56,1997,93.0,45,Swanson,Alexis Napa Valley,Red,Cabernet Sauvignon,,Napa Valley,3400.0,,JL,,2002.0,2010.0,Weaves together a complex array of rich black ...
1720,2003.0,21,2000,92.0,13,Château de Flaugergues,Coteaux du Languedoc La Méjanelle Cuvée Sommel...,Red,Blend,Southern Rhone Red Blend,Languedoc,6000.0,,KM,1.0,2003.0,2005.0,"Full-bore, decadent red, with dark plum and bl..."
2488,1996.0,89,1993,90.0,16,Saxenburg,Shiraz Stellenbosch Private Collection,Red,Shiraz | Syrah,,Stellenbosch,800.0,,,1.0,1996.0,1999.0,"Gorgeous, deeply flavored red sporting the exu..."
1022,2010.0,23,2008,97.0,45,Owen Roe,Syrah Yakima Valley Red Willow Vineyard Chapel...,Red,Shiraz | Syrah,,Yakima Valley,359.0,,HS,1.0,2010.0,2016.0,"Warm, inviting and impressive for the purity o..."
1736,2003.0,37,2001,92.0,18,Dr. Loosen,Riesling Kabinett Mosel-Saar-Ruwer Wehlener So...,White,Riesling,,Mosel,1000.0,,BS,1.0,2003.0,2012.0,Gorgeous balance and grace in this peach- and ...


In [9]:
# Show 10 randomly chosen rows; no random_state is set, so the rows differ on every run.
df_GeoCache.sample(10)

Unnamed: 0,Geography,Hierarchy,Address
428,Mercurey,Hierarchy_01,"Burgundy, France"
592,Montefalco,Hierarchy_01,"Umbria, Italy"
459,Charmes-Chambertin,Hierarchy_01,"Burgundy, France"
718,Mudgee,Hierarchy_02,"Mudgee, New South Wales, Australia"
1059,Minervois La Livinière,Hierarchy_03,"Minervois La Livinière, Minervois, Languedoc-R..."
320,Dry Creek Valley,Hierarchy_00,USA
16,Clare Valley,Hierarchy_00,Australia
1015,Corton Les Renardes,Hierarchy_03,"Corton Grand Cru, Côte de Beaune, Burgundy, Fr..."
220,Vallagarina IGT,Hierarchy_00,Italy
122,Mâcon-La Roche Vineuse,Hierarchy_00,France


In [10]:
# Show 10 randomly chosen rows; no random_state is set, so the rows differ on every run.
df_GeoList.sample(10)

Unnamed: 0,Address
191,"Kamptal, Austria"
218,"Madeira Bual, Madeira, Portugal"
382,"St.-Joseph, Rhône, France"
7,"Alexander Valley, Sonoma County, North Coast, ..."
439,"Washington, USA"
402,"Umbria, Italy"
420,"Victoria, Australia"
181,"Hunter Valley, New South Wales, Australia"
170,Greece
250,"Morey-St.-Denis, Côte de Nuits, Burgundy, France"


### Geocode the Address dataframe
Reference: [Python’s geocoding — Convert a list of addresses into a map](https://towardsdatascience.com/pythons-geocoding-convert-a-list-of-addresses-into-a-map-f522ef513fd6)

In [11]:
# Initialize Nominatim into geolocator variable.
# user_agent identifies this application to the Nominatim service.
# geopy's default request timeout is 1 second, which is easily exceeded when
# geocoding several hundred addresses in a row; allow each request up to
# 10 seconds before it is treated as failed.
geolocator = Nominatim(user_agent='wine app', timeout=10)

In [12]:
# Spot-check one address: .raw exposes the geocoder's full response as a dict.
geolocator.geocode('Castilla y León, Spain').raw

{'place_id': 258252333,
 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright',
 'osm_type': 'relation',
 'osm_id': 349041,
 'boundingbox': ['40.0824504', '43.2382034', '-7.077073', '-1.7753716'],
 'lat': '41.8037172',
 'lon': '-4.7471726',
 'display_name': 'Castilla y León, España',
 'class': 'boundary',
 'type': 'administrative',
 'importance': 0.9625997816800999,
 'icon': 'https://nominatim.openstreetmap.org/ui/mapicons//poi_boundary_administrative.p.20.png'}

In [13]:
# Same query again (a separate geocode call): .point holds the
# (latitude, longitude, altitude) triple used in the cells below.
geolocator.geocode('Castilla y León, Spain').point

Point(41.8037172, -4.7471726, 0.0)

In [14]:
# Apply geolocator to the Address column in the GeoList dataframe.
# The public Nominatim server allows at most one request per second, so the
# geocode call is wrapped in geopy's RateLimiter. The limiter also retries
# failed requests and, once its retries are exhausted, returns None for that
# address instead of aborting the whole run; the next cell already treats a
# None result as "not geocoded".
geocode_throttled = RateLimiter(geolocator.geocode, min_delay_seconds=1)
df_GeoList['loc'] = df_GeoList['Address'].apply(geocode_throttled)

In [15]:
# Pull the (lat, long, altitude) tuple out of each geocode response;
# addresses that returned no match stay as None.
df_GeoList['point'] = df_GeoList['loc'].apply(
    lambda match: None if not match else tuple(match.point)
)

In [16]:
# Split the .point column into separate columns for lat, long, and altitude.
# Addresses that failed to geocode hold None in 'point'. They are expanded to
# an explicit NaN triple so the intermediate frame always has exactly three
# columns, even when the first row (or every row) is None, where letting
# pandas infer the shape would yield a single column and break the assignment.
coord_cols = ['lat', 'long', 'altitude']
coord_rows = [
    point if isinstance(point, tuple) else (np.nan,) * len(coord_cols)
    for point in df_GeoList['point']
]
df_GeoList[coord_cols] = pd.DataFrame(coord_rows, columns=coord_cols, index=df_GeoList.index)

In [17]:
# Preview the geocoded results; head() limits the output to the first rows
# instead of rendering the whole dataframe.
df_GeoList.head(10)

Unnamed: 0,Address,loc,point,lat,long,altitude
0,"Abruzzo, Italy","(Abruzzo, Italia, (42.227681, 13.854983))","(42.227681, 13.854983, 0.0)",42.227681,13.854983,0.0
1,"Adelaide Hills, South Australia, Australia","(Adelaide Hills Council, South Australia, Aust...","(-34.901351649999995, 138.8293202817461, 0.0)",-34.901352,138.829320,0.0
2,"Aegean Islands, Greece","(Aegean, Σάμη - Αγία Ευφημία, Καραβόμυλος, Δήμ...","(38.2504094, 20.6304217, 0.0)",38.250409,20.630422,0.0
3,"Aglianico del Vulture, Basilicata, Italy",,,,,
4,"Agrelo, Mendoza, Argentina","(Agrelo, Distrito Agrelo, Departamento Luján d...","(-33.1184629, -68.8859261, 0.0)",-33.118463,-68.885926,0.0
5,"Alba, Piedmont | Piemonte, Italy",,,,,
6,"Alentejo, Portugal","(Alentejo, Portugal, (38.0551003, -7.8605799))","(38.0551003, -7.8605799, 0.0)",38.055100,-7.860580,0.0
7,"Alexander Valley, Sonoma County, North Coast, ...",,,,,
8,"Alicante, Valencia, Spain","(Alacant / Alicante, l'Alacantí, Alacant / Ali...","(38.353738, -0.4901846, 0.0)",38.353738,-0.490185,0.0
9,"Almansa, Castilla La Mancha, Spain","(Almansa, Albacete, Castilla-La Mancha, 02640,...","(38.8682065, -1.0978627, 0.0)",38.868206,-1.097863,0.0


### Append geography details to the GeoCache dataframe
Merge the geocoded addresses into the GeoCache dataframe to determine how well geography is populated at each hierarchy level.

In [18]:
# Left-join the geocoded address list onto the cache so every GeoCache row is
# kept, with geocode columns filled in where the Address matches.
df_GeoCache = df_GeoCache.merge(df_GeoList, how='left', on='Address')

In [19]:
# Write the enriched cache to disk without the dataframe index.
df_GeoCache.to_csv('./GeoCache.csv', index=False)

### Append Hierarchy 00 details to the df_Wine dataset

In [20]:
# Keep only the Hierarchy_00 rows of df_GeoCache.
is_hierarchy_00 = df_GeoCache['Hierarchy'] == 'Hierarchy_00'
df_GeoCache00 = df_GeoCache[is_hierarchy_00]

df_GeoCache00.sample(10)

Unnamed: 0,Geography,Hierarchy,Address,loc,point,lat,long,altitude
1,Agrelo,Hierarchy_00,Argentina,"(Argentina, (-34.9964963, -64.9672817))","(-34.9964963, -64.9672817, 0.0)",-34.996496,-64.967282,0.0
228,Morellino di Scansano,Hierarchy_00,Italy,"(Italia, (42.6384261, 12.674297))","(42.6384261, 12.674297, 0.0)",42.638426,12.674297,0.0
266,Jerez-Xérès,Hierarchy_00,Spain,"(España, (39.3260685, -4.8379791))","(39.3260685, -4.8379791, 0.0)",39.326068,-4.837979,0.0
306,Lake County,Hierarchy_00,USA,"(United States, (39.7837304, -100.4458825))","(39.7837304, -100.4458825, 0.0)",39.78373,-100.445882,0.0
168,Nahe,Hierarchy_00,Germany,"(Deutschland, (51.0834196, 10.4234469))","(51.0834196, 10.4234469, 0.0)",51.08342,10.423447,0.0
75,Chassagne-Montrachet En Remilly,Hierarchy_00,France,"(France, (46.603354, 1.8883335))","(46.603354, 1.8883335, 0.0)",46.603354,1.888334,0.0
232,Sant'Antimo,Hierarchy_00,Italy,"(Italia, (42.6384261, 12.674297))","(42.6384261, 12.674297, 0.0)",42.638426,12.674297,0.0
175,Naoussa,Hierarchy_00,Greece,"(Ελλάδα, (38.9953683, 21.9877132))","(38.9953683, 21.9877132, 0.0)",38.995368,21.987713,0.0
189,Taurasi,Hierarchy_00,Italy,"(Italia, (42.6384261, 12.674297))","(42.6384261, 12.674297, 0.0)",42.638426,12.674297,0.0
45,Maipo Valley,Hierarchy_00,Chile,"(Chile, (-31.7613365, -71.3187697))","(-31.7613365, -71.3187697, 0.0)",-31.761336,-71.31877,0.0


In [21]:
# Attach the Hierarchy_00 geography details to every wine (left join on Geography).
df_Wine00 = df_Wine.merge(df_GeoCache00, how='left', on='Geography')

df_Wine00.sample(10)

Unnamed: 0,Review_Year,Rank,Vintage,Score,Price,Winemaker,Wine,Wine_Style,Grape_Blend,Blend_List,...,Best_Drink_from,Best_Drink_Through,Review,Hierarchy,Address,loc,point,lat,long,altitude
1324,2007.0,23,2004,93.0,45,Bodega Catena Zapata,Malbec Mendoza Alta,Red,Malbec,,...,2007.0,2010.0,"Dark and lush, with a gorgeous mouthfeel to th...",Hierarchy_00,Argentina,"(Argentina, (-34.9964963, -64.9672817))","(-34.9964963, -64.9672817, 0.0)",-34.996496,-64.967282,0.0
2431,1996.0,30,1994,96.0,27,Turley,Zinfandel Napa Valley Hayne Vineyard,Red,Zinfandel,,...,,,Even more impressive than the first release fr...,Hierarchy_00,USA,"(United States, (39.7837304, -100.4458825))","(39.7837304, -100.4458825, 0.0)",39.78373,-100.445882,0.0
2777,1993.0,75,1990,91.0,17,Clos Pegase,Cabernet Sauvignon Napa Valley,Red,Cabernet Sauvignon,,...,1993.0,2002.0,"Firm and intense with a solid core of cherry, ...",Hierarchy_00,USA,"(United States, (39.7837304, -100.4458825))","(39.7837304, -100.4458825, 0.0)",39.78373,-100.445882,0.0
2013,2000.0,12,1997,96.0,52,Antinori,Bolgheri Superiore Guado al Tasso Tenuta Belve...,Red,Blend,Bordeaux Blend Red,...,2005.0,,A balanced and harmonious young red. Dark ruby...,Hierarchy_00,Italy,"(Italia, (42.6384261, 12.674297))","(42.6384261, 12.674297, 0.0)",42.638426,12.674297,0.0
3199,1989.0,97,1985,91.0,21,Simi,Cabernet Sauvignon Sonoma County,Red,Cabernet Sauvignon,,...,,1994.0,Ripe and delicious but elegantly contained in ...,Hierarchy_00,USA,"(United States, (39.7837304, -100.4458825))","(39.7837304, -100.4458825, 0.0)",39.78373,-100.445882,0.0
883,2012.0,82,NV,90.0,16,Emilio Lustau,Amontillado Jerez Los Arcos Solera Reserva,Dessert & Fortified,Palomino,,...,2012.0,,"A more flattering style, with an off-dry edge ...",Hierarchy_00,Spain,"(España, (39.3260685, -4.8379791))","(39.3260685, -4.8379791, 0.0)",39.326068,-4.837979,0.0
386,2017.0,86,2014,94.0,75,Lancaster,Cabernet Sauvignon Alexander Valley,Red,Cabernet Sauvignon,,...,2019.0,2029.0,"A big, rich Cabernet, with firm, structured ta...",Hierarchy_00,USA,"(United States, (39.7837304, -100.4458825))","(39.7837304, -100.4458825, 0.0)",39.78373,-100.445882,0.0
3278,1988.0,76,1985,93.0,22,Sterling,Three Palms Vineyard Napa Valley,Red,Merlot,,...,1993.0,1993.0,Loaded with fruit and concentrated flavors tha...,Hierarchy_00,USA,"(United States, (39.7837304, -100.4458825))","(39.7837304, -100.4458825, 0.0)",39.78373,-100.445882,0.0
2484,1996.0,83,1994,90.0,15,Bernardus,Chardonnay Monterey County,White,Chardonnay,,...,,,"A bold, ripe and full-bodied white from Califo...",Hierarchy_00,USA,"(United States, (39.7837304, -100.4458825))","(39.7837304, -100.4458825, 0.0)",39.78373,-100.445882,0.0
795,2013.0,94,2009,94.0,60,Schiavenza,Barolo Prapò,Red,Blend,Nebbiolo,...,2017.0,2032.0,"Warm and generous, this red balances notes of ...",Hierarchy_00,Italy,"(Italia, (42.6384261, 12.674297))","(42.6384261, 12.674297, 0.0)",42.638426,12.674297,0.0


### Append Hierarchy 01 details to the df_Wine dataset

In [22]:
# Keep only the Hierarchy_01 rows of df_GeoCache.
is_hierarchy_01 = df_GeoCache['Hierarchy'] == 'Hierarchy_01'
df_GeoCache01 = df_GeoCache[is_hierarchy_01]

df_GeoCache01.sample(10)

Unnamed: 0,Geography,Hierarchy,Address,loc,point,lat,long,altitude
366,Mudgee,Hierarchy_01,"New South Wales, Australia","(New South Wales, Australia, (-31.8759835, 147...","(-31.8759835, 147.2869493, 0.0)",-31.875984,147.286949,0.0
529,Amyndaio,Hierarchy_01,"Macedonia, Greece, Greece","(Border България (Bulgaria) - Ελλάδα (Greece),...","(41.7427066, 26.1770757, 0.0)",41.742707,26.177076,0.0
493,Montlouis,Hierarchy_01,"Loire, France","(Loire, Auvergne-Rhône-Alpes, France métropoli...","(45.75385355, 4.045473682551104, 0.0)",45.753854,4.045474,0.0
650,Contra Costa County,Hierarchy_01,"California, USA","(California, United States, (36.7014631, -118....","(36.7014631, -118.755997, 0.0)",36.701463,-118.755997,0.0
439,Pommard La Platière,Hierarchy_01,"Burgundy, France","(Bourgogne, France métropolitaine, France, (47...","(47.27808725, 4.222486304306048, 0.0)",47.278087,4.222486,0.0
594,Prosecco,Hierarchy_01,"Veneto, Italy","(Veneto, Italia, (45.6476663, 11.8665254))","(45.6476663, 11.8665254, 0.0)",45.647666,11.866525,0.0
462,Clos des Lambrays,Hierarchy_01,"Burgundy, France","(Bourgogne, France métropolitaine, France, (47...","(47.27808725, 4.222486304306048, 0.0)",47.278087,4.222486,0.0
448,Puligny-Montrachet Les Folatières,Hierarchy_01,"Burgundy, France","(Bourgogne, France métropolitaine, France, (47...","(47.27808725, 4.222486304306048, 0.0)",47.278087,4.222486,0.0
589,Vino Nobile di Montepulciano,Hierarchy_01,"Tuscany, Italy","(Toscana, Italia, (43.4586541, 11.1389204))","(43.4586541, 11.1389204, 0.0)",43.458654,11.13892,0.0
613,Walker Bay,Hierarchy_01,"Western Cape, South Africa","(Western Cape, South Africa, (-33.546977, 20.7...","(-33.546977, 20.72753, 0.0)",-33.546977,20.72753,0.0


In [23]:
# Attach the Hierarchy_01 geography details to every wine (left join on Geography).
df_Wine01 = df_Wine.merge(df_GeoCache01, how='left', on='Geography')

df_Wine01.sample(10)

Unnamed: 0,Review_Year,Rank,Vintage,Score,Price,Winemaker,Wine,Wine_Style,Grape_Blend,Blend_List,...,Best_Drink_from,Best_Drink_Through,Review,Hierarchy,Address,loc,point,lat,long,altitude
3237,1988.0,35,1985,97.0,90,Château Latour,Pauillac,Red,Blend,Cabernet Sauvignon – Merlot,...,,,"A majestic wine, a bit hard and lean, with str...",Hierarchy_01,"Bordeaux, France","(Bordeaux, Gironde, Nouvelle-Aquitaine, France...","(44.841225, -0.5800364, 0.0)",44.841225,-0.580036,0.0
965,2011.0,64,2008,95.0,70,Quinta do Vale Meão,Douro,Red,Blend,"Touriga Nacional, Touriga Franca, Tinta Roriz ...",...,2011.0,2020.0,"Very fresh and pure-tasting, displaying a mine...",Hierarchy_01,"Douro, Portugal","(Douro, Norte, Portugal, (41.17004195, -7.3047...","(41.17004195, -7.304749811735755, 0.0)",41.170042,-7.30475,0.0
1753,2003.0,52,1997,97.0,99,Lisini,Brunello di Montalcino Ugolaia,Red,Brunello di Montalcino,,...,2005.0,,"Fabulous bottle. Very, very classy aromas of f...",Hierarchy_01,"Tuscany, Italy","(Toscana, Italia, (43.4586541, 11.1389204))","(43.4586541, 11.1389204, 0.0)",43.458654,11.13892,0.0
2225,1998.0,8,1996,95.0,52,Argiano,Toscana Solengo,Red,Blend,"Sangiovese, Cabernet Sauvignon, Merlot and Syrah",...,2000.0,,Wild thing. This Italian red is even better th...,Hierarchy_01,"Tuscany, Italy","(Toscana, Italia, (43.4586541, 11.1389204))","(43.4586541, 11.1389204, 0.0)",43.458654,11.13892,0.0
3054,1990.0,52,1987,93.0,24,Clos du Bois,Chardonnay Alexander Valley Winemaker's Reserve,White,Chardonnay,,...,1990.0,1993.0,"A rich, smooth, creamy style that offers a bro...",Hierarchy_01,"California, USA","(California, United States, (36.7014631, -118....","(36.7014631, -118.755997, 0.0)",36.701463,-118.755997,0.0
557,2015.0,57,2012,95.0,90,Altamura,Cabernet Sauvignon Napa Valley,Red,Cabernet Sauvignon,,...,2015.0,2028.0,"Amazingly pure, rich and complex, with tiers o...",Hierarchy_01,"California, USA","(California, United States, (36.7014631, -118....","(36.7014631, -118.755997, 0.0)",36.701463,-118.755997,0.0
360,2017.0,60,2015,90.0,25,Casa Santos Lima,Lisboa Colossal Reserva,Red,Blend,Portuguese Red Blend,...,2017.0,2020.0,Plum and boysenberry notes weave together with...,Hierarchy_01,"Estremadura, Lisboa, Portugal","(Rua da Estremadura, Vila Chã, Santo António d...","(38.635382, -9.0427141, 0.0)",38.635382,-9.042714,0.0
2027,2000.0,26,1995,97.0,185,Penfolds,Shiraz South Australia Grange,Red,Shiraz | Syrah,,...,2004.0,2020.0,"A massive wine, majestic in its proportions,co...",Hierarchy_01,"South Australia, Australia","(South Australia, Australia, (-30.5343665, 135...","(-30.5343665, 135.6301212, 0.0)",-30.534367,135.630121,0.0
2162,1999.0,61,1997,91.0,18,Steele,Chardonnay California Steele Cuvée,White,Chardonnay,,...,1999.0,2001.0,Lots of up-front fruit and spice make this whi...,Hierarchy_01,"California, USA","(California, United States, (36.7014631, -118....","(36.7014631, -118.755997, 0.0)",36.701463,-118.755997,0.0
1179,2009.0,78,NV,90.0,20,Gloria Ferrer,Brut Sonoma County Sonoma,Sparkling,Blend,"85% Pinot Noir, 15% Chardonnay",...,2009.0,2012.0,"Lively and fun to drink, with creamy Asian pea...",Hierarchy_01,"California, USA","(California, United States, (36.7014631, -118....","(36.7014631, -118.755997, 0.0)",36.701463,-118.755997,0.0


### Save files for use in other notebooks

In [24]:
# Persist both enriched wine tables (index omitted) for the downstream notebooks.
df_Wine00.to_csv('./Wine_Hier00.csv', index=False)
df_Wine01.to_csv('./Wine_Hier01.csv', index=False)