In [2]:
import os
import pandas as pd
import matplotlib as mlt
from pathlib import Path
import hvplot.pandas
import warnings
warnings.filterwarnings("ignore")


In [3]:
 #designate the csv file data path
data_load = Path ('../Resources/wine_data.csv')

# uniform language
wine_df = pd.read_csv(data_load,encoding="ISO-8859-1")

# removing duplicates
wine_df = wine_df.drop(["Unnamed: 7", "Unnamed: 8", "Unnamed: 9", "Unnamed: 10", "Unnamed: 11", "Unnamed: 12"], axis=1)
wine_df = wine_df.drop_duplicates()

# rennaming and capitalization of data
wine_df.rename(columns={'country':'Country'}, inplace=True)
wine_df.rename(columns={'description':'Description'}, inplace=True)
wine_df.rename(columns={'points':'Points'}, inplace=True)
wine_df.rename(columns={'price ($)':'Price ($)'}, inplace=True)
wine_df.rename(columns={'region':'Region'}, inplace=True)
wine_df.rename(columns={'title':'Title'}, inplace=True)
wine_df.rename(columns={'variety':'Variety'}, inplace=True)

#change types from objects to strings, anything that is a number to a float64/int
wine_df['Country']= wine_df['Country'].astype('string')
wine_df['Description']= wine_df['Description'].astype('string')
wine_df['Region']= wine_df['Region'].astype('string')
wine_df['Title']= wine_df['Title'].astype('string')
wine_df['Variety']= wine_df['Variety'].astype('string')
wine_df['Price ($)']=pd.to_numeric(wine_df['Price ($)'],errors='coerce')
wine_df['Points']=pd.to_numeric(wine_df['Points'],errors='coerce')

#drop unnecessary values
wine_df = wine_df.dropna(axis=0)

#print first five rows of data frame
wine_df.head()



Unnamed: 0,Country,Description,Points,Price ($),Region,Title,Variety
2,US,"Tart and snappy, the flavors of lime flesh and...",87.0,14.0,Willamette Valley,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris
3,US,"Pineapple rind, lemon pith and orange blossom ...",87.0,13.0,Lake Michigan Shore,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling
4,US,"Much like the regular bottling from 2012, this...",87.0,65.0,Willamette Valley,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir
5,Spain,Blackberry and raspberry aromas show a typical...,87.0,15.0,Navarra,Tandem 2011 Ars In Vitro Tempranillo-Merlot (N...,Tempranillo-Merlot
6,Italy,"Here's a bright, informal red that opens with ...",87.0,16.0,Vittoria,Terre di Giurfo 2013 Belsito Frappato (Vittoria),Frappato


In [4]:
# df types
wine_df.dtypes

Country        string[python]
Description    string[python]
Points                float64
Price ($)             float64
Region         string[python]
Title          string[python]
Variety        string[python]
dtype: object

In [5]:
# df count
wine_df.count()

Country        93576
Description    93576
Points         93576
Price ($)      93576
Region         93576
Title          93576
Variety        93576
dtype: int64

In [6]:
#find the different columns that are in data frame
wine_df.columns 

Index(['Country', 'Description', 'Points', 'Price ($)', 'Region', 'Title',
       'Variety'],
      dtype='object')

In [7]:
#isolate the top 10 wines based on variety data
df= wine_df['Variety'].value_counts()
threshold = 2500
drop_df = df[df > threshold].index

clean_wine_df = wine_df[wine_df['Variety'].isin(drop_df)==False].reset_index(drop=True)

clean_wine_df

Unnamed: 0,Country,Description,Points,Price ($),Region,Title,Variety
0,US,"Tart and snappy, the flavors of lime flesh and...",87.0,14.0,Willamette Valley,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris
1,US,"Pineapple rind, lemon pith and orange blossom ...",87.0,13.0,Lake Michigan Shore,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling
2,Spain,Blackberry and raspberry aromas show a typical...,87.0,15.0,Navarra,Tandem 2011 Ars In Vitro Tempranillo-Merlot (N...,Tempranillo-Merlot
3,Italy,"Here's a bright, informal red that opens with ...",87.0,16.0,Vittoria,Terre di Giurfo 2013 Belsito Frappato (Vittoria),Frappato
4,France,This dry and restrained wine offers spice in p...,87.0,24.0,Alsace,Trimbach 2012 Gewurztraminer (Alsace),Gewürztraminer
...,...,...,...,...,...,...,...
42650,Italy,"Blackberry, cassis, grilled herb and toasted a...",90.0,40.0,Sicilia,Cusumano 2012 Sàgana Tenuta San Giacomo Nero d...,Nero d'Avola
42651,France,"While it's rich, this beautiful dry wine also ...",90.0,28.0,Alsace,Domaine Rieflé-Landmann 2013 Seppi Landmann Va...,Pinot Gris
42652,France,Well-drained gravel soil gives this wine its c...,90.0,30.0,Alsace,Domaine Gresser 2013 Kritt Gewurztraminer (Als...,Gewürztraminer
42653,France,"A dry style of Pinot Gris, this is crisp with ...",90.0,32.0,Alsace,Domaine Marcel Deiss 2012 Pinot Gris (Alsace),Pinot Gris


In [8]:
clean_wine_df.dtypes

Country        string[python]
Description    string[python]
Points                float64
Price ($)             float64
Region         string[python]
Title          string[python]
Variety        string[python]
dtype: object

In [9]:
#isolate the wines that have points of 90 or avbove
high_points = clean_wine_df.loc[(clean_wine_df['Points']>= 90)]

high_points['Points'].value_counts()

Points
90.0     4843
91.0     3348
92.0     2688
93.0     1734
94.0     1010
95.0      433
96.0      148
97.0       62
98.0       24
99.0       11
100.0       8
Name: count, dtype: int64

In [10]:
#find the countries that produced the 90 or above wines
high_points['Country'].value_counts()

Country
US           4823
Italy        4114
France       3074
Spain        1162
Australia     578
Argentina     487
Canada         71
Name: count, dtype: Int64

In [11]:
#create map for countries



coordinates = {'longitude': [-95.7129,12.496366,2.213749, -4.00000000, 135.0000,-63.616672,-95.00],
                'latitude': [37.0902,41.902782, 46.227638, 40.00000000,-25.0000,-38.416097,60.00], 
               'Country': ['US', 'Italy', 'France', 'Spain', 'Australia', 'Argentina', 'Canada']}

coordinates_df= pd.DataFrame(coordinates)

coordinates_df

Unnamed: 0,longitude,latitude,Country
0,-95.7129,37.0902,US
1,12.496366,41.902782,Italy
2,2.213749,46.227638,France
3,-4.0,40.0,Spain
4,135.0,-25.0,Australia
5,-63.616672,-38.416097,Argentina
6,-95.0,60.0,Canada


In [12]:
country_map= coordinates_df.hvplot.points(
    "longitude",
    "latitude",
    geo = True,
    tiles = "OSM"
)

country_map

In [13]:
#isolate top 10 regions for US 

top_ten_regions = high_points.loc[(high_points['Country']=='US')]

top_ten_regions.head(10)


Unnamed: 0,Country,Description,Points,Price ($),Region,Title,Variety
69,US,"A beautiful wine, sleek, crisp and ultra-dry, ...",90.0,17.0,Santa Lucia Highlands,Banyan 2007 Riesling (Santa Lucia Highlands),Riesling
71,US,Smashed blackberry and blueberry fruit are enh...,91.0,35.0,Monterey,Wrath 2013 Destruction Level Red (Monterey),Rhône-style Red Blend
75,US,More complex than the winery's appellation ble...,91.0,28.0,Edna Valley,Claiborne & Churchill 2014 Claiborne Vineyard ...,Riesling
77,US,The addition of 15% Petite Sirah takes this bo...,91.0,28.0,Paso Robles,Dracaena 2013 Cabernet Franc (Paso Robles),Cabernet Franc
78,US,"A fairly elegant expression of the variety, th...",91.0,95.0,Napa Valley,Duckhorn 2012 Rector Creek Vineyard Merlot (Na...,Merlot
84,US,"This is a beatifully balanced, not-too-full-bo...",90.0,28.0,Nevada County,Sierra Starr 2014 Rising Starr Estate Bottled ...,Cabernet Franc
86,US,This bottling belongs to a series of wines fro...,90.0,30.0,Paso Robles,J. Lohr 2014 Gesture G-S-M (Paso Robles),G-S-M
90,US,"This is a big, brawny expression of Grenache, ...",90.0,36.0,Dry Creek Valley,Mounts 2014 Verah Red (Dry Creek Valley),Rhône-style Red Blend
92,US,"Pithy, with grapefruit and lemon peel flavors,...",90.0,20.0,Napa Valley,Robert Mondavi 2015 Fumé Blanc (Napa Valley),Fumé Blanc
191,US,"Thickly moussed, Roederer's Brut, 60% Chardonn...",92.0,23.0,Anderson Valley,Roederer Estate NV Brut Sparkling (Anderson Va...,Sparkling Blend


In [19]:
#create coordinates df
region_coordinates = {'longitude': [-121.326319, -121.898460,-120.6071139,-120.680008,-122.286865,-120.76875000,-122.966664,-123.43],
                'latitude': [36.424689, 36.603954,35.2166418,35.640556,38.297539,39.30137000, 38.699997,39.06 ], 
               'Region': ['Santa Lucia Highlands', 'Monterey', 'Edna Valley', 'Paso Robles', 'Napa Valley', 
                           'Nevada County', 'Dry Creek Valley', 'Anderson Valley']}

region_coordinates_df= pd.DataFrame(region_coordinates)

region_coordinates_df

Unnamed: 0,longitude,latitude,Region
0,-121.326319,36.424689,Santa Lucia Highlands
1,-121.89846,36.603954,Monterey
2,-120.607114,35.216642,Edna Valley
3,-120.680008,35.640556,Paso Robles
4,-122.286865,38.297539,Napa Valley
5,-120.76875,39.30137,Nevada County
6,-122.966664,38.699997,Dry Creek Valley
7,-123.43,39.06,Anderson Valley


In [20]:
#create map of top regions for US
region_map= region_coordinates_df.hvplot.points(
    "longitude",
    "latitude",
    geo = True,
    tiles = "OSM"
)

region_map

In [21]:
canada_regions = high_points.loc[(high_points['Country']=='Canada')]

canada_regions.head(10)


Unnamed: 0,Country,Description,Points,Price ($),Region,Title,Variety
190,Canada,"An aromatic knockout with notes of peach, papa...",92.0,30.0,Niagara-On-The-Lake,Pillitteri 2012 Reserve Icewine Vidal (Niagara...,Vidal
1898,Canada,A fine companion to the winery's Cabernet Sauv...,91.0,38.0,Okanagan Valley,Burrowing Owl 2013 Cabernet Franc (Okanagan Va...,Cabernet Franc
2306,Canada,Gorgeously aromatic with notes of freshly pres...,91.0,80.0,Niagara Peninsula,Inniskillin 2008 Ice Wine Riesling (Niagara Pe...,Riesling
2699,Canada,"This irresistible, lightly frizzante and utter...",92.0,20.0,Okanagan Valley,LaStella 2013 Moscato d'Osoyoos Moscato (Okana...,Moscato
2704,Canada,Produced from the estate vineyard's oldest vin...,92.0,25.0,Beamsville Bench,Cave Spring 2012 CSV Estate Bottled Riesling (...,Riesling
2840,Canada,"Smooth as silk and deeply concentrated, this o...",94.0,60.0,Niagara Peninsula,Cave Spring 2013 Riesling Icewine Riesling (Ni...,Riesling
2957,Canada,This wine is the vinous equivalent of biting i...,92.0,70.0,Niagara Peninsula,Stratus 2008 Icewine Riesling (Niagara Peninsula),Riesling
2958,Canada,Harvested at 40.16 Brix and retaining a whoppi...,92.0,77.0,Niagara Peninsula,Inniskillin 2007 Gold Icewine Vidal Blanc (Nia...,Vidal Blanc
3585,Canada,This vibrantly aromatic wine brings generous n...,91.0,60.0,Niagara Peninsula,Inniskillin 2011 Icewine Vidal (Niagara Penins...,Vidal
3864,Canada,Tart berry and sour cherry fruit is set amidst...,91.0,35.0,Okanagan Valley,Church & State 2010 Coyote Bowl Series Caberne...,Cabernet Franc


In [22]:
canada_region_coordinates = {'longitude': [-79.121445,-119.477829,-79.106667,-79.47522500],
                'latitude': [43.219646,49.882114,  43.060001, 43.16535600], 
               'Region': ['Niagara on the Lake', 'Okanagan Valley', 'Niagara Peninsula', 'Beamsville Bench']}

canada_region_coordinates_df= pd.DataFrame(canada_region_coordinates)

canada_region_coordinates_df

Unnamed: 0,longitude,latitude,Region
0,-79.121445,43.219646,Niagara on the Lake
1,-119.477829,49.882114,Okanagan Valley
2,-79.106667,43.060001,Niagara Peninsula
3,-79.475225,43.165356,Beamsville Bench


In [23]:
canada_region_map= canada_region_coordinates_df.hvplot.points(
    "longitude",
    "latitude",
    geo = True,
    tiles = "OSM"
)

canada_region_map