In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

pd.set_option('display.max_rows', 500)
%matplotlib inline
sns.set()
sns.set_style("ticks")

In [2]:
# Uploading data and adjusting data

df1 = pd.read_csv('winemag-data-130k-v2.csv', encoding='latin1')
df2 = pd.read_csv('winemag-data_first150k.csv', encoding='latin1')
df = pd.concat([df1,df2],sort = False)
df = df.drop(['Unnamed: 0'], axis = 1)
df = df.drop_duplicates().reset_index(drop=True)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 217839 entries, 0 to 217838
Data columns (total 13 columns):
country                  217777 non-null object
description              217839 non-null object
designation              153398 non-null object
points                   217839 non-null int64
price                    200724 non-null float64
province                 217777 non-null object
region_1                 182347 non-null object
region_2                 86260 non-null object
taster_name              95071 non-null object
taster_twitter_handle    90542 non-null object
title                    119988 non-null object
variety                  217838 non-null object
winery                   217839 non-null object
dtypes: float64(1), int64(1), object(11)
memory usage: 21.6+ MB


In [3]:
# Adding grade based on points

def grades(points):
    if points in range(80,83):
        return 0
    elif points in range(83,87):
        return 1
    elif points in range(87,90):
        return 2
    elif points in range(90,94):
        return 3
    elif points in range(94,98):
        return 4
    else:
        return 5
    
df['grade'] = df['points'].apply(grades)

In [4]:
# Adding the dollar per point ratio

df['ratio'] = df['price']/df['points']

#### Evaluation

In [5]:
# Creating a dataframe for luxury wines

luxury = df[df['price'] > 800 ]
luxury = luxury.reset_index(drop=True)
luxury.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40 entries, 0 to 39
Data columns (total 15 columns):
country                  40 non-null object
description              40 non-null object
designation              14 non-null object
points                   40 non-null int64
price                    40 non-null float64
province                 40 non-null object
region_1                 35 non-null object
region_2                 2 non-null object
taster_name              25 non-null object
taster_twitter_handle    25 non-null object
title                    26 non-null object
variety                  40 non-null object
winery                   40 non-null object
grade                    40 non-null int64
ratio                    40 non-null float64
dtypes: float64(2), int64(2), object(11)
memory usage: 4.8+ KB


In [6]:
luxury['grade'].value_counts()

4    26
5    11
3     2
2     1
Name: grade, dtype: int64

In [7]:
luxury = luxury[luxury['grade'] > 4]
luxury['ratio'].describe()

count    11.000000
mean     13.536421
std       5.384471
min       8.480000
25%       8.629664
50%      14.000000
75%      17.193878
max      23.232323
Name: ratio, dtype: float64

In [8]:
luxury = luxury.nsmallest(6, 'ratio').reset_index(drop=True)
luxury = luxury.drop_duplicates('variety')
luxury

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery,grade,ratio
0,France,"Full of ripe fruit, opulent and concentrated, ...",,100,848.0,Bordeaux,Pessac-LÃ©ognan,,Roger Voss,@vossroger,ChÃ¢teau Haut-Brion 2014 Pessac-LÃ©ognan,Bordeaux-style White Blend,ChÃ¢teau Haut-Brion,5,8.48
2,Australia,"This inky, embryonic wine deserves to be cella...",Grange,99,850.0,South Australia,South Australia,,Joe Czerwinski,@JoeCz,Penfolds 2010 Grange Shiraz (South Australia),Shiraz,Penfolds,5,8.585859
5,France,A wine that has created its own universe. It h...,Clos du Mesnil,100,1400.0,Champagne,Champagne,,,,,Chardonnay,Krug,5,14.0


In [9]:
# Creating a dataframe for very_expensive wines

very_expensive = df[(df['price'] > 300) & (df['price'] < 800.01)]
very_expensive = very_expensive.reset_index(drop=True)
very_expensive.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 485 entries, 0 to 484
Data columns (total 15 columns):
country                  485 non-null object
description              485 non-null object
designation              311 non-null object
points                   485 non-null int64
price                    485 non-null float64
province                 485 non-null object
region_1                 398 non-null object
region_2                 30 non-null object
taster_name              257 non-null object
taster_twitter_handle    234 non-null object
title                    286 non-null object
variety                  485 non-null object
winery                   485 non-null object
grade                    485 non-null int64
ratio                    485 non-null float64
dtypes: float64(2), int64(2), object(11)
memory usage: 57.0+ KB


In [10]:
very_expensive['grade'].value_counts()

4    344
3     77
5     48
2     12
1      3
0      1
Name: grade, dtype: int64

In [11]:
very_expensive = very_expensive[very_expensive['grade'] > 4]
very_expensive['ratio'].describe()

count    48.000000
mean      4.807621
std       1.485491
min       3.112245
25%       3.673469
50%       4.444444
75%       5.539141
max       8.163265
Name: ratio, dtype: float64

In [12]:
very_expensive = very_expensive.nsmallest(5, 'ratio').reset_index(drop=True)
very_expensive = very_expensive.drop_duplicates('variety').reset_index(drop=True)
very_expensive

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery,grade,ratio
0,France,"With its gold color and mature, toasty flavors...",CuvÃ©e Sir Winston Churchill Brut,98,305.0,Champagne,Champagne,,Roger Voss,@vossroger,Pol Roger 2002 CuvÃ©e Sir Winston Churchill Br...,Champagne Blend,Pol Roger,5,3.112245
1,Italy,Here's a âwowâ wine you won't easily forge...,Messorio,99,320.0,Tuscany,Toscana,,,,Le Macchiole 2007 Messorio Merlot (Toscana),Merlot,Le Macchiole,5,3.232323
2,France,"A beautiful wine, smooth, polished, with round...",,98,319.0,Burgundy,Clos de Tart,,Roger Voss,@vossroger,Clos de Tart 2005 Clos de Tart,Pinot Noir,Clos de Tart,5,3.255102


In [13]:
# Creating a dataframe for expensive wines

expensive = df[(df['price'] > 100) & (df['price'] < 300.01)]
expensive = expensive.reset_index(drop=True)
expensive.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5041 entries, 0 to 5040
Data columns (total 15 columns):
country                  5041 non-null object
description              5041 non-null object
designation              4162 non-null object
points                   5041 non-null int64
price                    5041 non-null float64
province                 5041 non-null object
region_1                 4666 non-null object
region_2                 1598 non-null object
taster_name              2276 non-null object
taster_twitter_handle    2226 non-null object
title                    2917 non-null object
variety                  5041 non-null object
winery                   5041 non-null object
grade                    5041 non-null int64
ratio                    5041 non-null float64
dtypes: float64(2), int64(2), object(11)
memory usage: 590.9+ KB


In [14]:
expensive['grade'].value_counts()

3    2417
4    1935
2     468
1     116
5      99
0       6
Name: grade, dtype: int64

In [15]:
expensive = expensive[expensive['grade'] > 4]
expensive['ratio'].describe()

count    99.000000
mean      1.949832
std       0.620678
min       1.040816
25%       1.377551
50%       1.887755
75%       2.525253
max       3.061224
Name: ratio, dtype: float64

In [16]:
expensive = expensive.nsmallest(5, 'ratio').reset_index(drop=True)
expensive = expensive.drop_duplicates('variety').reset_index(drop=True)
expensive

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery,grade,ratio
0,Italy,"Here's a âwowâ wine on every level, with i...",Guado al Tasso,98,102.0,Tuscany,Bolgheri Superiore,,,,Marchesi Antinori 2008 Guado al Tasso (Bolghe...,Red Blend,Marchesi Antinori,5,1.040816
1,Austria,"The intensity of the acidity, piercing through...",Zwischen den Steen Nummer 8 Trockenbeerenauslese,98,103.0,Burgenland,,,,,,Welschriesling,Kracher,5,1.05102
2,US,The opulence of this sparkling wine has to be ...,Reserve,98,110.0,California,Napa-Mendocino-Sonoma-Marin,North Coast,,,Schramsberg 2004 Reserve Sparkling (Napa-Mendo...,Sparkling Blend,Schramsberg,5,1.122449


In [39]:
# Creating a dataframe for medium wines

medium = df[(df['price'] > 40) & (df['price'] < 100.01)]
medium = medium.reset_index(drop=True)
medium.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 44573 entries, 0 to 44572
Data columns (total 15 columns):
country                  44571 non-null object
description              44573 non-null object
designation              36962 non-null object
points                   44573 non-null int64
price                    44573 non-null float64
province                 44571 non-null object
region_1                 40466 non-null object
region_2                 23074 non-null object
taster_name              19691 non-null object
taster_twitter_handle    19121 non-null object
title                    25922 non-null object
variety                  44573 non-null object
winery                   44573 non-null object
grade                    44573 non-null int64
ratio                    44573 non-null float64
dtypes: float64(2), int64(2), object(11)
memory usage: 5.1+ MB


In [40]:
medium['grade'].value_counts()

3    25302
2    10215
4     5531
1     3226
0      239
5       60
Name: grade, dtype: int64

In [41]:
medium = medium[medium['grade'] > 4]
medium['ratio'].describe()

count    60.000000
mean      0.793356
std       0.146045
min       0.444444
25%       0.714286
50%       0.765306
75%       0.923469
max       1.020408
Name: ratio, dtype: float64

In [44]:
medium = medium.nsmallest(3, 'ratio').reset_index(drop=True)
medium = medium.drop_duplicates('variety').reset_index(drop=True)
medium

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery,grade,ratio
0,US,"Shows classic, full-throttle notes of tropical...",Estate Vineyard,99,44.0,California,Sonoma Coast,Sonoma,,,Failla 2010 Estate Vineyard Chardonnay (Sonoma...,Chardonnay,Failla,5,0.444444
1,US,"A stunning Pirouetteâ63% Cabernet Sauvignon,...",Red Wine,98,50.0,Washington,Columbia Valley (WA),Columbia Valley,Paul Gregutt,@paulgwineÂ,Pirouette 2008 Red Wine Red (Columbia Valley (...,Bordeaux-style Red Blend,Pirouette,5,0.510204
2,US,The first thoughts on this wine are how young ...,Maritime Vineyard,98,52.0,California,Sonoma Coast,Sonoma,,,,Pinot Noir,W.H. Smith,5,0.530612


In [47]:
# Creating a dataframe for low wines

low = df[(df['price'] > 25) & (df['price'] < 40.01)]
low = low.reset_index(drop=True)
low.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 46110 entries, 0 to 46109
Data columns (total 15 columns):
country                  46090 non-null object
description              46110 non-null object
designation              33875 non-null object
points                   46110 non-null int64
price                    46110 non-null float64
province                 46090 non-null object
region_1                 40845 non-null object
region_2                 25624 non-null object
taster_name              20042 non-null object
taster_twitter_handle    19002 non-null object
title                    26011 non-null object
variety                  46110 non-null object
winery                   46110 non-null object
grade                    46110 non-null int64
ratio                    46110 non-null float64
dtypes: float64(2), int64(2), object(11)
memory usage: 5.3+ MB


In [48]:
low['grade'].value_counts()

3    18971
2    16948
1     8304
4      978
0      909
Name: grade, dtype: int64

In [49]:
low = low[low['grade'] > 3]
low['ratio'].describe()

count    978.000000
mean       0.375891
std        0.042667
min        0.273684
25%        0.340426
50%        0.378947
75%        0.414894
max        0.425532
Name: ratio, dtype: float64

In [52]:
low = low.nsmallest(3, 'ratio').reset_index(drop=True)
low = low.drop_duplicates('variety').reset_index(drop=True)
low

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery,grade,ratio
0,France,A flinty touch of promising reduction still ho...,Goldert Grand Cru,95,26.0,Alsace,Alsace,,Anne KrebiehlÂ MW,@AnneInVino,Domaine Zind-Humbrecht 2015 Goldert Grand Cru ...,Muscat,Domaine Zind-Humbrecht,4,0.273684
1,US,"Massive, opulent, oozing in pineapple crÃ©me b...",,95,26.0,California,Sta. Rita Hills,Central Coast,,,Melville 2008 Chardonnay (Sta. Rita Hills),Chardonnay,Melville,4,0.273684
2,Italy,It boasts classic varietal aromas of crushed t...,,94,26.0,Northeastern Italy,Collio,,Kerin OâKeefe,@kerinokeefe,Russiz Superiore 2012 Sauvignon (Collio),Sauvignon,Russiz Superiore,4,0.276596


In [25]:
# Creating a dataframe for cheap wines

cheap = df[(df['price'] > 0) & (df['price'] < 25.01)]
cheap = cheap.reset_index(drop=True)
cheap.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 104475 entries, 0 to 104474
Data columns (total 15 columns):
country                  104439 non-null object
description              104475 non-null object
designation              65706 non-null object
points                   104475 non-null int64
price                    104475 non-null float64
province                 104439 non-null object
region_1                 81776 non-null object
region_2                 35547 non-null object
taster_name              46029 non-null object
taster_twitter_handle    43229 non-null object
title                    56431 non-null object
variety                  104474 non-null object
winery                   104475 non-null object
grade                    104475 non-null int64
ratio                    104475 non-null float64
dtypes: float64(2), int64(2), object(11)
memory usage: 12.0+ MB


In [26]:
cheap['grade'].value_counts()

1    42977
2    40961
3    14805
0     5569
4      163
Name: grade, dtype: int64

In [27]:
cheap = cheap[cheap['grade'] > 3]
cheap['ratio'].describe()

count    163.000000
mean       0.237411
std        0.028919
min        0.138298
25%        0.212766
50%        0.252632
75%        0.263158
max        0.265957
Name: ratio, dtype: float64

In [28]:
cheap = cheap.nsmallest(3, 'ratio').reset_index(drop=True)
cheap = cheap.drop_duplicates('variety').reset_index(drop=True)
cheap

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery,grade,ratio
0,Portugal,Assobio is from a single vineyard in the Quint...,Assobio,94,13.0,Douro,,,Roger Voss,@vossroger,Quinta dos MurÃ§as 2011 Assobio Red (Douro),Portuguese Red,Quinta dos MurÃ§as,4,0.138298
1,Spain,"A spectacularly sweet and rich bruiser, and on...",Pedro Ximenez 1827 Sweet Sherry,94,14.0,Andalucia,Jerez,,Michael Schachner,@wineschach,Osborne NV Pedro Ximenez 1827 Sweet Sherry She...,Sherry,Osborne,4,0.148936
2,Spain,This reserve-level P.X. sets the gold standard...,Cardenal Cisneros Reservas,94,15.0,Andalucia,Jerez,,,,,Pedro XimÃ©nez,Sanchez Romate,4,0.159574


In [72]:
# Some stupid moves, sorry

luxury['category'] = 'luxury'
very_expensive['category'] = 'very_expensive'
expensive['category'] = 'expensive'

In [73]:
medium['category'] = 'medium'
low['category'] = 'low'
cheap['category'] = 'cheap'

In [62]:
luxury

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery,grade,ratio,category
0,France,"Full of ripe fruit, opulent and concentrated, ...",,100,848.0,Bordeaux,Pessac La ognan Bordeaux,,Roger Voss,@vossroger,ChÃ¢teau Haut-Brion 2014 Pessac-LÃ©ognan,Bordeaux-style White Blend,Chateau Haut-Brion,5,8.48,luxury
2,Australia,"This inky, embryonic wine deserves to be cella...",Grange,99,850.0,South Australia,South Australia,,Joe Czerwinski,@JoeCz,Penfolds 2010 Grange Shiraz (South Australia),Shiraz,Penfolds,5,8.585859,luxury
5,France,A wine that has created its own universe. It h...,Clos du Mesnil,100,1400.0,Champagne,Champagne,,,,,Chardonnay,Krug,5,14.0,luxury


In [61]:
luxury = luxury.replace('ChÃ¢teau Haut-Brion', 'Chateau Haut-Brion')
luxury = luxury.replace('Pessac-LÃ©ognan','Pessac La ognan Bordeaux')

In [34]:
very_expensive

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery,grade,ratio,category
0,France,"With its gold color and mature, toasty flavors...",CuvÃ©e Sir Winston Churchill Brut,98,305.0,Champagne,Champagne,,Roger Voss,@vossroger,Pol Roger 2002 CuvÃ©e Sir Winston Churchill Br...,Champagne Blend,Pol Roger,5,3.112245,very_expensive
1,Italy,Here's a âwowâ wine you won't easily forge...,Messorio,99,320.0,Tuscany,Toscana,,,,Le Macchiole 2007 Messorio Merlot (Toscana),Merlot,Le Macchiole,5,3.232323,very_expensive
2,France,"A beautiful wine, smooth, polished, with round...",,98,319.0,Burgundy,Clos de Tart,,Roger Voss,@vossroger,Clos de Tart 2005 Clos de Tart,Pinot Noir,Clos de Tart,5,3.255102,very_expensive


In [35]:
expensive

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery,grade,ratio,category
0,Italy,"Here's a âwowâ wine on every level, with i...",Guado al Tasso,98,102.0,Tuscany,Bolgheri Superiore,,,,Marchesi Antinori 2008 Guado al Tasso (Bolghe...,Red Blend,Marchesi Antinori,5,1.040816,expensive
1,Austria,"The intensity of the acidity, piercing through...",Zwischen den Steen Nummer 8 Trockenbeerenauslese,98,103.0,Burgenland,,,,,,Welschriesling,Kracher,5,1.05102,expensive
2,US,The opulence of this sparkling wine has to be ...,Reserve,98,110.0,California,Napa-Mendocino-Sonoma-Marin,North Coast,,,Schramsberg 2004 Reserve Sparkling (Napa-Mendo...,Sparkling Blend,Schramsberg,5,1.122449,expensive


In [74]:
medium

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery,grade,ratio,category
0,US,"Shows classic, full-throttle notes of tropical...",Estate Vineyard,99,44.0,California,Sonoma Coast,Sonoma,,,Failla 2010 Estate Vineyard Chardonnay (Sonoma...,Chardonnay,Failla,5,0.444444,medium
1,US,"A stunning Pirouetteâ63% Cabernet Sauvignon,...",Red Wine,98,50.0,Washington,Columbia Valley (WA),Columbia Valley,Paul Gregutt,@paulgwineÂ,Pirouette 2008 Red Wine Red (Columbia Valley (...,Bordeaux-style Red Blend,Pirouette,5,0.510204,medium
2,US,The first thoughts on this wine are how young ...,Maritime Vineyard,98,52.0,California,Sonoma Coast,Sonoma,,,,Pinot Noir,W.H. Smith,5,0.530612,medium


In [75]:
low

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery,grade,ratio,category
0,France,A flinty touch of promising reduction still ho...,Goldert Grand Cru,95,26.0,Alsace,Alsace,,Anne KrebiehlÂ MW,@AnneInVino,Domaine Zind-Humbrecht 2015 Goldert Grand Cru ...,Muscat,Domaine Zind-Humbrecht,4,0.273684,low
1,US,"Massive, opulent, oozing in pineapple crÃ©me b...",,95,26.0,California,Sta. Rita Hills,Central Coast,,,Melville 2008 Chardonnay (Sta. Rita Hills),Chardonnay,Melville,4,0.273684,low
2,Italy,It boasts classic varietal aromas of crushed t...,,94,26.0,Northeastern Italy,Collio,,Kerin OâKeefe,@kerinokeefe,Russiz Superiore 2012 Sauvignon (Collio),Sauvignon,Russiz Superiore,4,0.276596,low


In [76]:
cheap

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery,grade,ratio,category
0,Portugal,Assobio is from a single vineyard in the Quint...,Assobio,94,13.0,Douro,,,Roger Voss,@vossroger,Quinta dos Muraças 2011 Assobio Red (Douro),Portuguese Red,Quinta dos Muraças,4,0.138298,cheap
1,Spain,"A spectacularly sweet and rich bruiser, and on...",Pedro Ximenez 1827 Sweet Sherry,94,14.0,Andalucia,Jerez,,Michael Schachner,@wineschach,Osborne NV Pedro Ximenez 1827 Sweet Sherry She...,Sherry,Osborne,4,0.148936,cheap
2,Spain,This reserve-level P.X. sets the gold standard...,Cardenal Cisneros Reservas,94,15.0,Andalucia,Jerez,,,,,Pedro XimÃ©nez,Sanchez Romate,4,0.159574,cheap


In [63]:
cheap = cheap.replace('Quinta dos MurÃ§as', 'Quinta dos Muraças')
cheap = cheap.replace('Quinta dos MurÃ§as 2011 Assobio Red (Douro)', 'Quinta dos Muraças 2011 Assobio Red (Douro)')

In [65]:
joined = 

In [77]:
joined = pd.concat([luxury, very_expensive, expensive, medium, low, cheap], ignore_index=True)

In [78]:
joined

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery,grade,ratio,category
0,France,"Full of ripe fruit, opulent and concentrated, ...",,100,848.0,Bordeaux,Pessac La ognan Bordeaux,,Roger Voss,@vossroger,ChÃ¢teau Haut-Brion 2014 Pessac-LÃ©ognan,Bordeaux-style White Blend,Chateau Haut-Brion,5,8.48,luxury
1,Australia,"This inky, embryonic wine deserves to be cella...",Grange,99,850.0,South Australia,South Australia,,Joe Czerwinski,@JoeCz,Penfolds 2010 Grange Shiraz (South Australia),Shiraz,Penfolds,5,8.585859,luxury
2,France,A wine that has created its own universe. It h...,Clos du Mesnil,100,1400.0,Champagne,Champagne,,,,,Chardonnay,Krug,5,14.0,luxury
3,France,"With its gold color and mature, toasty flavors...",CuvÃ©e Sir Winston Churchill Brut,98,305.0,Champagne,Champagne,,Roger Voss,@vossroger,Pol Roger 2002 CuvÃ©e Sir Winston Churchill Br...,Champagne Blend,Pol Roger,5,3.112245,very_expensive
4,Italy,Here's a âwowâ wine you won't easily forge...,Messorio,99,320.0,Tuscany,Toscana,,,,Le Macchiole 2007 Messorio Merlot (Toscana),Merlot,Le Macchiole,5,3.232323,very_expensive
5,France,"A beautiful wine, smooth, polished, with round...",,98,319.0,Burgundy,Clos de Tart,,Roger Voss,@vossroger,Clos de Tart 2005 Clos de Tart,Pinot Noir,Clos de Tart,5,3.255102,very_expensive
6,Italy,"Here's a âwowâ wine on every level, with i...",Guado al Tasso,98,102.0,Tuscany,Bolgheri Superiore,,,,Marchesi Antinori 2008 Guado al Tasso (Bolghe...,Red Blend,Marchesi Antinori,5,1.040816,expensive
7,Austria,"The intensity of the acidity, piercing through...",Zwischen den Steen Nummer 8 Trockenbeerenauslese,98,103.0,Burgenland,,,,,,Welschriesling,Kracher,5,1.05102,expensive
8,US,The opulence of this sparkling wine has to be ...,Reserve,98,110.0,California,Napa-Mendocino-Sonoma-Marin,North Coast,,,Schramsberg 2004 Reserve Sparkling (Napa-Mendo...,Sparkling Blend,Schramsberg,5,1.122449,expensive
9,US,"Shows classic, full-throttle notes of tropical...",Estate Vineyard,99,44.0,California,Sonoma Coast,Sonoma,,,Failla 2010 Estate Vineyard Chardonnay (Sonoma...,Chardonnay,Failla,5,0.444444,medium


In [138]:
def add_latitude(winery):
    if winery == 'Chateau Haut-Brion':
        return 44.8058
    elif winery == 'Penfolds':
        return -34.9090
    elif winery == 'Krug':
        return 49.0000
    elif winery == 'Pol Roger':
        return 49.0434
    elif winery == 'Le Macchiole':
        return 43.2078
    elif winery == 'Clos de Tart':
        return 47.1956
    elif winery == 'Marchesi Antinori':
        return 45.60272
    elif winery == 'Kracher':
        return 47.7617
    elif winery == 'Schramsberg':
        return 38.578796
    elif winery == 'Failla':
        return 38.5481
    elif winery == 'Pirouette':
        return 46.2775
    elif winery == 'W.H. Smith':
        return 38.2919
    elif winery == 'Domaine Zind-Humbrecht':
        return 48.0838
    elif winery == 'Melville':
        return 40.7934
    elif winery == 'Russiz Superiore':
        return 45.9421
    elif winery == 'Quinta dos Muraças':
        return 41.1530
    elif winery == 'Osborne':
        return 37.5443
    elif winery == 'Sanchez Romate':
        return 36.6909
    else:
        return 0

In [139]:
def add_longitude(winery):
    if winery == 'Chateau Haut-Brion':
        return 0.6304
    elif winery == 'Penfolds':
        return 138.6758
    elif winery == 'Krug':
        return 4
    elif winery == 'Pol Roger':
        return 3.9562
    elif winery == 'Le Macchiole':
        return 10.6117
    elif winery == 'Clos de Tart':
        return 4.9617
    elif winery == 'Marchesi Antinori':
        return 10.06157
    elif winery == 'Kracher':
        return 16.8069
    elif winery == 'Schramsberg':
        return -122.579704
    elif winery == 'Failla':
        return -122.4904
    elif winery == 'Pirouette':
        return -117.8143
    elif winery == 'W.H. Smith':
        return -122.4580
    elif winery == 'Domaine Zind-Humbrecht':
        return 7.2957
    elif winery == 'Melville':
        return -73.41512
    elif winery == 'Russiz Superiore':
        return 13.5144
    elif winery == 'Quinta dos Muraças':
        return -7.6881
    elif winery == 'Osborne':
        return -4.7278
    elif winery == 'Sanchez Romate':
        return -6.1428
    else:
        return 0

In [140]:
joined['latitude'] = joined['winery'].apply(add_latitude)
joined['longitude'] = joined['winery'].apply(add_longitude)

In [141]:
joined = joined.replace('US','USA')

In [142]:
joined['country']

0        France
1     Australia
2        France
3        France
4         Italy
5        France
6         Italy
7       Austria
8           USA
9           USA
10          USA
11          USA
12       France
13          USA
14        Italy
15     Portugal
16        Spain
17        Spain
Name: country, dtype: object

In [143]:
# Plotting the map

px.scatter_geo(joined, lat = 'latitude', lon = 'longitude', color='category', hover_name="winery", projection="natural earth")

In [145]:
wind = px.data.wind()
fig = px.bar_polar(joined, r='points', theta='country', color='category', template="plotly_dark",
            color_discrete_sequence= px.colors.sequential.Plasma[-2::-1])
fig.show()