In [1]:
# Basic Libraries
import numpy as np
import pandas as pd

In [2]:
# Read the wine-review CSV into a DataFrame and preview its first five rows.
# NOTE(review): the preview contains an "Unnamed: 0" column, which looks like a
# row index that was saved into the CSV — confirm before relying on it.
wineData = pd.read_csv(filepath_or_buffer='winemag-data-130k-v2.csv')
wineData.head(n=5)

Unnamed: 0.1,Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
0,0,Italy,"Aromas include tropical fruit, broom, brimston...",Vulkà Bianco,87,,Sicily & Sardinia,Etna,,Kerin O’Keefe,@kerinokeefe,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia
1,1,Portugal,"This is ripe and fruity, a wine that is smooth...",Avidagos,87,15.0,Douro,,,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos
2,2,US,"Tart and snappy, the flavors of lime flesh and...",,87,14.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm
3,3,US,"Pineapple rind, lemon pith and orange blossom ...",Reserve Late Harvest,87,13.0,Michigan,Lake Michigan Shore,,Alexander Peartree,,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian
4,4,US,"Much like the regular bottling from 2012, this...",Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks


In [3]:
# Keep only the score and reviewer columns, then discard every row in which
# either of the two is missing (NaN).
filteredWine = (
    wineData
    .loc[:, ["points", "taster_name"]]
    .dropna(axis="index", how="any")
)

# Summary statistics of the numeric column(s) that remain
filteredWine.describe()

Unnamed: 0,points
count,103727.0
mean,88.612107
std,2.955039
min,80.0
25%,87.0
50%,88.0
75%,91.0
max,100.0


In [4]:
# Distinct reviewer names, in order of first appearance in filteredWine
uniqueTasters = pd.unique(filteredWine["taster_name"])
uniqueTasters

array(['Kerin O’Keefe', 'Roger Voss', 'Paul Gregutt',
       'Alexander Peartree', 'Michael Schachner', 'Anna Lee C. Iijima',
       'Virginie Boone', 'Matt Kettmann', 'Sean P. Sullivan',
       'Jim Gordon', 'Joe Czerwinski', 'Anne Krebiehl\xa0MW',
       'Lauren Buzzeo', 'Mike DeSimone', 'Jeff Jenssen',
       'Susan Kostrzewa', 'Carrie Dykes', 'Fiona Adams',
       'Christina Pickard'], dtype=object)

In [5]:
# Per-taster score summary, keyed by taster name:
#     tastersDict[name] = [max points, min points, std of points, review count]
#
# A single grouped aggregation replaces the former per-taster loop, which
# re-scanned the whole frame once per taster and compared every column
# (including the numeric "points") against the taster's name.
# sort=False keeps the tasters in order of first appearance, i.e. the same
# order as filteredWine["taster_name"].unique().
tasterStats = (
    filteredWine
    .groupby("taster_name", sort=False)["points"]
    # "std" is the sample standard deviation (ddof=1), like Series.std();
    # "size" is the number of rows per taster, like len(rows.index).
    .agg(["max", "min", "std", "size"])
)

# itertuples(name=None) yields one plain (max, min, std, size) tuple per taster,
# so each statistic keeps its own type (ints stay ints, std stays a float).
tastersDict = {
    taster: list(stats)
    for taster, stats in zip(
        tasterStats.index,
        tasterStats.itertuples(index=False, name=None),
    )
}

tastersDict

{'Kerin O’Keefe': [100, 80, 2.474240167769019, 10776],
 'Roger Voss': [100, 80, 3.0363727807256753, 25514],
 'Paul Gregutt': [100, 80, 2.814445219283458, 9532],
 'Alexander Peartree': [91, 80, 1.9345106992266201, 415],
 'Michael Schachner': [98, 80, 3.0228593421094647, 15134],
 'Anna Lee C. Iijima': [98, 80, 2.561359447271101, 4415],
 'Virginie Boone': [99, 80, 2.996795911657403, 9537],
 'Matt Kettmann': [97, 81, 2.5712565669221616, 6332],
 'Sean P. Sullivan': [97, 80, 2.4585470224886916, 4966],
 'Jim Gordon': [97, 80, 2.6983407039686735, 4177],
 'Joe Czerwinski': [100, 80, 2.858700937665035, 5147],
 'Anne Krebiehl\xa0MW': [97, 80, 2.373100247210104, 3685],
 'Lauren Buzzeo': [95, 81, 2.5306717784539776, 1835],
 'Mike DeSimone': [94, 82, 1.992549132650219, 514],
 'Jeff Jenssen': [97, 82, 2.0227271803853326, 491],
 'Susan Kostrzewa': [94, 80, 2.3761398185634874, 1085],
 'Carrie Dykes': [92, 81, 1.9986180190392988, 139],
 'Fiona Adams': [91, 82, 1.739436985245808, 27],
 'Christina Pickard

In [7]:
# Turn tastersDict into a DataFrame: one column per taster, one positional
# row per list element. Each column mixes integers with a float standard
# deviation, so the displayed values are all floats (e.g. 100.0).
tastersData = pd.DataFrame(data=tastersDict)

# Replace the positional row labels 0..3 with readable statistic names
# (this relabels the index, i.e. the rows — the columns stay the taster names).
tastersDataNew = tastersData.rename(
    index=dict(enumerate(['Max', 'Min', 'Standard Deviation', 'Count']))
)

display(tastersDataNew)

Unnamed: 0,Kerin O’Keefe,Roger Voss,Paul Gregutt,Alexander Peartree,Michael Schachner,Anna Lee C. Iijima,Virginie Boone,Matt Kettmann,Sean P. Sullivan,Jim Gordon,Joe Czerwinski,Anne Krebiehl MW,Lauren Buzzeo,Mike DeSimone,Jeff Jenssen,Susan Kostrzewa,Carrie Dykes,Fiona Adams,Christina Pickard
Max,100.0,100.0,100.0,91.0,98.0,98.0,99.0,97.0,97.0,97.0,100.0,97.0,95.0,94.0,97.0,94.0,92.0,91.0,93.0
Min,80.0,80.0,80.0,80.0,80.0,80.0,80.0,81.0,80.0,80.0,80.0,80.0,81.0,82.0,82.0,80.0,81.0,82.0,82.0
Standard Deviation,2.47424,3.036373,2.814445,1.934511,3.022859,2.561359,2.996796,2.571257,2.458547,2.698341,2.858701,2.3731,2.530672,1.992549,2.022727,2.37614,1.998618,1.739437,3.600926
Count,10776.0,25514.0,9532.0,415.0,15134.0,4415.0,9537.0,6332.0,4966.0,4177.0,5147.0,3685.0,1835.0,514.0,491.0,1085.0,139.0,27.0,6.0
