In [1]:
%matplotlib notebook

In [2]:
# Dependencies
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [3]:
# Files to load
variety_by_sales_data_to_load = "data/top_wine_variety_by_sales_volume.csv"
winemag_data_to_load = "data/winemag_data_v2.csv"

# Read data
variety_by_sales_data = pd.read_csv(variety_by_sales_data_to_load)
winemag_data = pd.read_csv(winemag_data_to_load)

winemag_data_variety_score = winemag_data.drop(columns=["Unnamed: 0","description","region_2","taster_name",
                                                           "taster_twitter_handle","title"])

winemag_data_variety_score.head()

Unnamed: 0,country,designation,points,price,province,region_1,variety,winery
0,Italy,Vulkà Bianco,87,,Sicily & Sardinia,Etna,White Blend,Nicosia
1,Portugal,Avidagos,87,15.0,Douro,,Portuguese Red,Quinta dos Avidagos
2,US,,87,14.0,Oregon,Willamette Valley,Pinot Gris,Rainstorm
3,US,Reserve Late Harvest,87,13.0,Michigan,Lake Michigan Shore,Riesling,St. Julian
4,US,Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,Pinot Noir,Sweet Cheeks


In [4]:
# Top 5 wine variety world mean wine scores

chardonnay = winemag_data_variety_score.loc[winemag_data_variety_score["variety"] == "Chardonnay"]
chardonnay_country = chardonnay.groupby("country")
chardonnay_country_average = chardonnay_country["points"].mean()

cabernet_sauvignon = winemag_data_variety_score.loc[winemag_data_variety_score["variety"] == "Cabernet Sauvignon"]
cabernet_sauvignon_country = cabernet_sauvignon.groupby("country")
cabernet_sauvignon_country_average = cabernet_sauvignon_country["points"].mean()

red_blend = winemag_data_variety_score.loc[winemag_data_variety_score["variety"] == "Red Blend"]
red_blend_country = red_blend.groupby("country")
red_blend_country_average = red_blend_country["points"].mean()

pinot_grigio = winemag_data_variety_score.loc[winemag_data_variety_score["variety"] == "Pinot Grigio"]
pinot_grigio_country = pinot_grigio.groupby("country")
pinot_grigio_country_average = pinot_grigio_country["points"].mean()

pinot_gris = winemag_data_variety_score.loc[winemag_data_variety_score["variety"] == "Pinot Gris"]
pinot_gris_country = pinot_gris.groupby("country")
pinot_gris_country_average = pinot_gris_country["points"].mean()

merlot = winemag_data_variety_score.loc[winemag_data_variety_score["variety"] == "Merlot"]
merlot_country = merlot.groupby("country")
merlot_country_average = merlot_country["points"].mean()

# summary dataframe
summary_table_2 = pd.DataFrame({"Chardonnay": chardonnay_country_average,
                                "Cabernet Sauvignon": cabernet_sauvignon_country_average,
                                "Red Blend": red_blend_country_average,
                                "Pinot Grigio": pinot_grigio_country_average,
                                "Pinot Gris": pinot_gris_country_average,
                                "Merlot": merlot_country_average})

summary_table_2.index.name = None
summary_table_2

Unnamed: 0,Chardonnay,Cabernet Sauvignon,Red Blend,Pinot Grigio,Pinot Gris,Merlot
Argentina,84.945763,86.048148,88.397436,83.777778,84.888889,85.033898
Australia,87.3175,89.25188,87.833333,86.0,87.428571,85.736842
Austria,90.301587,87.4,89.913043,,90.066667,91.0
Brazil,83.8,83.0,85.333333,,,84.428571
Bulgaria,88.5,87.85,89.181818,88.0,,87.692308
Canada,88.884615,90.0,90.6,,90.25,89.0
Chile,85.133721,86.688716,88.943765,83.0,,85.244186
Croatia,,,87.666667,,83.0,
Cyprus,,,87.0,,,
Czech Republic,,,89.0,,,


In [5]:
# Top 5 wine variety us mean wine scores

winemag_data_variety_score_us = winemag_data_variety_score.loc[winemag_data_variety_score["country"] == "US"]

chardonnay_us = winemag_data_variety_score_us.loc[winemag_data_variety_score_us["variety"] == "Chardonnay"]
chardonnay_us_average = chardonnay_us["points"].mean()

cabernet_sauvignon_us = winemag_data_variety_score_us.loc[winemag_data_variety_score_us["variety"] == "Cabernet Sauvignon"]
cabernet_sauvignon_us_average = cabernet_sauvignon_us["points"].mean()

red_blend_us = winemag_data_variety_score_us.loc[winemag_data_variety_score_us["variety"] == "Red Blend"]
red_blend_us_average = red_blend_us["points"].mean()

pinot_grigio_us = winemag_data_variety_score_us.loc[winemag_data_variety_score_us["variety"] == "Pinot Grigio"]
pinot_grigio_us_average = pinot_grigio_us["points"].mean()

pinot_gris_us = winemag_data_variety_score_us.loc[winemag_data_variety_score_us["variety"] == "Pinot Gris"]
pinot_gris_us_average = pinot_gris_us["points"].mean()

merlot_us = winemag_data_variety_score_us.loc[winemag_data_variety_score_us["variety"] == "Merlot"]
merlot_us_average = merlot_us["points"].mean()

# summary dataframe
summary_table_2_us = pd.DataFrame({
    "Wine Variety": ["Chardonnay", "Cabernet Sauvignon", "Red Blend", "Pinot Grigio/Pinot Gris", "Merlot"],
    "Mean Wine Score": [chardonnay_us_average, cabernet_sauvignon_us_average, red_blend_us_average, 
                        (pinot_grigio_us_average + pinot_gris_us_average)/2, merlot_us_average]
})

summary_table_2_us.index.name = None
summary_table_2_us

Unnamed: 0,Wine Variety,Mean Wine Score
0,Chardonnay,88.443611
1,Cabernet Sauvignon,89.02529
2,Red Blend,87.850942
3,Pinot Grigio/Pinot Gris,86.950016
4,Merlot,87.386846


In [6]:
# Set x- and y-axis and tick locations
x_axis = np.arange(0,len(summary_table_2_us),1)
y_axis = summary_table_2_us["Mean Wine Score"]
tick_locations = [value for value in x_axis]

# Create a list indicating where to write x labels
ha = "right"
plt.bar(x_axis, y_axis, color='b', alpha=0.5, align="center")
plt.xticks(tick_locations, summary_table_2_us["Wine Variety"], rotation=25, ha=ha)

# Set x and y limits
plt.xlim(-0.75, len(x_axis))
plt.ylim(80, 100)

# Set a title and labels
plt.title("Wine Variety vs. Mean Wine Score (US)")
plt.xlabel("Wine Variety")
plt.ylabel("Mean Wine Score (US)")

# Save our graph and show the graph
plt.tight_layout()
plt.savefig("Output/Output2.png")
plt.show()

<IPython.core.display.Javascript object>