In [23]:
import pandas as pd
import numpy as np
import chart_studio.plotly as py
import cufflinks as cf
import seaborn as sns
import plotly.express as px
import plotly.colors as pc
import plotly.graph_objects as go
%matplotlib inline

from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Set up plotly's offline notebook rendering.
# NOTE(review): per the plotly.offline docs, connected=True should load plotly.js
# from a CDN instead of embedding it in the notebook — confirm for the installed version.
init_notebook_mode(connected=True)
# Switch cufflinks to offline mode.
# NOTE(review): presumably so DataFrame.iplot() works without a Chart Studio account — confirm.
cf.go_offline()

In [3]:
# Stacked bar chart of total_user_count per flavour keyword, coloured by wine,
# restricted to a fixed set of keywords and to rows with at least 10 users.
df_04 = pd.read_csv("04.csv")

# Keywords of interest; lower-cased so the comparison below is case-insensitive.
keywords = [kw.lower() for kw in ["coffee", "toast", "green apple", "cream", "citrus"]]

# The 'keywords' column holds a comma-separated string per row. Split on the bare
# comma and strip each piece afterwards, so irregular spacing ("a,b", "a,  b",
# trailing blanks) still yields clean tokens; splitting on the exact string ', '
# would leave such entries unmatched.
df_04['keywords'] = df_04['keywords'].str.lower().str.split(',')
df_04 = df_04.explode('keywords')  # one row per individual keyword
df_04['keywords'] = df_04['keywords'].str.strip()

# Keep only the requested keywords on rows with at least 10 users.
df_filtered = df_04[df_04['keywords'].isin(keywords) & (df_04['total_user_count'] >= 10)]

fig_keywords = px.bar(
    df_filtered,
    x='keywords',
    y='total_user_count',
    color='wine_name',
    title='Wines Related to Specific Keywords',
)
fig_keywords.show()

In [4]:
# Bar chart of the 5 highest-rated wines for each of the 3 most frequent grapes.
df_05 = pd.read_csv("05.csv")

# The three grape values that occur most often in the data, most frequent first.
top_grapes = df_05['grape'].value_counts().head(3).index.tolist()

# Build the per-grape top-5 frames first and concatenate once. Growing a frame
# with pd.concat inside the loop re-copies all accumulated rows on every pass,
# and seeding it with an empty pd.DataFrame() triggers a FutureWarning on
# recent pandas versions.
top_wine_frames = [
    df_05[df_05['grape'] == grape].nlargest(5, 'rating')
    for grape in top_grapes
]
if top_wine_frames:
    best_wines = pd.concat(top_wine_frames, ignore_index=True)
else:
    # No grapes at all (empty input): keep an empty frame with the source columns.
    best_wines = df_05.iloc[0:0].reset_index(drop=True)

fig_best_wines = px.bar(
    best_wines,
    x='grape',
    y='rating',
    color='wine',
    title='Top 5 Best Rated Wines for the Top 3 Most Common Grapes',
    hover_data=['wine', 'rating'],
)
fig_best_wines.show()

In [13]:
# Average rating per country for regular wines and for vintage wines, drawn as
# two bar charts that share one country -> colour mapping.
df_countries = pd.read_csv("06_part_01.csv")
df_vintages = pd.read_csv("06_part_02.csv")

regular_countries = ['Allemagne', 'Israël', 'États-Unis', 'Moldavie', 'Hongrie',
                     'Afrique du Sud', 'Australie', 'France', 'Espagne', 'Portugal',
                     'Chili', 'Italie', 'Argentine', 'Grèce', 'Roumanie', 'Suisse', 'Croatie']

vintage_countries = ['Roumanie', 'Croatie', 'Argentine', 'Chili', 'Portugal',
                     'Moldavie', 'États-Unis', 'Afrique du Sud', 'Italie', 'Espagne',
                     'Hongrie', 'France', 'Australie', 'Grèce', 'Suisse',
                     'Allemagne', 'Israël']

# sorted() makes the order (and therefore each country's colour) reproducible.
# A bare set of strings iterates in hash order, which Python randomises per
# process, so colours would change after every kernel restart.
all_countries = sorted(set(regular_countries + vintage_countries))

extended_palette = px.colors.qualitative.T10 + px.colors.qualitative.Dark24 + px.colors.qualitative.Set3

# Repeat the palette if there are ever more countries than colours.
if len(all_countries) > len(extended_palette):
    extended_palette = extended_palette * (len(all_countries) // len(extended_palette) + 1)

country_color_map = dict(zip(all_countries, extended_palette))


def plot_country_avg_ratings(ratings, title, color_map):
    """Aggregate mean rating per country and build the matching bar chart.

    Parameters
    ----------
    ratings : pandas.DataFrame
        Must contain the columns 'country_name' and 'average_rating'.
    title : str
        Figure title.
    color_map : dict
        Mapping of country name -> colour, passed as ``color_discrete_map``.

    Returns
    -------
    (pandas.DataFrame, plotly.graph_objects.Figure)
        The per-country means (rounded to 2 decimals, sorted descending) and
        the bar figure built from them.
    """
    avg_ratings = (
        ratings.groupby('country_name')['average_rating']
        .mean()
        .round(2)
        .reset_index()
        .sort_values(by='average_rating', ascending=False)
    )
    fig = px.bar(
        avg_ratings,
        x='country_name',
        y='average_rating',
        title=title,
        labels={'average_rating': 'Average Rating', 'country_name': 'Country'},
        text='average_rating',
        color='country_name',
        color_discrete_map=color_map,
    )
    # Print the value above each bar with two decimals; hide labels that would
    # have to shrink below 8pt to fit.
    fig.update_traces(texttemplate='%{text:.2f}', textposition='outside')
    fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
    return avg_ratings, fig


country_avg_ratings, fig_country = plot_country_avg_ratings(
    df_countries,
    'Average Wine Rating by Country (Regular Wines)',
    country_color_map,
)
fig_country.show()

vintage_country_avg_ratings, fig_vintage_country = plot_country_avg_ratings(
    df_vintages,
    'Average Wine Rating by Country (Vintage Wines)',
    country_color_map,
)
fig_vintage_country.show()

In [21]:
# Bubble chart of price vs. average rating; bubble size is the number of ratings
# and each wine (by 'name') gets its own colour.
df_value_wines = pd.read_csv("09.csv")

fig_value_wines = px.scatter(
    df_value_wines,
    x='price_euros',
    y='ratings_average',
    size='ratings_count',
    color='name',
    hover_name='name',
    title='Best Value Wines: Price vs. Rating vs. Number of Ratings',
    # Keys must be the actual column names used above. The previous keys
    # 'price' and 'rating' matched no column, so the axes fell back to the raw
    # column names.
    labels={
        'price_euros': 'Price (€)',
        'ratings_average': 'Rating',
        'ratings_count': 'Number of Ratings',
    },
    size_max=30,
)

# Slightly transparent markers with a thin outline keep overlapping bubbles readable.
fig_value_wines.update_traces(marker=dict(opacity=0.8, line=dict(width=1, color='DarkSlateGrey')))
fig_value_wines.update_layout(title_x=0.5)  # centre the title

fig_value_wines.show()

In [28]:

# Popularity charts: number of ratings per region and per country, with bars
# coloured by the 'total_wine' column and ordered by descending total.
df_region = pd.read_csv("08.csv")
df_country = pd.read_csv("10.csv")

# --- by region ---
fig_region = px.bar(
    data_frame=df_region,
    x='region_name',
    y='rating_count',
    color='total_wine',
    text_auto=True,
    title='Wine Popularity by Region',
    labels={
        'rating_count': 'Number of Ratings',
        'region_name': 'Region',
        'total_wine': 'Total Wines',
    },
)
# Stack bars sharing an x value and sort categories by their total height.
fig_region.update_layout(barmode='stack', xaxis_categoryorder='total descending')
fig_region.show()

# --- by country ---
fig_country = px.bar(
    data_frame=df_country,
    x='country_name',
    y='rating_count',
    color='total_wine',
    text_auto=True,
    title='Wine Popularity by Country',
    labels={
        'rating_count': 'Number of Ratings',
        'country_name': 'Country',
        'total_wine': 'Total Wines',
    },
)
fig_country.update_layout(barmode='stack', xaxis_categoryorder='total descending')
fig_country.show()
