In [3]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import nbformat as nbf
import matplotlib.pyplot as plt
import numpy as np
import os
import sys
import datashader as ds
import datashader.transfer_functions as tf
import datashader.colors as dc
from colorcet import fire,kbc,glasbey_bw,glasbey_dark,glasbey_light,bmw,gray

In [4]:
df=pd.read_csv('..//CSVs/8.csv')
df

Unnamed: 0,wines_taste,avg_ratings_average,avg_acidity,avg_intensity,avg_sweetness,avg_tannin
0,candied ginger,4.7,3.31,4.76,1.92,3.20
1,yellow apple,4.6,2.88,4.36,2.08,2.99
2,white nectarine,4.6,3.53,4.53,1.65,4.08
3,matcha,4.6,3.06,4.26,1.90,2.78
4,fresh coconut,4.6,4.12,3.99,1.67,4.12
...,...,...,...,...,...,...
434,sour cherry pie,4.3,4.03,3.75,1.64,3.94
435,preserved lemon,4.3,4.21,4.01,1.65,4.07
436,pomello,4.3,4.00,3.98,1.61,3.87
437,peanut shell,4.3,3.42,4.03,2.21,3.20


In [5]:
correlation_matrix = df[['avg_ratings_average', 'avg_acidity', 'avg_intensity', 'avg_sweetness', 'avg_tannin']].corr()
correlation_matrix

Unnamed: 0,avg_ratings_average,avg_acidity,avg_intensity,avg_sweetness,avg_tannin
avg_ratings_average,1.0,-0.2278,0.368985,0.071616,-0.105287
avg_acidity,-0.2278,1.0,-0.510971,-0.786915,0.8139
avg_intensity,0.368985,-0.510971,1.0,0.260566,-0.180251
avg_sweetness,0.071616,-0.786915,0.260566,1.0,-0.751247
avg_tannin,-0.105287,0.8139,-0.180251,-0.751247,1.0


In [6]:
fig = px.imshow(
    correlation_matrix,
    text_auto=True,
    color_continuous_scale='Viridis',
    title='Correlation Heatmap Between Taste Profiles and Ratings'
)

# Update layout for better appearance
fig.update_layout(
    # xaxis_title='Variables',
    # yaxis_title='Variables',
    xaxis=dict(tickvals=list(range(len(correlation_matrix.columns))), ticktext=correlation_matrix.columns),
    yaxis=dict(tickvals=list(range(len(correlation_matrix.index))), ticktext=correlation_matrix.index),
    title_x=0.5
)

# Show the plot
fig.show()

In [7]:
df_melted = df.melt(id_vars='avg_ratings_average', value_vars=['avg_sweetness', 'avg_acidity', 'avg_tannin'],
                     var_name='Taste Profile', value_name='Value')

# Create the violin plot
fig = px.violin(
    df_melted,
    y='Value',
    x='Taste Profile',
    color='Taste Profile',
    box=True,
    points='all',
    title='Distribution of Wines Across Different Taste Profiles',
    labels={
        'Value': 'Taste Profile Value',
        'Taste Profile': 'Taste Profile'
    }
)

# Update layout for better appearance
fig.update_layout(
    xaxis_title='Taste Profile',
    yaxis_title='Value',
    boxmode='overlay',  # Overlay the box plot on the violin plot
    title_x=0.5  # Center the title
)

# Show the plot
fig.show()

In [14]:
fig = px.histogram(
    df_melted,
    x='Value',
    color='Taste Profile',
    facet_col='Taste Profile',
    nbins=10,
    title='Distribution of Wines Across Different Taste Profiles ',
    labels={
        'Value': 'Taste Profile Value',
        'Taste Profile': 'Taste Profile'
    }
)

# Update layout for better appearance
fig.update_layout(
    xaxis_title='Taste Profile Value',
    yaxis_title='Count',
    
    # facet_col_title='Taste Profile',
    title_x=0.5  # Center the title
)

# Show the plot
fig.show()

In [8]:
fig = px.box(
    df_melted,
    y='Value',
    color='Taste Profile',
    title='Distribution of Wines Across Different Taste Profiles ,
    labels={
        'Value': 'Taste Profile Value',
        'Taste Profile': 'Taste Profile'
    }
)

# Update layout for better appearance
fig.update_layout(
    xaxis_title='Taste Profile',
    yaxis_title='Taste Profile Value',
    title_x=0.5  # Center the title
)
fig.show()

SyntaxError: unterminated string literal (detected at line 5) (3166052164.py, line 5)

In [16]:
fig = px.histogram(
    df,
    x='avg_sweetness',
    nbins=10,  # Number of bins
    title='Distribution of Wines Across Different Levels of Sweetness',
    labels={'avg_sweetness': 'Sweetness Value'}
)

# Update layout for better appearance
fig.update_layout(
    xaxis_title='Sweetness Value',
    yaxis_title='Count',
    title_x=0.5  # Center the title
)

# Show the plot
fig.show()

In [21]:
fig_sweetness = px.histogram(
    df,
    x='avg_sweetness',
    nbins=10,  # Number of bins
    title='Distribution of Wines Across Different Levels of Sweetness',
    labels={'avg_sweetness': 'Sweetness Value'}
)

# Create scatter plot for sweetness vs ratings
fig_sweetness_vs_ratings = px.scatter(
    df,
    x='avg_sweetness',
    y='avg_ratings_average',
    trendline='ols',  # Add a trendline to show correlation
    title='Sweetness vs Ratings',
    labels={'avg_sweetness': 'Sweetness Value', 'avg_ratings_average': 'Average Rating'}
)

# Create scatter plot for acidity vs ratings
fig_acidity_vs_ratings = px.scatter(
    df,
    x='avg_acidity',
    y='avg_ratings_average',
    trendline='ols',  # Add a trendline to show correlation
    title='Acidity vs Ratings',
    labels={'avg_acidity': 'Acidity Value', 'avg_ratings_average': 'Average Rating'}
)

# Update layout for better appearance
fig_sweetness.update_layout(
    xaxis_title='Sweetness Value',
    yaxis_title='Count',
    title_x=0.5  # Center the title
)

fig_sweetness_vs_ratings.update_layout(
    xaxis_title='Sweetness Value',
    yaxis_title='Average Rating',
    title_x=0.5,  # Center the title
)

fig_acidity_vs_ratings.update_layout(
    xaxis_title='Acidity Value',
    yaxis_title='Average Rating',
    title_x=0.5,  # Center the title
)

# Show the plots
fig_sweetness.show()
fig_sweetness_vs_ratings.show()
fig_acidity_vs_ratings.show()