# 03 - Data Visualization & Insights (WineQT)

Histogramas, boxplots, scatter plots e heatmaps + insights de qualidade.



In [None]:
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from src.data_ingestion import load_wine_dataframe

# Dataset coordinates; each can be overridden through an environment
# variable, otherwise the default below is used.
# NOTE(review): the "HF_" prefix suggests a Hugging Face dataset repo -- confirm
# against src.data_ingestion.
HF_REPO = os.environ.get("HF_DATASET_REPO", "henriquebap/wine-ml-dataset")
FILENAME = os.environ.get("HF_DATASET_FILENAME", "WineQT.csv")

# Load the data through the project's ingestion helper, then preview the
# first rows (kept as the last expression so the notebook cell displays it).
df = load_wine_dataframe(
    filename=FILENAME,
    repo_id=HF_REPO,
)
df.head()


In [None]:
sns.set_theme()
# Histograms (with a KDE overlay) for every column except the target `quality`.
# NOTE(review): any identifier-like column returned by the loader (e.g. an `Id`
# column) would be plotted as well -- confirm whether that is intended.
feature_cols = [c for c in df.columns if c != 'quality']
n_grid_cols = 3
# Ceiling division so the grid always has at least one panel per column;
# a fixed-size grid raises IndexError once there are more columns than panels.
n_grid_rows = max(1, -(-len(feature_cols) // n_grid_cols))
fig, axes = plt.subplots(
    nrows=n_grid_rows,
    ncols=n_grid_cols,
    figsize=(14, 3 * n_grid_rows),  # 3 in. per row, i.e. (14, 12) for 4 rows
    squeeze=False,  # always get a 2-D array of Axes, whatever the grid shape
)
axes = axes.flatten()
for ax, col in zip(axes, feature_cols):
    sns.histplot(df[col], kde=True, ax=ax)
    ax.set_title(col)
# Hide the leftover panels so the figure does not show empty axes.
for ax in axes[len(feature_cols):]:
    ax.set_visible(False)
plt.tight_layout()
plt.show()


In [None]:
# Boxplots by quality: one panel per non-target column, with the column's
# values on the y-axis grouped by `quality` on the x-axis.
feature_cols = [c for c in df.columns if c != 'quality']
n_grid_cols = 3
# Ceiling division so the grid always has at least one panel per column;
# a fixed-size grid raises IndexError once there are more columns than panels.
n_grid_rows = max(1, -(-len(feature_cols) // n_grid_cols))
fig, axes = plt.subplots(
    nrows=n_grid_rows,
    ncols=n_grid_cols,
    figsize=(14, 3 * n_grid_rows),  # 3 in. per row, i.e. (14, 12) for 4 rows
    squeeze=False,  # always get a 2-D array of Axes, whatever the grid shape
)
axes = axes.flatten()
for ax, col in zip(axes, feature_cols):
    sns.boxplot(data=df, x='quality', y=col, ax=ax)
# Hide the leftover panels so the figure does not show empty axes.
for ax in axes[len(feature_cols):]:
    ax.set_visible(False)
plt.tight_layout()
plt.show()


In [None]:
# Scatter plots against target: each non-target column on the x-axis versus
# `quality` on the y-axis, semi-transparent so overlapping points stay visible.
num_cols = [c for c in df.columns if c != 'quality']
n_grid_cols = 3
# Ceiling division so the grid always has at least one panel per column;
# a fixed-size grid raises IndexError once there are more columns than panels.
n_grid_rows = max(1, -(-len(num_cols) // n_grid_cols))
fig, axes = plt.subplots(
    nrows=n_grid_rows,
    ncols=n_grid_cols,
    figsize=(14, 3 * n_grid_rows),  # 3 in. per row, i.e. (14, 12) for 4 rows
    squeeze=False,  # always get a 2-D array of Axes, whatever the grid shape
)
axes = axes.flatten()
for ax, col in zip(axes, num_cols):
    sns.scatterplot(data=df, x=col, y='quality', ax=ax, alpha=0.5)
# Hide the leftover panels so the figure does not show empty axes.
for ax in axes[len(num_cols):]:
    ax.set_visible(False)
plt.tight_layout()
plt.show()


In [None]:
# Heatmap of the pairwise correlations between the numeric columns of `df`.
heatmap_size = (10, 8)
plt.figure(figsize=heatmap_size)
corr_matrix = df.corr(numeric_only=True)
# Diverging palette centred on zero, so the two signs of correlation fall on
# opposite sides of the colormap midpoint.
sns.heatmap(corr_matrix, center=0, cmap='coolwarm')
plt.show()
