In [1]:
import pandas as pd
import numpy as np
import seaborn as sns



In [9]:
# The wine-quality CSVs are semicolon-delimited, so the separator must be
# overridden (pandas assumes ',' by default).
df_red = pd.read_csv(
    './data/winequality-red.csv',
    sep=';',
)
df_white = pd.read_csv(
    './data/winequality-white.csv',
    sep=';',
)

In [10]:
# Preview the first five rows of the red-wine table as a quick sanity check
# that the file parsed into the expected numeric columns.
df_red.head(n=5)

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [11]:
# Preview the first five rows of the white-wine table as a quick sanity check
# that the file parsed into the expected numeric columns.
df_white.head(n=5)

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6
1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6
2,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6


In [12]:
# Show the column labels of the red-wine frame.
df_red.columns

Index(['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
       'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
       'pH', 'sulphates', 'alcohol', 'quality'],
      dtype='object')

In [13]:
# Show the column labels of the white-wine frame, for comparison with df_red.columns.
df_white.columns

Index(['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
       'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
       'pH', 'sulphates', 'alcohol', 'quality'],
      dtype='object')

## **Columns Explained**

1. **fixed acidity**  
   - Non-volatile acids (e.g., tartaric acid) that don’t evaporate easily.  
   - Contribute to tartness and stability.  
   - Think of this as the "backbone" of acidity.

2. **volatile acidity**  
   - Acids that can evaporate (e.g., acetic acid, like vinegar).  
   - High levels give an unpleasant vinegar smell/taste.  
   - A little is fine; too much ruins the wine.

3. **citric acid**  
   - A natural preservative that can add freshness and slight lemony flavor.  
   - Small amounts improve taste; too much makes wine harsh.

4. **residual sugar**  
   - Leftover sugar after fermentation.  
   - Low = dry wine, higher = sweet wine.  
   - Example: dry wines contain only a few g/L (the red samples shown above range from 1.9 to 2.6), while dessert wines contain much more.

5. **chlorides**  
   - Amount of salt (NaCl) in the wine.  
   - High levels → salty taste (undesirable).

6. **free sulfur dioxide**  
   - Portion of SO₂ that is free in the wine (not bound).  
   - Acts as antimicrobial and antioxidant.  
   - Too much affects smell/taste.

7. **total sulfur dioxide**  
   - Free + bound SO₂.  
   - High levels give a sharp, unpleasant smell (like a struck match).  
   - Controlled carefully in winemaking.

8. **density**  
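   - Related to alcohol and sugar content: alcohol is lighter than water and lowers density, while dissolved sugar raises it.  
   - Water ≈ 1.000; wines sit slightly below or above that depending on the balance of alcohol and sugar.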

9. **pH**  
   - Measure of acidity/basicity.  
   - Lower = more acidic.  
   - Wines typically fall between 3 and 4.  
   - Influences taste, color, stability.

10. **sulphates**  
    - Compounds that contribute to SO₂ levels.  
    - Aid preservation and affect aroma.  
    - Balance is important.

11. **alcohol**  
    - Percentage of alcohol by volume.  
    - Strongly influences body, warmth, and perceived sweetness.

12. **quality**  
    - Target variable (a score on a 0–10 scale).  
    - Assigned by wine experts based on taste, smell, balance, etc.  
    - In this dataset, scores fall mostly between 3 and 8.