
# Wine Quality Analysis (Jupyter Notebook)

This notebook explores the **Wine Quality (red wine)** dataset from the UCI Machine Learning Repository. 
We demonstrate how to load the dataset, inspect its structure, visualize the distribution of quality scores, and compute correlations between physicochemical properties and the wine quality score.


In [None]:

import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset; download it once and reuse the local copy on later runs
local_path = 'winequality-red.csv'
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv'

if not os.path.exists(local_path):
    # requests is only needed on the first run, when the file is not cached yet
    import requests
    print('Downloading dataset from UCI repository...')
    # A timeout keeps the cell from hanging forever on a stalled connection
    r = requests.get(url, timeout=30)
    # Fail loudly on HTTP errors so an error page is never saved as the CSV
    # (a bad file would otherwise be reused on every subsequent run)
    r.raise_for_status()
    with open(local_path, 'wb') as f:
        f.write(r.content)

# Read the CSV file (semicolon separated)
df = pd.read_csv(local_path, sep=';')
print("First five rows of the dataset:")
print(df.head())

# The leading "\n" escape prints a blank line before the heading
print("\nSummary statistics:")
print(df.describe())


In [None]:

# Count how many samples fall on each quality score, ordered by score
quality_counts = df['quality'].value_counts().sort_index()

# Draw the counts as a bar chart through the explicit Figure/Axes interface
fig, ax = plt.subplots(figsize=(8, 5))
ax.bar(quality_counts.index, quality_counts.values, color='skyblue')
ax.set_xlabel('Wine Quality Score')
ax.set_ylabel('Number of Samples')
ax.set_title('Distribution of Wine Quality Scores')
# One tick per observed score so every bar is labelled
ax.set_xticks(quality_counts.index)
plt.show()


In [None]:

# Compute the pairwise correlation matrix once and reuse it for both the
# per-feature listing and the heatmap below
corr_matrix = df.corr()

# Correlation of each physicochemical property with quality; the trivial
# self-correlation entry for 'quality' is dropped
correlations = corr_matrix['quality'].drop('quality')
# The leading "\n" escape prints a blank line before the heading
print('\nCorrelation of features with wine quality:')
print(correlations)

# Plot heatmap of correlations (colour scale centred on zero)
plt.figure(figsize=(10,8))
sns.heatmap(corr_matrix, annot=False, cmap='coolwarm', center=0)
plt.title('Correlation Heatmap')
plt.show()
