**Wine Quality Data Set**

In [1]:
#import libraries
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Column names applied to both CSV files (passed to pandas via `names=`).
labels = (
    'fixed_acidity',
    'volatile_acidity',
    'citric_acid',
    'residual_sugar',
    'chlorides',
    'free_sulfur_dioxide',
    'total_sulfur_dioxide',
    'density',
    'pH',
    'sulphates',
    'alcohol',
    'quality',
)

# DataFrame views of the semicolon-separated files; row 0 of each file is
# treated as the header and its names are replaced by `labels`.
wd_white = pd.read_csv('./data/winequality-white.csv', sep=';', header=0, names=labels)
wd_red = pd.read_csv('./data/winequality-red.csv', sep=';', header=0, names=labels)

# The same two files loaded again as plain NumPy arrays, skipping the
# header line; these arrays feed the per-column statistics further down.
whiteData = np.genfromtxt('./data/winequality-white.csv', delimiter=';', skip_header=1)
redData = np.genfromtxt('./data/winequality-red.csv', delimiter=';', skip_header=1)

In [28]:
# Inspect the white-wine samples held in the NumPy array `whiteData`.
# Printing a large array gives an abbreviated view: only the leading and
# trailing rows/columns appear, with "..." standing in for the elided middle.
# Alternative (left disabled): wd_white.head() shows the first rows of the
# DataFrame together with its column names.
print(whiteData)

[[ 7.    0.27  0.36 ...  0.45  8.8   6.  ]
 [ 6.3   0.3   0.34 ...  0.49  9.5   6.  ]
 [ 8.1   0.28  0.4  ...  0.44 10.1   6.  ]
 ...
 [ 6.5   0.24  0.19 ...  0.46  9.4   6.  ]
 [ 5.5   0.29  0.3  ...  0.38 12.8   7.  ]
 [ 6.    0.21  0.38 ...  0.32 11.8   6.  ]]


In [39]:
# Inspect the red-wine samples held in the NumPy array `redData`.
# Printing a large array gives an abbreviated view: only the leading and
# trailing rows/columns appear, with "..." standing in for the elided middle.
# Alternative (left disabled): wd_red.head() shows the first rows of the
# DataFrame together with its column names.
print(redData)

[[ 7.4    0.7    0.    ...  0.56   9.4    5.   ]
 [ 7.8    0.88   0.    ...  0.68   9.8    5.   ]
 [ 7.8    0.76   0.04  ...  0.65   9.8    5.   ]
 ...
 [ 6.3    0.51   0.13  ...  0.75  11.     6.   ]
 [ 5.9    0.645  0.12  ...  0.71  10.2    5.   ]
 [ 6.     0.31   0.47  ...  0.66  11.     6.   ]]


In [4]:
# Summarize the red-wine DataFrame: index range, column names,
# non-null counts, dtypes and memory usage.
wd_red.info() # prints the summary as plain text

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1599 entries, 0 to 1598
Data columns (total 12 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   fixed_acidity         1599 non-null   float64
 1   volatile_acidity      1599 non-null   float64
 2   citric_acid           1599 non-null   float64
 3   residual_sugar        1599 non-null   float64
 4   chlorides             1599 non-null   float64
 5   free_sulfur_dioxide   1599 non-null   float64
 6   total_sulfur_dioxide  1599 non-null   float64
 7   density               1599 non-null   float64
 8   pH                    1599 non-null   float64
 9   sulphates             1599 non-null   float64
 10  alcohol               1599 non-null   float64
 11  quality               1599 non-null   int64  
dtypes: float64(11), int64(1)
memory usage: 150.0 KB


In [5]:
# Summarize the white-wine DataFrame: index range, column names,
# non-null counts, dtypes and memory usage.
wd_white.info() # prints the summary as plain text

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4898 entries, 0 to 4897
Data columns (total 12 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   fixed_acidity         4898 non-null   float64
 1   volatile_acidity      4898 non-null   float64
 2   citric_acid           4898 non-null   float64
 3   residual_sugar        4898 non-null   float64
 4   chlorides             4898 non-null   float64
 5   free_sulfur_dioxide   4898 non-null   float64
 6   total_sulfur_dioxide  4898 non-null   float64
 7   density               4898 non-null   float64
 8   pH                    4898 non-null   float64
 9   sulphates             4898 non-null   float64
 10  alcohol               4898 non-null   float64
 11  quality               4898 non-null   int64  
dtypes: float64(11), int64(1)
memory usage: 459.3 KB


# Data Exploration

In [2]:
# Per-feature summary statistics for the white-wine array `whiteData`.
# Each column is matched with its name in `labels` by position.
print('White Wine Data Exploration\n')

# (caption, reducer) pairs, applied to every column in this order.
white_summaries = (
    ('Maximum:', np.max),
    ('Minimum:', np.min),
    ('Mean:', np.mean),
    ('Variance:', np.var),
    ('Standard Deviation:', np.std),
)

for i in range(whiteData.shape[-1]):
    white_column = whiteData[:, i]  # every sample of feature i
    print('White Data Feature:', labels[i].title())
    for caption, reducer in white_summaries:
        print(caption, reducer(white_column))
    print()  # blank line between features

White Wine Data Exploration

White Data Feature: Fixed_Acidity
Maximum: 14.2
Minimum: 3.8
Mean: 6.854787668436097
Variance: 0.7119681970549558
Standard Deviation: 0.8437820791264506

White Data Feature: Volatile_Acidity
Maximum: 1.1
Minimum: 0.08
Mean: 0.27824111882400976
Variance: 0.010157466769838475
Standard Deviation: 0.10078425854188974

White Data Feature: Citric_Acid
Maximum: 1.66
Minimum: 0.0
Mean: 0.33419150673744386
Variance: 0.014642802851506794
Standard Deviation: 0.12100744957029214

White Data Feature: Residual_Sugar
Maximum: 65.8
Minimum: 0.6
Mean: 6.391414863209474
Variance: 25.720517863413225
Standard Deviation: 5.071539989333933

White Data Feature: Chlorides
Maximum: 0.346
Minimum: 0.009
Mean: 0.04577235606369946
Variance: 0.00047723625500429255
Standard Deviation: 0.02184573768505638

White Data Feature: Free_Sulfur_Dioxide
Maximum: 289.0
Minimum: 2.0
Mean: 35.30808493262556
Variance: 289.18366676943
Standard Deviation: 17.005401105808414

White Data Feature: Total_

In [3]:
# Per-feature summary statistics for the red-wine array `redData`.
# For each column index the cell prints the feature name (taken from
# `labels` by position) followed by its maximum, minimum, mean, variance
# and standard deviation. np.var / np.std are called with NumPy's defaults.
print('Red Wine Data Exploration\n')

for i in range(redData.shape[-1]):
    red_column = redData[:, i]  # every sample of feature i
    print('Red Data Feature:', labels[i].title())
    print('Maximum:', np.max(red_column))
    # Label spelling corrected ("Minimmum" -> "Minimum") so this report
    # matches the white-wine report.
    print('Minimum:', np.min(red_column))
    print('Mean:', np.mean(red_column))
    print('Variance:', np.var(red_column))
    print('Standard Deviation:', np.std(red_column))
    print()  # blank line between features

Red Wine Data Exploration

Red Data Feature: Fixed_Acidity
Maximum: 15.9
Minimmum: 4.6
Mean: 8.31963727329581
Variance: 3.0295205688671114
Standard Deviation: 1.7405518001102729

Red Data Feature: Volatile_Acidity
Maximum: 1.58
Minimmum: 0.12
Mean: 0.5278205128205128
Variance: 0.0320423261333205
Standard Deviation: 0.17900370424469014

Red Data Feature: Citric_Acid
Maximum: 1.0
Minimmum: 0.0
Mean: 0.2709756097560976
Variance: 0.03792375112494089
Standard Deviation: 0.19474021445233364

Red Data Feature: Residual_Sugar
Maximum: 15.5
Minimmum: 0.9
Mean: 2.53880550343965
Variance: 1.9866539202698996
Standard Deviation: 1.4094871124880495

Red Data Feature: Chlorides
Maximum: 0.611
Minimmum: 0.012
Mean: 0.08746654158849279
Variance: 0.0022137573233114347
Standard Deviation: 0.04705058260331571

Red Data Feature: Free_Sulfur_Dioxide
Maximum: 72.0
Minimmum: 1.0
Mean: 15.874921826141339
Variance: 109.34645676374501
Standard Deviation: 10.45688561493072

Red Data Feature: Total_Sulfur_Dioxide
