# Wine Quality Analysis

## Process and Clean Data

In [None]:
# Import modules
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler

In [None]:
# Load the red and white wine datasets (semicolon-separated CSV files) and
# tag every row with a numeric 'color' flag: 1 = red wine, 0 = white wine
red_wine_df = pd.read_csv('resources/winequality-red.csv', sep=';').assign(color=1)
white_wine_df = pd.read_csv('resources/winequality-white.csv', sep=';').assign(color=0)

# Stack the two dataframes (red rows first, then white) into 'wine_df',
# discarding the per-file indexes in favour of a fresh 0..n-1 index
wine_df = pd.concat(
    (red_wine_df, white_wine_df),
    ignore_index=True,
)

# Display wine_df
wine_df

In [None]:
# Describe the dataframe: summary statistics (count, mean, std, min,
# quartiles, max) for the numeric columns of wine_df
wine_df.describe()

In [None]:
# Get the dataframe's info: prints each column's name, non-null count and
# dtype, which helps spot missing values and unexpected types
wine_df.info()

In [None]:
# Count the missing (NaN/None) entries in each column
wine_df.isna().sum()

In [None]:
# Remove every row that has at least one missing value, then renumber the
# index so it stays contiguous after the removal
wine_df = (
    wine_df
    .dropna(axis=0, how='any')
    .reset_index(drop=True)
)

wine_df

In [None]:
# Check for duplicate rows: duplicated() flags each row that repeats an
# earlier row across all columns, and sum() counts those flags
wine_df.duplicated().sum()

In [None]:
# Keep only the first occurrence of each distinct row (rows flagged by
# duplicated() are filtered out), then renumber the index
wine_df = wine_df.loc[~wine_df.duplicated()].reset_index(drop=True)

wine_df

In [None]:
# Get a count of the unique values in the quality column, i.e. how many
# rows carry each distinct quality value
wine_df['quality'].value_counts()

In [None]:
# Write the cleaned dataframe out as a new csv file; index=False keeps the
# row index from being saved as an extra column
wine_df.to_csv(
    path_or_buf='resources/winequality-cleaned.csv',
    index=False,
)

## Explore the Data

## Split the Test and Training Data

In [None]:
# Create bins for the quality column, collapsing the numeric score into a
# binary low/high label

# Bin edges: scores 0-5 fall in the first bin and 6-10 in the second
bins = (0, 5, 10)

# Name the bins 0 for low quality and 1 for high quality
group_names = [0, 1]

# Replace the values in the quality column with the bin labels.
# pd.cut builds right-closed intervals by default, i.e. (0, 5] and (5, 10],
# which would turn a score of exactly 0 into NaN; include_lowest=True closes
# the left edge of the first bin so 0 is labelled 0 as the bin description
# above intends.
wine_df['quality'] = pd.cut(
    wine_df['quality'],
    bins=bins,
    labels=group_names,
    include_lowest=True,
)

# List unique values in the quality column
wine_df['quality'].unique()

In [None]:
# Count the rows for each value in the quality column (shows the balance
# between the 0 and 1 labels, assuming the binning cell above has been run)
wine_df['quality'].value_counts()

## Compare Different Models

## Identify Most and Least Important Features

## Hyperparameter Optimization

## Conclusions