# In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt # type: ignore
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

# Load Dataset
df_wine = pd.read_csv("winequality-red.csv")  # Update path if needed

# Data Exploration
print(df_wine.describe())
# DataFrame.info() writes its summary to stdout itself and returns None, so
# wrapping it in print() would emit a stray "None" line after the summary.
df_wine.info()
# Total count of missing cells across the whole frame.
print("Missing values:", df_wine.isnull().sum().sum())

# Visualization
# Heatmap of the pairwise column correlations, annotated with the coefficients.
plt.figure(figsize=(10, 6))
sns.heatmap(df_wine.corr(), annot=True, cmap='coolwarm')
plt.show()

# Histograms of the columns to inspect their distributions.
df_wine.hist(figsize=(12, 10))
plt.show()

# Data Preprocessing
features = df_wine.drop("quality", axis=1)

y = LabelEncoder().fit_transform(df_wine["quality"])  # Encode quality labels

# Split BEFORE scaling: fitting the scaler on the full dataset would let the
# test rows' min/max influence the training features (data leakage). The split
# depends only on the row count and random_state, so the same rows land in
# train/test as when the split is done on already-scaled data.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    features, y, test_size=0.2, random_state=42
)

scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train_raw)  # learn min/max from training rows only
X_test = scaler.transform(X_test_raw)        # reuse the training statistics

# Full feature matrix scaled with the training statistics; values from rows
# outside the training set may fall slightly outside [0, 1].
X = scaler.transform(features)

# Define ANN Model
def create_model(layers=(64, 32), activation='relu', optimizer='adam'):
    """Build and compile a feed-forward softmax classifier.

    Relies on two module-level names: ``X_train`` (its second dimension sets
    the input width) and ``y`` (its distinct values set the number of output
    units).

    Args:
        layers: Sequence of hidden-layer widths; needs at least one entry.
            The default is an immutable tuple so the default object cannot
            be altered between calls.
        activation: Activation function used by every hidden layer.
        optimizer: Optimizer handed to ``model.compile``.

    Returns:
        The compiled ``Sequential`` model, with accuracy tracked as a metric.
    """
    hidden_sizes = list(layers)  # accept lists, tuples, or any other sequence

    model = Sequential()
    model.add(Dense(hidden_sizes[0], activation=activation, input_shape=(X_train.shape[1],)))
    # Every hidden layer after the first is followed by dropout.
    for layer_size in hidden_sizes[1:]:
        model.add(Dense(layer_size, activation=activation))
        model.add(Dropout(0.2))  # Reduce overfitting
    # One softmax output unit per distinct label value in y.
    model.add(Dense(len(set(y)), activation='softmax'))
    # The sparse loss variant takes integer class ids rather than one-hot rows.
    model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

# Train Model
# Fit the default architecture; the held-out split is passed as validation
# data so per-epoch validation metrics are recorded in `history`.
model = create_model()
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=32,
    verbose=1,
)

# Evaluate Model
# The predicted class is the index of the largest softmax output per sample.
class_probabilities = model.predict(X_test)
y_pred = np.argmax(class_probabilities, axis=1)

test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)

report_text = classification_report(y_test, y_pred)
print(report_text)

print("Confusion Matrix:")
test_confusion = confusion_matrix(y_test, y_pred)
print(test_confusion)

# Hyperparameter Tuning (Grid Search & Random Search)
# 'layers', 'activation' and 'optimizer' are forwarded to create_model;
# 'batch_size' and 'epochs' are training settings of the wrapped classifier.
param_grid = {
    'layers': [[64, 32], [128, 64], [64, 64, 32]],
    'activation': ['relu', 'tanh'],
    'optimizer': ['adam', 'rmsprop'],
    'batch_size': [16, 32],
    'epochs': [50, 100]
}

# NOTE(review): tensorflow.keras.wrappers.scikit_learn is, as far as I know,
# no longer shipped in recent TensorFlow releases - confirm the pinned version.
keras_model = KerasClassifier(build_fn=create_model, verbose=0)

# Grid Search
# Exhaustive: 3 * 2 * 2 * 2 * 2 = 48 candidates, each fitted on 3 folds.
# NOTE(review): n_jobs=-1 runs the fits in parallel workers; TensorFlow models
# reportedly do not always cooperate with that - fall back to n_jobs=1 if the
# search hangs or fails to pickle.
grid = GridSearchCV(estimator=keras_model, param_grid=param_grid, cv=3, n_jobs=-1)
grid.fit(X_train, y_train)
print("Best Parameters (Grid Search):", grid.best_params_)

# Random Search
# Samples 10 of the candidates above. random_state pins which candidates are
# drawn so reruns compare the same configurations, matching the fixed
# random_state used for the train/test split.
random_search = RandomizedSearchCV(
    estimator=keras_model,
    param_distributions=param_grid,
    n_iter=10,
    cv=3,
    n_jobs=-1,
    random_state=42,
)
random_search.fit(X_train, y_train)
print("Best Parameters (Random Search):", random_search.best_params_)

# Plot Training vs Validation Performance
# One figure per metric; each row lists the training-history key and legend
# label, the validation key and legend label, and the figure title.
curve_specs = (
    ('accuracy', 'Train Accuracy', 'val_accuracy', 'Validation Accuracy', "Model Accuracy Over Epochs"),
    ('loss', 'Train Loss', 'val_loss', 'Validation Loss', "Model Loss Over Epochs"),
)

for train_key, train_label, val_key, val_label, figure_title in curve_specs:
    plt.plot(history.history[train_key], label=train_label)
    plt.plot(history.history[val_key], label=val_label)
    plt.legend()
    plt.title(figure_title)
    plt.show()
