In [None]:
import pandas as pd

# Read WineQT.csv (path is relative to the kernel's working directory) into a DataFrame.
data = pd.read_csv('WineQT.csv')

# Preview the first rows of the loaded frame.
data.head()

In [None]:
# Number of rows for each distinct value of the 'quality' column.
data['quality'].value_counts()

In [None]:
# Relative frequency of each 'quality' value, scaled from a fraction to a percentage.
porcentajes = data['quality'].value_counts(normalize=True).mul(100)
print(porcentajes)

In [None]:
# (rows, columns) of the loaded frame.
data.shape

In [None]:
# Column dtypes, non-null counts and memory usage.
data.info()

In [None]:
# Missing values per column (isna is the same check as isnull).
data.isna().sum()

In [None]:
# Remove the 'Id' column. errors='ignore' keeps this cell idempotent: re-running it
# after 'Id' has already been dropped is a no-op instead of raising KeyError.
data = data.drop(columns='Id', errors='ignore')
data.head()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
data.describe()

In [None]:
import matplotlib.pyplot as plt

# One histogram per column, arranged on a grid that is five panels wide.
fig = plt.figure(figsize=(25, 10))
n_rows = (len(data.columns) + 4) // 5  # ceiling division: enough rows to hold every column

for position, column in enumerate(data.columns, start=1):
    ax = fig.add_subplot(n_rows, 5, position)
    ax.hist(data[column], bins=20, color='skyblue', edgecolor='black')
    ax.set_title(column)
plt.show()

In [None]:
import matplotlib.pyplot as plt

# One boxplot per column, arranged on a grid that is five panels wide.
fig = plt.figure(figsize=(25, 10))
n_rows = (len(data.columns) + 4) // 5  # ceiling division: enough rows to hold every column

for position, column in enumerate(data.columns, start=1):
    ax = fig.add_subplot(n_rows, 5, position)
    ax.boxplot(data[column])
    ax.set_title(column)
plt.show()

# Construir nuestra red neuronal

In [None]:
# Separate the target ('quality') from the features (every other column).

y = data['quality']
X = data.drop(columns='quality')

In [None]:
# Train/test split

from sklearn.model_selection import train_test_split

# 80 % of the rows go to training. Stratifying on y keeps the class proportions
# comparable across both partitions; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.8,
    random_state=42,
    stratify=y,
)

In [None]:
# Standardize the features

from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply those same statistics to the
# test split. Calling fit_transform on the test data would re-fit the scaler, putting
# train and test on different scales and leaking test-set information into evaluation.
scaler = StandardScaler()
X_train_norm = scaler.fit_transform(X_train)
X_test_norm = scaler.transform(X_test)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
import numpy as np


# Encode the quality scores as consecutive integer class indices (0..n_classes-1),
# which is the label format used with sparse_categorical_crossentropy below.
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(y)


# Stratified split on the encoded labels
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded,
    train_size=0.8,
    random_state=42,
    stratify=y_encoded
)

# Scale inside this cell, from the split made just above, so the scaled features are
# guaranteed to be row-aligned with y_train / y_test instead of relying on arrays left
# over from an earlier cell. The scaler is fitted on the training rows only and then
# reused, unchanged, on the test rows.
scaler = StandardScaler()
X_train_norm = scaler.fit_transform(X_train)
X_test_norm = scaler.transform(X_test)

# Neural network: two ReLU hidden layers and one softmax output unit per class
model = Sequential([
    Input(shape=(X_train_norm.shape[1],)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(len(encoder.classes_), activation='softmax')
])

model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Training
model.fit(X_train_norm, y_train, epochs=100, batch_size=16, verbose=1)

# Evaluation on the held-out split
loss, accuracy = model.evaluate(X_test_norm, y_test, verbose=0)
print(f"Loss: {loss:.4f}  |  Accuracy: {accuracy:.4f}")


In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import numpy as np

# Predicted class index = position of the highest softmax probability in each row.
y_pred_probs = model.predict(X_test_norm)
y_pred = np.argmax(y_pred_probs, axis=1)

# Original quality labels, in the same order as the encoded class indices.
class_names = encoder.classes_

# Pin the matrix to every encoded class. Without `labels`, a class that appears in
# neither y_test nor y_pred is dropped, and the matrix no longer matches class_names.
cm = confusion_matrix(y_test, y_pred, labels=np.arange(len(class_names)))

# Plot
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
disp.plot(cmap='Blues', values_format='d')
plt.title('Matriz de Confusión - Red Neuronal')
plt.show()


In [None]:
from sklearn.metrics import classification_report
import numpy as np

# Predicted class index = position of the highest softmax probability in each row.
y_pred_probs = model.predict(X_test_norm)
y_pred = np.argmax(y_pred_probs, axis=1)

# Display names must be strings; keep them in encoded-index order.
class_names = [str(c) for c in encoder.classes_]

# Pass `labels` explicitly so target_names always lines up with the encoded classes,
# even when some class is absent from both y_test and y_pred (otherwise the name
# count would not match the classes found and the report would fail).
class_labels = np.arange(len(class_names))
report = classification_report(
    y_test,
    y_pred,
    labels=class_labels,
    target_names=class_names,
    zero_division=0,
)
print(report)



In [None]:
# Add a grouped-quality column: 'mala' where quality <= 5, 'buena' otherwise.
is_low_quality = data['quality'] <= 5
data['quality_group'] = np.where(is_low_quality, 'mala', 'buena')

data.head()

In [None]:
# Number of rows in each quality group.
data['quality_group'].value_counts()

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

# Binary target derived from 'quality': 0 = mala (quality <= 5), 1 = buena (otherwise).
is_low_quality = data['quality'] <= 5
data['quality_group'] = np.where(is_low_quality, 0, 1)

# Features are every column except the original score and the derived target.
y = data['quality_group']
X = data.drop(columns=['quality', 'quality_group'])

# Stratified split with 20 % of the rows held out for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Scaling statistics are fitted on the training split and reused on the test split.
scaler = StandardScaler()
X_train_norm = scaler.fit_transform(X_train)
X_test_norm = scaler.transform(X_test)

# Binary classifier: two ReLU hidden layers and a single sigmoid output unit.
n_features = X_train_norm.shape[1]
model = Sequential([
    Input(shape=(n_features,)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid'),
])
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Training
model.fit(X_train_norm, y_train, epochs=100, batch_size=16, verbose=1)

# Evaluation on the held-out split
loss, accuracy = model.evaluate(X_test_norm, y_test, verbose=0)
print(f"✅ Loss: {loss:.4f}  |  Accuracy: {accuracy:.4f}")



In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import numpy as np

# Turn the sigmoid outputs into hard class predictions with a 0.5 threshold
# (0 = mala, 1 = buena), as a flat 1-D integer array.
y_pred_prob = model.predict(X_test_norm)
y_pred = (y_pred_prob.ravel() > 0.5).astype(int)

# Confusion matrix of true vs. predicted classes
cm = confusion_matrix(y_test, y_pred)

# Human-readable tick labels, in class-index order (0, 1)
labels = ['Mala calidad', 'Buena calidad']

# Plot; the title is set on the axes the display object drew on
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)
disp.plot(cmap='Blues', values_format='d')
disp.ax_.set_title('Matriz de Confusión - Clasificación Binaria (Vinos)')
plt.show()
