In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report

# Tunable constants, gathered in one place so the experiment is easy to re-run
# with different settings.
SEED = 42               # shared by the split and the forest for reproducibility
GOOD_QUALITY_MIN = 7    # quality scores >= this are labelled "good" (class 1)

# Load dataset (the file is read with ';' as the field separator)
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
df = pd.read_csv(url, sep=';')

# Binarize quality: 1 for good (>= GOOD_QUALITY_MIN), 0 otherwise.
# A vectorized comparison replaces the per-row Python lambda; the resulting
# 0/1 integer column is the same.
df['quality'] = (df['quality'] >= GOOD_QUALITY_MIN).astype('int64')

# Separate features and target
X = df.drop(columns='quality')
y = df['quality']

# Train-test split FIRST, on the unscaled features. Stratifying on y keeps the
# good/not-good ratio the same in both partitions.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SEED, stratify=y
)

# Scale features. The scaler is fitted on the training partition only, so the
# test rows contribute nothing to the learned mean/variance (fitting on the
# full matrix before splitting leaks test-set statistics into training).
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so the name `X_scaled` stays available to anything that used it; it is
# now produced by the train-fitted scaler. It contains the test rows, so do
# not fit a model on it.
X_scaled = scaler.transform(X)

# Initialize and train Random Forest
rf = RandomForestClassifier(
    n_estimators=100,      # number of trees
    max_depth=6,           # limit depth to avoid overfitting
    min_samples_split=5,   # minimum samples to split a node
    random_state=SEED
)
rf.fit(X_train, y_train)

# Predict on the held-out partition
y_pred = rf.predict(X_test)

# Evaluate. The two classes are not equally represented, so read the per-class
# precision/recall in the report alongside the single accuracy figure.
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.9125

Classification Report:
               precision    recall  f1-score   support

           0       0.91      1.00      0.95       277
           1       0.94      0.37      0.53        43

    accuracy                           0.91       320
   macro avg       0.93      0.68      0.74       320
weighted avg       0.91      0.91      0.90       320

