<a href="https://colab.research.google.com/github/ahmetbekir22/wine-quality-prediction/blob/main/White_wine_models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [12]:
# Import the required libraries.
# pandas must be imported here: pd.read_csv is called below, and without this
# import the cell raises NameError when the notebook is run on a fresh kernel.
import joblib
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler

# Load the semicolon-delimited CSV and separate the target column ('quality')
# from the remaining feature columns.
wine_data = pd.read_csv('/content/winequality-white.csv', sep=';')
X = wine_data.drop('quality', axis=1)
y = wine_data['quality']

# Hold out 20% of the rows as a test set; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features. The scaler is fitted on the training split only and
# then applied unchanged to the test split, so no test statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Base Random Forest regressor; the fixed seed keeps results repeatable.
rf_model = RandomForestRegressor(random_state=42)

# Candidate hyperparameter values: 3 * 4 * 3 * 3 = 108 combinations.
param_grid = dict(
    n_estimators=[100, 200, 300],
    max_depth=[None, 10, 20, 30],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

# Exhaustive search over the grid with 5-fold cross-validation on the scaled
# training data. n_jobs=-1 requests parallel execution; verbose=2 logs progress.
grid_search = GridSearchCV(
    rf_model,
    param_grid,
    cv=5,
    verbose=2,
    n_jobs=-1,
)
grid_search.fit(X_train_scaled, y_train)

# Keep the best estimator found by the search and report its parameters.
best_rf_model = grid_search.best_estimator_
print("Best Parameters:", grid_search.best_params_)

# Persist the tuned model together with the fitted StandardScaler, so the same
# preprocessing can be reapplied when the model is loaded later.
joblib.dump(best_rf_model, 'best_random_forest_model.pkl')
joblib.dump(scaler, 'scaler.pkl')

# Predict on the held-out test split with the tuned Random Forest.
y_pred = best_rf_model.predict(X_test_scaled)

# Regression metrics on the test split.
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

# "Accuracy" here is a tolerance measure, not classification accuracy: it is the
# fraction of test rows whose absolute prediction error is strictly below 1.
accuracy = (y_test - y_pred).abs().lt(1).mean()

# Report the results.
print("Mean Squared Error (MSE):", mse)
print("R2 Score:", r2)
print("Accuracy Score:", accuracy)


Fitting 5 folds for each of 108 candidates, totalling 540 fits
Best Parameters: {'max_depth': 30, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 300}
Mean Squared Error (MSE): 0.36291715558661874
R2 Score: 0.5696678225935584
Accuracy Score: 0.9003051881993896


In [13]:
from sklearn.model_selection import cross_val_score

# Run 5-fold cross-validation of the tuned Random Forest on the scaled training data.
cv_scores = cross_val_score(
    estimator=best_rf_model,
    X=X_train_scaled,
    y=y_train,
    scoring='neg_mean_squared_error',
    cv=5,
)

# The 'neg_mean_squared_error' scorer yields negated MSE values, so the sign of
# the fold average is flipped to obtain a positive MSE.
mean_cv_score = -1 * cv_scores.mean()
print("Average Cross-Validation MSE:", mean_cv_score)


Average Cross-Validation MSE: 0.3948619221550579


In [14]:
from xgboost import XGBRegressor

# Define an XGBoost regressor with 100 boosting rounds and a fixed seed.
xgb_model = XGBRegressor(
    random_state=42,
    n_estimators=100,
)

# Train on the scaled training split.
xgb_model.fit(X_train_scaled, y_train)

# Predict on the scaled test split.
y_pred_xgb = xgb_model.predict(X_test_scaled)

# Regression metrics on the test split.
r2_xgb = r2_score(y_test, y_pred_xgb)
mse_xgb = mean_squared_error(y_test, y_pred_xgb)

# "Accuracy" here is a tolerance measure, not classification accuracy: it is the
# fraction of test rows whose absolute prediction error is strictly below 1.
accuracy_xgb = (y_test - y_pred_xgb).abs().lt(1).mean()

# Report the results.
print("XGBoost Mean Squared Error (MSE):", mse_xgb)
print("XGBoost R2 Score:", r2_xgb)
print("XGBoost Accuracy Score:", accuracy_xgb)


XGBoost Mean Squared Error (MSE): 0.37960300110465317
XGBoost R2 Score: 0.5498824119567871
XGBoost Accuracy Score: 0.8952187182095626
