<a href="https://www.kaggle.com/code/atanurgl/mlbootcampproject?scriptVersionId=241770320" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder


df = pd.read_csv('winequality-red.csv', sep=';')

# Kalite skorlarÄ±nÄ± 3 kategoriye bÃ¶lÃ¼yorum
df['quality_category'] = pd.cut(
    df['quality'],
    bins=[0, 4, 6, 10],
    labels=['dÃ¼ÅŸÃ¼k', 'orta', 'yÃ¼ksek']
)

# Kategorik etiketleri sayÄ±sala Ã§eviriyorum
le = LabelEncoder()
df['quality_label'] = le.fit_transform(df['quality_category'])

print("Veri yÃ¼klendi ve iÅŸlendi.")
print(df[['quality', 'quality_category', 'quality_label']].sample(3))


Veri yÃ¼klendi ve iÅŸlendi.
      quality quality_category  quality_label
552         6             orta              1
14          5             orta              1
1317        6             orta              1


In [19]:
from sklearn.model_selection import train_test_split

# X ve Y Ã¶zelliklerini ayÄ±rdÄ±m
X = df.drop(['quality', 'quality_category', 'quality_label'], axis=1)  # X iÃ§in
y = df['quality_label']  # y iÃ§in

# EÄŸitim ve test setlerini ayÄ±rÄ±yorum
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Kontrol
print("\n 2. ADIM TAMAMLANDI: Veri bÃ¶lÃ¼ndÃ¼!")
print("EÄŸitim Verisi Boyutu:", X_train.shape)
print("Test Verisi Boyutu:", X_test.shape)


 2. ADIM TAMAMLANDI: Veri bÃ¶lÃ¼ndÃ¼!
EÄŸitim Verisi Boyutu: (1279, 11)
Test Verisi Boyutu: (320, 11)


In [20]:
#Dengesiz veri setimi dengelemek iÃ§in SMOTE kullanÄ±yorum
from imblearn.over_sampling import SMOTE

smote = SMOTE(random_state=42)
X_res, y_res = smote.fit_resample(X_train, y_train)  

# Kontrol
print("\n 3. ADIM TAMAMLANDI: SMOTE uygulandÄ±!")
print("Yeni DaÄŸÄ±lÄ±m:")
print(pd.Series(y_res).value_counts())  


 3. ADIM TAMAMLANDI: SMOTE uygulandÄ±!
Yeni DaÄŸÄ±lÄ±m:
1    1057
0    1057
2    1057
Name: quality_label, dtype: int64


In [21]:
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier(
    random_state=42,
    n_estimators=200  # Daha iyi ve hÄ±zlÄ± output iÃ§in
)
model.fit(X_res, y_res)

print("\n 4. ADIM TAMAMLANDI: Model eÄŸitildi!")


 4. ADIM TAMAMLANDI: Model eÄŸitildi!


In [None]:
from sklearn.metrics import classification_report

y_pred = model.predict(X_test)
print("\n SONUÃ‡LAR:")
print(classification_report(y_test, y_pred, target_names=['dÃ¼ÅŸÃ¼k', 'orta', 'yÃ¼ksek']))


ðŸ“Š SONUÃ‡LAR:
              precision    recall  f1-score   support

       dÃ¼ÅŸÃ¼k       0.29      0.36      0.32        11
        orta       0.94      0.88      0.91       262
      yÃ¼ksek       0.64      0.81      0.72        47

    accuracy                           0.85       320
   macro avg       0.62      0.68      0.65       320
weighted avg       0.87      0.85      0.86       320



In [23]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier

# Hiperparametre kombinasyonlarÄ± 
param_grid = {
    'n_estimators': [200, 300],  
    'max_depth': [15, 20, None],
    'min_samples_split': [2, 5]
}


grid_search = GridSearchCV(
    RandomForestClassifier(random_state=42),
    param_grid,
    cv=5,  
    scoring='f1_macro'
)

# X_res ve y_res ile eÄŸittik
grid_search.fit(X_res, y_res)  

print(" En Ä°yi Parametreler:", grid_search.best_params_)

 En Ä°yi Parametreler: {'max_depth': 15, 'min_samples_split': 2, 'n_estimators': 200}


In [24]:
# parametrelerle modeli eÄŸittim
best_model = grid_search.best_estimator_
best_model.fit(X_res, y_res)

print("\n 7. ADIM TAMAMLANDI: Final model eÄŸitildi!")


 7. ADIM TAMAMLANDI: Final model eÄŸitildi!


In [25]:
from sklearn.metrics import classification_report, confusion_matrix

y_pred_best = best_model.predict(X_test)

print("\n FÄ°NAL MODELÄ°N TEST PERFORMANSI:")
print(classification_report(y_test, y_pred_best, target_names=['dÃ¼ÅŸÃ¼k', 'orta', 'yÃ¼ksek']))

print("KarÄ±ÅŸÄ±klÄ±k Matrisi:")
print(confusion_matrix(y_test, y_pred_best))



 FÄ°NAL MODELÄ°N TEST PERFORMANSI:
              precision    recall  f1-score   support

       dÃ¼ÅŸÃ¼k       0.25      0.36      0.30        11
        orta       0.95      0.87      0.91       262
      yÃ¼ksek       0.65      0.87      0.75        47

    accuracy                           0.85       320
   macro avg       0.62      0.70      0.65       320
weighted avg       0.88      0.85      0.86       320

KarÄ±ÅŸÄ±klÄ±k Matrisi:
[[  4   7   0]
 [ 12 228  22]
 [  0   6  41]]
