# 🔧 Proje 1: Hiperparametre Optimizasyonu

**Ders:** Optimizasyon Algoritmaları  
**Veri Seti:** Çelik Levha Hata Tespiti  
**Amaç:** Grid Search, Random Search ve Bayesian Optimizasyonu karşılaştırmak

---

## İçindekiler
1. Giriş
2. Kurulum ve İçe Aktarma
3. Veri Yükleme
4. Veri Keşfi
5. Ön İşleme
6. Grid Search
7. Random Search
8. Bayesian Optimizasyon
9. Sonuç Karşılaştırması
10. Sonuçlar

## 2. Kurulum ve İçe Aktarma

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import time

from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report
from scipy.stats import uniform, randint

# Optuna is optional: record whether it can be imported so the Bayesian
# section can skip itself instead of failing.
try:
    import optuna
    from optuna.samplers import TPESampler
except ImportError:
    OPTUNA_AVAILABLE = False
else:
    OPTUNA_AVAILABLE = True
    # Only show Optuna log messages at WARNING level or above.
    optuna.logging.set_verbosity(optuna.logging.WARNING)

# Suppress all warnings for the rest of the session.
warnings.filterwarnings('ignore')
# Seed NumPy's global random number generator.
np.random.seed(42)

optuna_status = " Optuna: ‚úÖ" if OPTUNA_AVAILABLE else " Optuna: ‚ùå"
print("‚úÖ Libraries imported!" + optuna_status)

## 3. Veri Yükleme

In [None]:
# Feature columns, listed in the positional order used to label the CSV.
feature_names = [
    'X_Minimum',
    'X_Maximum',
    'Y_Minimum',
    'Y_Maximum',
    'Pixels_Areas',
    'X_Perimeter',
    'Y_Perimeter',
    'Sum_of_Luminosity',
    'Minimum_of_Luminosity',
    'Maximum_of_Luminosity',
    'Length_of_Conveyer',
    'TypeOfSteel_A300',
    'TypeOfSteel_A400',
    'Steel_Plate_Thickness',
    'Edges_Index',
    'Empty_Index',
    'Square_Index',
    'Outside_X_Index',
    'Edges_X_Index',
    'Edges_Y_Index',
    'Outside_Global_Index',
    'LogOfAreas',
    'Log_X_Index',
    'Log_Y_Index',
    'Orientation_Index',
    'Luminosity_Index',
    'SigmoidOfAreas',
]
# Class indicator columns that follow the feature columns.
class_names = [
    'Pastry',
    'Z_Scratch',
    'K_Scratch',
    'Stains',
    'Dirtiness',
    'Bumps',
    'Other_Faults',
]

# The file is read without a header row, so the columns are labelled by
# position: features first, then the class columns.
df = pd.read_csv('../data/raw/steel_plates_fault.csv', header=None)
df.columns = [*feature_names, *class_names]
print(f"‚úÖ Loaded: {df.shape[0]} samples, {len(feature_names)} features, {len(class_names)} classes")
df.head()

## 4. Veri Keşfi

In [None]:
# Each row's label is the name of the class column holding its largest value.
y_labels = df[class_names].idxmax(axis=1)
class_counts = y_labels.value_counts()

# Print the counts in the order of class_names; classes that never occur
# are reported as 0.
print("üìä Class Distribution:")
for cls, count in class_counts.reindex(class_names, fill_value=0).items():
    print(f"  {cls:15}: {count:4} ({count/len(y_labels)*100:.1f}%)")

# Bar chart of the same counts, ordered by frequency.
fig, ax = plt.subplots(figsize=(10, 5))
bar_colors = plt.cm.viridis(np.linspace(0.2, 0.8, 7))
class_counts.plot(kind='bar', color=bar_colors, ax=ax)
ax.set_title('Class Distribution', fontweight='bold')
ax.set_ylabel('Count')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

## 5. Ön İşleme

In [None]:
# Feature matrix, and integer labels taken as the index of the largest
# class column in each row.
X = df[feature_names].to_numpy()
y = df[class_names].to_numpy().argmax(axis=1)

# Hold out 20% of the rows, stratified on the label.
split = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = split

# Fit the scaler on the training rows only, then apply it to both parts.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

print(f"‚úÖ Train: {X_train.shape[0]}, Test: {X_test.shape[0]}")

## 6. Grid Search

Tüm parametre kombinasyonlarını kapsamlı olarak değerlendirir.

In [None]:
# Candidate values per model; the grid search evaluates every combination.
param_grids = {
    'SVM': dict(
        C=[0.1, 1, 10],
        gamma=['scale', 'auto'],
        kernel=['rbf'],
    ),
    'RandomForest': dict(
        n_estimators=[50, 100],
        max_depth=[10, 15],
        min_samples_split=[2, 5],
    ),
    'NeuralNetwork': dict(
        hidden_layer_sizes=[(50,), (100,)],
        alpha=[0.001, 0.01],
    ),
}

def grid_search(name):
    """Tune one model with an exhaustive 5-fold cross-validated grid search.

    Reads the module-level ``param_grids``, ``X_train_scaled`` and ``y_train``.

    Args:
        name: Model key: 'SVM', 'RandomForest' or 'NeuralNetwork'.

    Returns:
        A dict with keys 'model' (the given name), 'method' ('Grid'),
        'score' (best mean cross-validated accuracy), 'time' (elapsed
        seconds for the search) and 'est' (the best estimator found).

    Raises:
        KeyError: If ``name`` is not one of the supported model keys.
    """
    # Factories rather than instances, so only the requested estimator is built.
    model_factories = {
        'SVM': lambda: SVC(random_state=42),
        'RandomForest': lambda: RandomForestClassifier(random_state=42),
        'NeuralNetwork': lambda: MLPClassifier(random_state=42, max_iter=500),
    }
    if name not in model_factories:
        raise KeyError(f"Unknown model {name!r}; expected one of {sorted(model_factories)}")

    print(f"\nüîç Grid Search: {name}")
    model = model_factories[name]()

    # NOTE(review): X_train_scaled was standardised on the whole training split
    # before cross-validation, so every validation fold contributed to the
    # scaler statistics. Wrapping scaler + model in a Pipeline would avoid that.

    # perf_counter is monotonic, so the measured duration is not affected by
    # system clock adjustments.
    start = time.perf_counter()
    gs = GridSearchCV(model, param_grids[name], cv=5, scoring='accuracy', n_jobs=-1)
    gs.fit(X_train_scaled, y_train)
    t = time.perf_counter() - start

    print(f"  ‚è±Ô∏è {t:.1f}s | üéØ {gs.best_score_:.4f} | {gs.best_params_}")
    return {'model': name, 'method': 'Grid', 'score': gs.best_score_, 'time': t, 'est': gs.best_estimator_}

# Run the grid search for each model, in a fixed order.
grid_results = list(map(grid_search, ('SVM', 'RandomForest', 'NeuralNetwork')))

## 7. Random Search

Rastgele parametre kombinasyonları örnekler.

In [None]:
# Parameter distributions.
# scipy.stats.uniform(loc, scale) covers [loc, loc + scale], and
# scipy.stats.randint(low, high) excludes `high`. The second arguments are
# therefore written as (upper - lower) and (upper + 1) so the sampled ranges
# match the bounds used by the Optuna search in this notebook:
# C in [0.1, 50], alpha in [0.0001, 0.01], n_estimators in 50..200,
# max_depth in 5..20, min_samples_split in 2..10.
param_dists = {
    'SVM': {
        'C': uniform(0.1, 49.9),
        'gamma': ['scale', 'auto'],
        'kernel': ['rbf', 'poly'],
    },
    'RandomForest': {
        'n_estimators': randint(50, 201),
        'max_depth': randint(5, 21),
        'min_samples_split': randint(2, 11),
    },
    'NeuralNetwork': {
        'hidden_layer_sizes': [(50,), (100,), (100, 50)],
        'alpha': uniform(0.0001, 0.0099),
    },
}

def random_search(name, n_iter=20):
    """Tune one model with a 5-fold cross-validated randomized search.

    Reads the module-level ``param_dists``, ``X_train_scaled`` and ``y_train``.

    Args:
        name: Model key: 'SVM', 'RandomForest' or 'NeuralNetwork'.
        n_iter: Number of parameter settings sampled from ``param_dists[name]``.

    Returns:
        A dict with keys 'model' (the given name), 'method' ('Random'),
        'score' (best mean cross-validated accuracy), 'time' (elapsed
        seconds for the search) and 'est' (the best estimator found).

    Raises:
        KeyError: If ``name`` is not one of the supported model keys.
    """
    # Factories rather than instances, so only the requested estimator is built.
    model_factories = {
        'SVM': lambda: SVC(random_state=42),
        'RandomForest': lambda: RandomForestClassifier(random_state=42),
        'NeuralNetwork': lambda: MLPClassifier(random_state=42, max_iter=500),
    }
    if name not in model_factories:
        raise KeyError(f"Unknown model {name!r}; expected one of {sorted(model_factories)}")

    print(f"\nüé≤ Random Search: {name}")
    model = model_factories[name]()

    # NOTE(review): X_train_scaled was standardised on the whole training split
    # before cross-validation, so every validation fold contributed to the
    # scaler statistics. Wrapping scaler + model in a Pipeline would avoid that.

    # perf_counter is monotonic, so the measured duration is not affected by
    # system clock adjustments.
    start = time.perf_counter()
    rs = RandomizedSearchCV(model, param_dists[name], n_iter=n_iter, cv=5, scoring='accuracy', n_jobs=-1, random_state=42)
    rs.fit(X_train_scaled, y_train)
    t = time.perf_counter() - start

    print(f"  ‚è±Ô∏è {t:.1f}s | üéØ {rs.best_score_:.4f} | {rs.best_params_}")
    return {'model': name, 'method': 'Random', 'score': rs.best_score_, 'time': t, 'est': rs.best_estimator_}

# Run the randomized search for each model, in a fixed order.
random_results = list(map(random_search, ('SVM', 'RandomForest', 'NeuralNetwork')))

## 8. Bayesian Optimizasyon (Optuna)

Aramayı akıllıca yönlendirmek için olasılıksal model kullanır.

In [None]:
def bayesian_opt(name, n_trials=20):
    """Tune one model with Optuna's TPE sampler and 5-fold cross-validation.

    Reads the module-level ``OPTUNA_AVAILABLE``, ``X_train_scaled`` and
    ``y_train``.

    Args:
        name: Model key: 'SVM', 'RandomForest' or 'NeuralNetwork'.
        n_trials: Number of Optuna trials to run.

    Returns:
        ``None`` when Optuna is not installed. Otherwise a dict with keys
        'model' (the given name), 'method' ('Bayesian'), 'score' (best mean
        cross-validated accuracy), 'time' (elapsed seconds, including the
        final refit) and 'est' (an estimator with the best parameters,
        refit on the full training data).

    Raises:
        KeyError: If ``name`` is not one of the supported model keys.
    """
    if not OPTUNA_AVAILABLE:
        print("‚ö†Ô∏è Optuna not available")
        return None

    # Optuna recommends restricting categorical choices to None, bool, int,
    # float and str, so layer layouts are offered as string labels and mapped
    # back to tuples when the estimator is built.
    layer_options = {'(50,)': (50,), '(100,)': (100,)}

    def suggest_svm(trial):
        return {
            'C': trial.suggest_float('C', 0.1, 50, log=True),
            'gamma': trial.suggest_categorical('gamma', ['scale', 'auto']),
        }

    def suggest_forest(trial):
        return {
            'n_estimators': trial.suggest_int('n_estimators', 50, 200),
            'max_depth': trial.suggest_int('max_depth', 5, 20),
            'min_samples_split': trial.suggest_int('min_samples_split', 2, 10),
        }

    def suggest_network(trial):
        return {
            'hidden_layer_sizes': trial.suggest_categorical('hidden_layer_sizes', list(layer_options)),
            'alpha': trial.suggest_float('alpha', 0.0001, 0.01, log=True),
        }

    search_spaces = {
        'SVM': suggest_svm,
        'RandomForest': suggest_forest,
        'NeuralNetwork': suggest_network,
    }
    # Fail loudly on an unknown key, like the grid and random searches do,
    # instead of silently tuning the neural network under a wrong label.
    if name not in search_spaces:
        raise KeyError(f"Unknown model {name!r}; expected one of {sorted(search_spaces)}")

    print(f"\nüß† Bayesian: {name}")

    def build_model(params):
        """Create the estimator for ``name`` from raw Optuna parameter values."""
        if name == 'SVM':
            return SVC(**params, random_state=42)
        if name == 'RandomForest':
            return RandomForestClassifier(**params, random_state=42)
        decoded = {**params, 'hidden_layer_sizes': layer_options[params['hidden_layer_sizes']]}
        return MLPClassifier(**decoded, random_state=42, max_iter=500)

    def objective(trial):
        model = build_model(search_spaces[name](trial))
        # Same metric and parallelism as the grid/random searches, so that
        # scores and timings are comparable across methods.
        return cross_val_score(model, X_train_scaled, y_train, cv=5, scoring='accuracy', n_jobs=-1).mean()

    # perf_counter is monotonic, so the measured duration is not affected by
    # system clock adjustments.
    start = time.perf_counter()
    study = optuna.create_study(direction='maximize', sampler=TPESampler(seed=42))
    study.optimize(objective, n_trials=n_trials, show_progress_bar=False)
    # Refit inside the timed region: the grid/random searches also refit their
    # best estimator as part of fit(), and they return it under 'est'.
    best_estimator = build_model(study.best_params).fit(X_train_scaled, y_train)
    t = time.perf_counter() - start

    print(f"  ‚è±Ô∏è {t:.1f}s | üéØ {study.best_value:.4f} | {study.best_params}")
    return {'model': name, 'method': 'Bayesian', 'score': study.best_value, 'time': t, 'est': best_estimator}

# Run the Bayesian search for each model; falsy results (None is returned
# when Optuna is unavailable) are dropped.
bayesian_results = list(filter(None, map(bayesian_opt, ('SVM', 'RandomForest', 'NeuralNetwork'))))

## 9. Sonuç Karşılaştırması

In [None]:
# Stack the result rows of all three search methods into one table.
all_results = [*grid_results, *random_results, *bayesian_results]
results_df = pd.DataFrame(all_results)

print("üìä All Results:")
display(results_df[['model', 'method', 'score', 'time']].round(4))

# One model-by-method table per metric.
pivot = results_df.pivot(index='model', columns='method', values='score')
pivot_time = results_df.pivot(index='model', columns='method', values='time')

# Draw both tables side by side as grouped bar charts.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
panels = (
    (pivot, 'viridis', 'Accuracy Comparison', 'Accuracy'),
    (pivot_time, 'plasma', 'Time Comparison', 'Time (s)'),
)
for panel_ax, (table, cmap, title, ylabel) in zip(axes, panels):
    table.plot(kind='bar', ax=panel_ax, colormap=cmap, edgecolor='black')
    panel_ax.set_title(title, fontweight='bold')
    panel_ax.set_ylabel(ylabel)
    panel_ax.legend(title='Method')

plt.tight_layout()
plt.show()

# Row with the highest score across all models and methods.
best = results_df.loc[results_df['score'].idxmax()]
print(f"\nüèÜ Best: {best['model']} with {best['method']} ({best['score']:.4f})")

## 10. Sonuçlar

### 🎯 Temel Bulgular

| Yöntem | Avantajlar | En İyi Kullanım |
|--------|------------|-----------------|
| **Bayesian** | En verimli, geçmişten öğrenir | Üretim |
| **Random Search** | Hızlı, iyi keşif | Prototipleme |
| **Grid Search** | Garantili kapsam | Küçük alanlar |

### 📌 Öneriler
1. Pahalı değerlendirmeler için **Bayesian Optimizasyon** kullanın
2. Hızlı başlangıçlar için **Random Search** kullanın
3. Son ayarlama için **Grid Search** kullanın

✅ **Proje tamamlandı!**