# 🤖 Proje 2: Makine Öğrenimi ve Örüntü Tanıma

**Ders:** Makine Öğrenimi ve Örüntü Tanıma  
**Veri Seti:** Steel Plates Fault Detection  
**Amaç:** Birden fazla sınıflandırma algoritmasını eğitmek ve karşılaştırmak

## 1. Kurulum

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

# Seed NumPy's global random generator so code drawing from it is repeatable.
np.random.seed(42)
# Install a blanket "ignore" filter so warnings do not clutter the cell output.
warnings.simplefilter('ignore')
print("‚úÖ Libraries imported!")

## 2. Veri Yükleme

In [None]:
# Labels for the 27 measurement columns, assigned to the leading columns of the table.
feature_names = [
    'X_Minimum', 'X_Maximum', 'Y_Minimum', 'Y_Maximum',
    'Pixels_Areas', 'X_Perimeter', 'Y_Perimeter',
    'Sum_of_Luminosity', 'Minimum_of_Luminosity', 'Maximum_of_Luminosity',
    'Length_of_Conveyer', 'TypeOfSteel_A300', 'TypeOfSteel_A400',
    'Steel_Plate_Thickness', 'Edges_Index', 'Empty_Index', 'Square_Index',
    'Outside_X_Index', 'Edges_X_Index', 'Edges_Y_Index', 'Outside_Global_Index',
    'LogOfAreas', 'Log_X_Index', 'Log_Y_Index',
    'Orientation_Index', 'Luminosity_Index', 'SigmoidOfAreas',
]
# Labels for the 7 fault-type columns that come after the features.
class_names = [
    'Pastry', 'Z_Scratch', 'K_Scratch', 'Stains',
    'Dirtiness', 'Bumps', 'Other_Faults',
]

# The file is read without a header row; column labels are attached afterwards.
# Assigning the labels raises if the column count does not match 27 + 7.
df = pd.read_csv('../data/raw/steel_plates_fault.csv', header=None)
df.columns = [*feature_names, *class_names]
print(f"‚úÖ Loaded: {df.shape}")
df.head()

## 3. Keşifsel Veri Analizi

In [None]:
# Exploratory overview: summary statistics, class balance and feature correlation.

# Summary statistics for every feature column, rounded for readability.
print("üìä Feature Statistics:")
print(df[feature_names].describe().round(2))

# One label per row: the name of the fault column holding that row's maximum.
y_labels = df[class_names].idxmax(axis=1)
class_counts = y_labels.value_counts()

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Left panel: number of samples per fault class.
# The palette is sized from the bars actually drawn instead of a hard-coded 7,
# so it stays correct if the set of classes present in the data changes.
bar_colors = plt.cm.viridis(np.linspace(0.2, 0.8, len(class_counts)))
class_counts.plot(kind='bar', ax=axes[0], color=bar_colors)
axes[0].set_title('Class Distribution', fontweight='bold')
axes[0].set_ylabel('Count')

# Right panel: pairwise correlation between features, colour scale centred on 0.
corr = df[feature_names].corr()
sns.heatmap(corr, ax=axes[1], cmap='coolwarm', center=0, square=True, linewidths=0.5)
axes[1].set_title('Feature Correlation', fontweight='bold')

plt.tight_layout()
plt.show()

## 4. Ön İşleme

In [None]:
# Feature matrix, plus integer targets: the position of the largest entry
# among the fault columns of each row.
X = df[feature_names].values
y = df[class_names].values.argmax(axis=1)

# Hold out 20% of the rows, keeping class proportions equal in both parts.
split_parts = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)
X_train, X_test, y_train, y_test = split_parts

# Scaling statistics come from the training rows only and are then applied
# unchanged to both parts, so the held-out rows never influence the scaler.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

print(f"‚úÖ Train: {X_train.shape[0]}, Test: {X_test.shape[0]}")

## 5. Model Eğitimi ve Değerlendirme

In [None]:
# Candidate classifiers, keyed by the display name used in tables and plots.
models = {
    'Logistic Regression': LogisticRegression(max_iter=1000, random_state=42),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
    'Gradient Boosting': GradientBoostingClassifier(random_state=42),
    'SVM': SVC(random_state=42),
    'KNN': KNeighborsClassifier(),
    'Naive Bayes': GaussianNB(),
    'Neural Network': MLPClassifier(hidden_layer_sizes=(100,), max_iter=500, random_state=42),
}

# Metrics that are averaged across classes, weighted by class support.
weighted_scorers = (
    ('Precision', precision_score),
    ('Recall', recall_score),
    ('F1-Score', f1_score),
)

# Fit each model on the scaled training rows and score it on the held-out rows.
results = []
for name, model in models.items():
    y_pred = model.fit(X_train_scaled, y_train).predict(X_test_scaled)

    row = {'Model': name, 'Accuracy': accuracy_score(y_test, y_pred)}
    for metric_label, scorer in weighted_scorers:
        row[metric_label] = scorer(y_test, y_pred, average='weighted')
    results.append(row)
    print(f"‚úÖ {name}: {results[-1]['Accuracy']:.4f}")

# Leaderboard with the most accurate model in the first row.
results_df = pd.DataFrame(results).sort_values('Accuracy', ascending=False)
display(results_df.round(4))

## 6. Model Karşılaştırması

In [None]:
# Side-by-side comparison of the trained models: accuracy ranking on the left,
# all four metrics grouped per model on the right.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Accuracy comparison (ascending order so the best model ends up at the top).
results_df_sorted = results_df.sort_values('Accuracy', ascending=True)
colors = plt.cm.RdYlGn(np.linspace(0.3, 0.9, len(results_df_sorted)))
axes[0].barh(results_df_sorted['Model'], results_df_sorted['Accuracy'], color=colors)
axes[0].set_xlabel('Accuracy')
axes[0].set_title('Model Accuracy Comparison', fontweight='bold')
# Zoom the x-axis around the observed accuracies. The window is derived from the
# data (with a small margin, clamped to [0, 1]) instead of a fixed range, so a
# model scoring outside a preset window is never clipped or left without a bar.
acc_margin = 0.05
acc_low = max(0.0, results_df_sorted['Accuracy'].min() - acc_margin)
acc_high = min(1.0, results_df_sorted['Accuracy'].max() + acc_margin)
axes[0].set_xlim(acc_low, acc_high)

# Metrics comparison: one group of four bars per model, in leaderboard order.
metrics = ['Accuracy', 'Precision', 'Recall', 'F1-Score']
x = np.arange(len(results_df))
width = 0.2
for i, metric in enumerate(metrics):
    axes[1].bar(x + i*width, results_df[metric], width, label=metric)
# Centre each tick under its group of four bars.
axes[1].set_xticks(x + 1.5*width)
axes[1].set_xticklabels(results_df['Model'], rotation=45, ha='right')
axes[1].legend()
axes[1].set_title('All Metrics Comparison', fontweight='bold')

plt.tight_layout()
plt.show()

## 7. En İyi Model Analizi

In [None]:
# Detailed look at the top-ranked model: per-class report and confusion matrix.
# results_df is sorted by accuracy (descending), so row 0 is the best model.
best_model_name = results_df.iloc[0]['Model']
best_model = models[best_model_name]
y_pred = best_model.predict(X_test_scaled)

# Targets are column positions within class_names, so the full label set is
# 0..len(class_names)-1. Passing it explicitly keeps the report rows and the
# matrix rows/columns aligned with class_names even if some class is missing
# from both the held-out targets and the predictions.
label_ids = np.arange(len(class_names))

print(f"üèÜ Best Model: {best_model_name}")
print(f"\nüìã Classification Report:")
print(classification_report(y_test, y_pred, labels=label_ids, target_names=class_names))

# Confusion matrix: rows are actual classes, columns are predicted classes.
cm = confusion_matrix(y_test, y_pred, labels=label_ids)
plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title(f'Confusion Matrix - {best_model_name}', fontweight='bold')
plt.tight_layout()
plt.show()

## 8. Özellik Önemi

In [None]:
# Rank the features by the importance scores of the fitted Random Forest.
rf_model = models['Random Forest']
importance = pd.DataFrame(
    {
        'Feature': feature_names,
        'Importance': rf_model.feature_importances_,
    }
)
importance = importance.sort_values('Importance', ascending=False)

# Horizontal bar chart of the 15 highest-ranked features.
top_features = importance.head(15)
_, importance_ax = plt.subplots(figsize=(10, 8))
importance_ax.barh(top_features['Feature'], top_features['Importance'], color='steelblue')
importance_ax.set_xlabel('Importance')
importance_ax.set_title('Top 15 Feature Importance (Random Forest)', fontweight='bold')
# Flip the y-axis so the most important feature is drawn at the top.
importance_ax.invert_yaxis()
plt.tight_layout()
plt.show()

print("\nüìä Top 5 Features:")
display(importance.head())

## 9. Sonuçlar

### 🎯 Temel Bulgular

1. **Random Forest** en iyi doğruluğu elde etti
2. **Topluluk yöntemleri** tekil modellerden üstün
3. **Piksel alanı** en önemli özellik
4. Sınıf dengesizliği azınlık sınıfı tahminini etkiliyor

### 📌 Öneriler
- Üretim için Random Forest kullanın
- Dengesiz sınıflar için sınıf ağırlıklarını düşünün

✅ **Proje tamamlandı!**