In [43]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score
import numpy as np

In [51]:
# Load the sonar dataset. The file has no header row, so the columns are
# addressed by integer position; column 60 is used as the class label.
data = pd.read_csv('sonardata.csv', header=None)

# Separate features and target
X = data.drop(columns=60)
y = data[60]

# One seed shared by the split and by every stochastic estimator, so that a
# fresh "Restart Kernel -> Run All" reproduces the same numbers.
RANDOM_STATE = 42

# Split the data into training and testing sets (80% train / 20% test)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# Candidate models. The tree ensembles draw random numbers while fitting, so
# they are seeded explicitly; without this their scores change on every run.
models = {
    'Logistic Regression': LogisticRegression(max_iter=1000),
    'K-Nearest Neighbors': KNeighborsClassifier(),
    'Support Vector Machine': SVC(),
    'Random Forest': RandomForestClassifier(random_state=RANDOM_STATE),
    'Gradient Boosting': GradientBoostingClassifier(random_state=RANDOM_STATE)
}

# model name -> dict of metrics and test-set predictions
results = {}

# Evaluate each model in turn
for model_name, model in models.items():

    # Why cross-validation? 5-fold CV on the training data yields a mean and
    # a spread of accuracy instead of a single number from one split, and it
    # leaves the held-out test set untouched until the final check below.
    cv_scores = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy')
    results[model_name] = {
        'Cross-Validation Accuracy Mean': np.mean(cv_scores),
        'Cross-Validation Accuracy Std': np.std(cv_scores)
    }

    # cross_val_score fits clones of the estimator, so `model` itself is still
    # unfitted here and must be trained on the full training set.
    model.fit(X_train, y_train)

    # Predict once and reuse the result for both the accuracy and the report.
    predictions = model.predict(X_test)

    # Test accuracy
    test_accuracy = accuracy_score(y_test, predictions)
    results[model_name]['Test Accuracy'] = test_accuracy

    # Predictions
    results[model_name]['Predictions'] = predictions

# Print results with predictions
for model_name, metrics in results.items():
    print(f"{model_name}:")
    print(f"  Cross-Validation Accuracy Mean: {metrics['Cross-Validation Accuracy Mean'] * 100:.2f}%")
    print(f"  Cross-Validation Accuracy Std: {metrics['Cross-Validation Accuracy Std'] * 100:.2f}%")
    print(f"  Test Accuracy: {metrics['Test Accuracy'] * 100:.2f}%")
    print(f"  Predictions: {metrics['Predictions']}\n")


Logistic Regression:
  Cross-Validation Accuracy Mean: 75.35%
  Cross-Validation Accuracy Std: 8.25%
  Test Accuracy: 78.57%
  Predictions: ['M' 'R' 'R' 'R' 'M' 'R' 'R' 'M' 'R' 'M' 'M' 'R' 'R' 'M' 'M' 'M' 'M' 'M'
 'M' 'R' 'M' 'M' 'R' 'R' 'R' 'R' 'R' 'R' 'M' 'M' 'M' 'R' 'R' 'M' 'M' 'R'
 'M' 'R' 'M' 'M' 'R' 'R']

K-Nearest Neighbors:
  Cross-Validation Accuracy Mean: 72.89%
  Cross-Validation Accuracy Std: 4.71%
  Test Accuracy: 85.71%
  Predictions: ['M' 'R' 'R' 'R' 'M' 'M' 'M' 'M' 'R' 'R' 'M' 'R' 'M' 'M' 'M' 'M' 'R' 'M'
 'R' 'R' 'M' 'R' 'M' 'M' 'R' 'R' 'R' 'R' 'M' 'M' 'M' 'R' 'R' 'M' 'M' 'R'
 'R' 'M' 'R' 'M' 'R' 'M']

Support Vector Machine:
  Cross-Validation Accuracy Mean: 74.12%
  Cross-Validation Accuracy Std: 6.13%
  Test Accuracy: 83.33%
  Predictions: ['M' 'R' 'R' 'R' 'M' 'M' 'M' 'M' 'R' 'M' 'M' 'R' 'M' 'M' 'M' 'M' 'M' 'M'
 'M' 'R' 'M' 'R' 'M' 'R' 'R' 'R' 'R' 'R' 'M' 'M' 'M' 'R' 'R' 'M' 'M' 'R'
 'M' 'R' 'R' 'M' 'R' 'R']

Random Forest:
  Cross-Validation Accuracy Mean: 83.16%
  