In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

# Step 1: Load dataset from CSV file
data = pd.read_csv('modified_dataset.csv')

# Step 2: Assign features (X) and labels (y)
X = data.drop(columns='target')  # Assuming 'species' is the column name for labels
y = data['target']

# If labels are categorical, convert them to numerical values
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Step 3: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Step 5: Create and train the SVM model
svm_model = SVC(kernel='linear', C=1.0, random_state=42)
svm_model.fit(X_train, y_train)

# Step 6: Make predictions
y_pred = svm_model.predict(X_test)

# Step 7: Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Optional: Hyperparameter tuning using GridSearchCV
from sklearn.model_selection import GridSearchCV

param_grid = {'C': [0.1, 1, 10, 100], 'kernel': ['linear', 'rbf', 'poly', 'sigmoid']}
grid_search = GridSearchCV(SVC(), param_grid, refit=True, verbose=3, cv=5)
grid_search.fit(X_train, y_train)

# Best parameters from GridSearchCV
print("Best parameters found: ", grid_search.best_params_)

# Evaluate the best model
best_svm_model = grid_search.best_estimator_
y_pred_best = best_svm_model.predict(X_test)
print("Best Model Accuracy:", accuracy_score(y_test, y_pred_best))
print("Best Model Classification Report:")
print(classification_report(y_test, y_pred_best))


Accuracy: 0.7912
Classification Report:
              precision    recall  f1-score   support

           0       0.81      0.91      0.86      3437
           1       0.73      0.52      0.61      1563

    accuracy                           0.79      5000
   macro avg       0.77      0.72      0.73      5000
weighted avg       0.78      0.79      0.78      5000

Fitting 5 folds for each of 16 candidates, totalling 80 fits
[CV 1/5] END ..............C=0.1, kernel=linear;, score=0.783 total time=  19.1s
[CV 2/5] END ..............C=0.1, kernel=linear;, score=0.784 total time=  20.8s
[CV 3/5] END ..............C=0.1, kernel=linear;, score=0.772 total time=  21.4s
[CV 4/5] END ..............C=0.1, kernel=linear;, score=0.797 total time=  20.7s
[CV 5/5] END ..............C=0.1, kernel=linear;, score=0.780 total time=  22.9s
[CV 1/5] END .................C=0.1, kernel=rbf;, score=0.785 total time=24.2min
[CV 2/5] END .................C=0.1, kernel=rbf;, score=0.786 total time=  41.9s
[CV 3