In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load the dataset from a CSV file
file_path = 'IRIS.csv'  # Update with your file path
df = pd.read_csv(file_path)

# Split the data into features (X) and target (y)
X = df.drop('species', axis=1)
y = df['species']

# Define the SVM classifier
svm = SVC()

# Define the grid of parameters to search (C: regularization parameter, gamma: kernel coefficient)
param_grid = {'C': [0.1, 1, 10, 100],
              'gamma': [1, 0.1, 0.01, 0.001],
              'kernel': ['linear', 'rbf', 'poly']}

# Use GridSearchCV to search for the best hyperparameters with 5-fold cross-validation
grid_search = GridSearchCV(estimator=svm, param_grid=param_grid, cv=5, scoring='accuracy', verbose=1, n_jobs=-1)

# Repeat the grid search for several train/test split ratios and remember the
# split (and the hyperparameters found on it) with the highest mean CV accuracy.
# NOTE(review): the CV scores are computed on training sets of different sizes,
# so comparing them across split ratios is only a rough heuristic.
best_test_size = None
best_params = None
best_score = float('-inf')  # guarantees the first iteration is always recorded

for test_size in [0.1, 0.2, 0.3, 0.4]:  # Test different split ratios
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)

    # Perform GridSearchCV on the training data
    grid_search.fit(X_train, y_train)

    # Print the best parameters found by GridSearchCV for this split
    print("Best Parameters:", grid_search.best_params_)

    # Keep the winning split AND its hyperparameters together: grid_search is
    # re-fitted on every iteration, so after the loop its best_* attributes
    # only describe the last split tried, not necessarily the best one.
    if grid_search.best_score_ > best_score:
        best_score = grid_search.best_score_
        best_test_size = test_size
        best_params = dict(grid_search.best_params_)

# Print the best test_size found
print()
print("Best Test Size:", best_test_size)

# Split the data with the best test_size
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=best_test_size, random_state=42)

# Train the SVM classifier with the hyperparameters that belong to the best
# split. Start from the base estimator's settings so any non-default option
# configured on `svm` is carried over, then apply the tuned values.
best_svm = SVC(**{**svm.get_params(), **best_params})
best_svm.fit(X_train, y_train)

# Make predictions on the training set
y_train_pred = best_svm.predict(X_train)

# Calculate the accuracy on the training set
train_accuracy = accuracy_score(y_train, y_train_pred)
print()
print("Training Accuracy:", train_accuracy * 100)

# Make predictions on the test set
y_test_pred = best_svm.predict(X_test)

# Calculate the accuracy on the test set
test_accuracy = accuracy_score(y_test, y_test_pred)
print()
print("Test Accuracy:", test_accuracy * 100)

# Calculate precision, recall, and F1-score (weighted by class support)
precision = precision_score(y_test, y_test_pred, average='weighted')
recall = recall_score(y_test, y_test_pred, average='weighted')
f1 = f1_score(y_test, y_test_pred, average='weighted')

print()
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)

Fitting 5 folds for each of 48 candidates, totalling 240 fits
Best Parameters: {'C': 0.1, 'gamma': 1, 'kernel': 'linear'}
Fitting 5 folds for each of 48 candidates, totalling 240 fits
Best Parameters: {'C': 0.1, 'gamma': 0.1, 'kernel': 'poly'}
Fitting 5 folds for each of 48 candidates, totalling 240 fits
Best Parameters: {'C': 100, 'gamma': 0.01, 'kernel': 'rbf'}
Fitting 5 folds for each of 48 candidates, totalling 240 fits
Best Parameters: {'C': 0.1, 'gamma': 1, 'kernel': 'poly'}

Best Test Size: 0.1

Training Accuracy: 98.51851851851852

Test Accuracy: 100.0

Precision: 1.0
Recall: 1.0
F1-score: 1.0


In [5]:
def predict_species(sepal_length, sepal_width, petal_length, petal_width, model=None):
    """Predict the species label for a single flower measurement.

    Parameters
    ----------
    sepal_length, sepal_width, petal_length, petal_width : float
        The four measurements of the flower, passed to the model in this order.
    model : object with a ``predict`` method, optional
        Classifier used for the prediction. When omitted, the notebook-level
        ``best_svm`` is used, so existing four-argument calls keep working.

    Returns
    -------
    The first (and only) element of the array returned by ``model.predict``.
    """
    if model is None:
        # Fall back to the classifier trained earlier in the notebook.
        model = best_svm

    # Build a one-row DataFrame with named columns so the input carries the
    # same feature names as the columns listed here.
    input_data = pd.DataFrame(
        [[sepal_length, sepal_width, petal_length, petal_width]],
        columns=['sepal_length', 'sepal_width', 'petal_length', 'petal_width'],
    )
    prediction = model.predict(input_data)
    return prediction[0]

# Ask for the four measurements in order; each answer is converted to float
# as soon as it is read (both maps are lazy, so prompts appear one at a time).
sepal_length, sepal_width, petal_length, petal_width = map(
    float,
    map(
        input,
        (
            "Enter sepal length: ",
            "Enter sepal width: ",
            "Enter petal length: ",
            "Enter petal width: ",
        ),
    ),
)

# Classify the flower described by the user and report the result
predicted_species = predict_species(
    sepal_length,
    sepal_width,
    petal_length,
    petal_width,
)
print()
print("Predicted Species:", predicted_species)


Enter sepal length:  6.7
Enter sepal width:  3.3
Enter petal length:  5.7
Enter petal width:  2.1



Predicted Species: Iris-virginica
