In [18]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.datasets import load_iris
from statistics import mean, stdev
from matplotlib import pyplot as plt
%matplotlib inline
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import confusion_matrix, f1_score, accuracy_score

In [19]:
# Load the iris dataset that ships with scikit-learn
iris = load_iris()
# List the attributes exposed by the returned dataset object
dir(iris)

['DESCR',
 'data',
 'data_module',
 'feature_names',
 'filename',
 'frame',
 'target',
 'target_names']

In [20]:
# Show the names of the four measured features (used as column labels later on)
iris.feature_names

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [21]:
# Create a DataFrame from the iris measurements, labelling each column
# with the corresponding entry of iris.feature_names
df = pd.DataFrame(iris.data, columns=iris.feature_names)
# Display the frame (the rendered output is truncated in the middle)
df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [37]:
# Extract the feature matrix (X) as a NumPy array and the target labels (y).
# DataFrame.to_numpy() is the accessor pandas recommends over `.values`.
X = df.to_numpy()
y = iris.target

In [38]:
# Instantiate a Support Vector Classifier (SVC) with its default hyperparameters;
# the class itself is imported in the first cell
model = SVC()

In [45]:
# Evaluate the classifier with stratified 10-fold cross-validation.
# Shuffling with a fixed random_state keeps the fold assignment reproducible.
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=1)

# Per-fold evaluation metrics, appended in fold order
lst_accu_stratified = []
lst_f1_scores = []
lst_confusion_matrices = []

for train_index, test_index in skf.split(X, y):
    # Split the data into this fold's training and held-out test sets
    X_train_fold, X_test_fold = X[train_index], X[test_index]
    y_train_fold, y_test_fold = y[train_index], y[test_index]

    # Fit the SVC model on the training portion only
    model.fit(X_train_fold, y_train_fold)

    # Predict once; every metric below reuses these predictions
    y_pred = model.predict(X_test_fold)

    # Accuracy from the existing predictions. model.score() would run a
    # second, redundant prediction pass over the same test fold.
    accuracy = accuracy_score(y_test_fold, y_pred)
    lst_accu_stratified.append(accuracy)

    # Class-frequency-weighted F1 score for this fold
    f1 = f1_score(y_test_fold, y_pred, average='weighted')
    lst_f1_scores.append(f1)

    # Confusion matrix for this fold (true labels first, predictions second)
    cm = confusion_matrix(y_test_fold, y_pred)
    lst_confusion_matrices.append(cm)
    

In [46]:
# Summarise the per-fold accuracies gathered during cross-validation.
# The "maximum" is simply the best single fold, and the standard deviation
# is reported as a fraction while the other figures are percentages.
best_fold_accuracy = max(lst_accu_stratified)
worst_fold_accuracy = min(lst_accu_stratified)
average_accuracy = mean(lst_accu_stratified)
accuracy_spread = stdev(lst_accu_stratified)

print('List of possible accuracy:', lst_accu_stratified)
print('\nMaximum Accuracy That can be obtained from this model is:', best_fold_accuracy*100, '%')
print('\nMinimum Accuracy:', worst_fold_accuracy*100, '%')
print('\nOverall Accuracy:', average_accuracy*100, '%')
print('\nStandard Deviation is:', accuracy_spread)

List of possible accuracy: [1.0, 0.8666666666666667, 1.0, 1.0, 0.9333333333333333, 0.9333333333333333, 1.0, 1.0, 1.0, 1.0]

Maximum Accuracy That can be obtained from this model is: 100.0 %

Minimum Accuracy: 86.66666666666667 %

Overall Accuracy: 97.33333333333334 %

Standard Deviation is: 0.046613726585340055


In [47]:
# Report the weighted F1 score of every fold, then their mean across folds
average_f1 = mean(lst_f1_scores)
print('\nList of F1 scores:', lst_f1_scores)
print('\nOverall F1 score:', average_f1)

# Dump the raw confusion matrix of each fold, in fold order
print('\nList of Confusion Matrices:', lst_confusion_matrices)


List of F1 scores: [1.0, 0.8666666666666667, 1.0, 1.0, 0.9326599326599326, 0.9326599326599326, 1.0, 1.0, 1.0, 1.0]

Overall F1 score: 0.9731986531986532

List of Confusion Matrices: [array([[5, 0, 0],
       [0, 5, 0],
       [0, 0, 5]], dtype=int64), array([[5, 0, 0],
       [0, 4, 1],
       [0, 1, 4]], dtype=int64), array([[5, 0, 0],
       [0, 5, 0],
       [0, 0, 5]], dtype=int64), array([[5, 0, 0],
       [0, 5, 0],
       [0, 0, 5]], dtype=int64), array([[5, 0, 0],
       [0, 4, 1],
       [0, 0, 5]], dtype=int64), array([[5, 0, 0],
       [0, 5, 0],
       [0, 1, 4]], dtype=int64), array([[5, 0, 0],
       [0, 5, 0],
       [0, 0, 5]], dtype=int64), array([[5, 0, 0],
       [0, 5, 0],
       [0, 0, 5]], dtype=int64), array([[5, 0, 0],
       [0, 5, 0],
       [0, 0, 5]], dtype=int64), array([[5, 0, 0],
       [0, 5, 0],
       [0, 0, 5]], dtype=int64)]
