# Evaluating Machine Learning Models

### Loading Libraries

In [17]:
# Numerical Computing 
import numpy as pn

# Data Manipulation
import pandas as pd

# Data Visualization
import seaborn as sns
import matplotlib.pyplot as plt

# Scikit-Learn
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, KFold
from sklearn.model_selection import LeavePOut, LeaveOneOut
from sklearn.model_selection import RepeatedKFold, StratifiedKFold, RepeatedStratifiedKFold

### Retrieving Data

In [2]:
def get_iris_data():
    '''Load the Scikit-Learn Iris toy dataset as a Pandas DataFrame.

    The returned frame has one column per entry of the dataset's
    ``feature_names`` plus a ``class`` column holding the ``target`` labels.
    '''
    bunch = load_iris()

    # Build the feature table first, then attach the label column.
    frame = pd.DataFrame(data=bunch.data, columns=bunch.feature_names)
    frame['class'] = bunch.target
    return frame


def get_data_summary(df, target_col='class'):
    '''Print and display a quick overview of a Pandas DataFrame.

    Shows, in order: the ``df.info()`` report, the relative frequency of
    each value in ``target_col``, and the DataFrame itself.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to summarise; must contain ``target_col``.
    target_col : str, default 'class'
        Column whose value proportions are reported.

    Returns
    -------
    None

    Raises
    ------
    KeyError
        If ``target_col`` is not a column of ``df``.
    '''

    print("========== Summary Info ==========")
    # DataFrame.info() prints its report itself and returns None, so it must
    # not be wrapped in display() -- that only adds a stray "None" output.
    df.info()
    print("\n========== Class Proportion ==========\n")
    # normalize=True reports proportions instead of raw counts.
    display(df[target_col].value_counts(normalize=True))
    print("\n=======================================")
    display(df)

In [3]:
# Load the Iris data once; the later split cells all start from this frame.
df = get_iris_data()

In [4]:
# Baseline view of the full dataset, for comparison with the splits below.
get_data_summary(df)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  150 non-null    float64
 1   sepal width (cm)   150 non-null    float64
 2   petal length (cm)  150 non-null    float64
 3   petal width (cm)   150 non-null    float64
 4   class              150 non-null    int64  
dtypes: float64(4), int64(1)
memory usage: 6.0 KB


None





class
0    0.333333
1    0.333333
2    0.333333
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2


## Random vs Stratified Split

### Random Split

In [6]:
# Purely random 80/20 split: no stratification, so the class proportions
# of each part are left to chance.
df_train, df_unseen = train_test_split(
    df,
    test_size=0.2,
    random_state=0,
)

# Halve the held-out 20% into a validation set and a test set.
df_val, df_test = train_test_split(
    df_unseen,
    test_size=0.5,
    random_state=0,
)

In [7]:
# Training part of the random split: compare its class proportions with the full dataset.
get_data_summary(df_train)

<class 'pandas.core.frame.DataFrame'>
Index: 120 entries, 137 to 47
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  120 non-null    float64
 1   sepal width (cm)   120 non-null    float64
 2   petal length (cm)  120 non-null    float64
 3   petal width (cm)   120 non-null    float64
 4   class              120 non-null    int64  
dtypes: float64(4), int64(1)
memory usage: 5.6 KB


None





class
2    0.366667
0    0.325000
1    0.308333
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
137,6.4,3.1,5.5,1.8,2
84,5.4,3.0,4.5,1.5,1
27,5.2,3.5,1.5,0.2,0
127,6.1,3.0,4.9,1.8,2
132,6.4,2.8,5.6,2.2,2
...,...,...,...,...,...
9,4.9,3.1,1.5,0.1,0
103,6.3,2.9,5.6,1.8,2
67,5.8,2.7,4.1,1.0,1
117,7.7,3.8,6.7,2.2,2


In [8]:
# Validation part of the random split.
get_data_summary(df_val)

<class 'pandas.core.frame.DataFrame'>
Index: 15 entries, 121 to 73
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  15 non-null     float64
 1   sepal width (cm)   15 non-null     float64
 2   petal length (cm)  15 non-null     float64
 3   petal width (cm)   15 non-null     float64
 4   class              15 non-null     int64  
dtypes: float64(4), int64(1)
memory usage: 720.0 bytes


None





class
0    0.466667
1    0.333333
2    0.200000
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
121,5.6,2.8,4.9,2.0,2
62,6.0,2.2,4.0,1.0,1
26,5.0,3.4,1.6,0.4,0
40,5.0,3.5,1.3,0.3,0
7,5.0,3.4,1.5,0.2,0
45,4.8,3.0,1.4,0.3,0
16,5.4,3.9,1.3,0.4,0
71,6.1,2.8,4.0,1.3,1
86,6.7,3.1,4.7,1.5,1
22,4.6,3.6,1.0,0.2,0


In [9]:
# Test part of the random split.
get_data_summary(df_test)

<class 'pandas.core.frame.DataFrame'>
Index: 15 entries, 33 to 8
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  15 non-null     float64
 1   sepal width (cm)   15 non-null     float64
 2   petal length (cm)  15 non-null     float64
 3   petal width (cm)   15 non-null     float64
 4   class              15 non-null     int64  
dtypes: float64(4), int64(1)
memory usage: 720.0 bytes


None





class
1    0.533333
0    0.266667
2    0.200000
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
33,5.5,4.2,1.4,0.2,0
93,5.0,2.3,3.3,1.0,1
54,6.5,2.8,4.6,1.5,1
134,6.1,2.6,5.6,1.4,2
44,5.1,3.8,1.9,0.4,0
126,6.2,2.8,4.8,1.8,2
97,6.2,2.9,4.3,1.3,1
51,6.4,3.2,4.5,1.5,1
90,5.5,2.6,4.4,1.2,1
24,4.8,3.4,1.9,0.2,0


### Stratified Split

In [10]:
# Stratified 80/20 split: stratify on the label column so both parts keep
# the class proportions of the full dataset.
df_train, df_unseen = train_test_split(
    df,
    test_size=0.2,
    random_state=0,
    stratify=df['class'],
)

# Halve the held-out 20% into validation and test, stratifying again on
# the labels of the held-out part.
df_val, df_test = train_test_split(
    df_unseen,
    test_size=0.5,
    random_state=0,
    stratify=df_unseen['class'],
)

In [11]:
# Training part of the stratified split: compare its class proportions with the full dataset.
get_data_summary(df_train)

<class 'pandas.core.frame.DataFrame'>
Index: 120 entries, 45 to 106
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  120 non-null    float64
 1   sepal width (cm)   120 non-null    float64
 2   petal length (cm)  120 non-null    float64
 3   petal width (cm)   120 non-null    float64
 4   class              120 non-null    int64  
dtypes: float64(4), int64(1)
memory usage: 5.6 KB


None





class
0    0.333333
1    0.333333
2    0.333333
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
45,4.8,3.0,1.4,0.3,0
1,4.9,3.0,1.4,0.2,0
38,4.4,3.0,1.3,0.2,0
7,5.0,3.4,1.5,0.2,0
82,5.8,2.7,3.9,1.2,1
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
34,4.9,3.1,1.5,0.2,0
117,7.7,3.8,6.7,2.2,2
128,6.4,2.8,5.6,2.1,2


In [12]:
# Validation part of the stratified split.
get_data_summary(df_val)

<class 'pandas.core.frame.DataFrame'>
Index: 15 entries, 67 to 44
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  15 non-null     float64
 1   sepal width (cm)   15 non-null     float64
 2   petal length (cm)  15 non-null     float64
 3   petal width (cm)   15 non-null     float64
 4   class              15 non-null     int64  
dtypes: float64(4), int64(1)
memory usage: 720.0 bytes


None





class
1    0.333333
2    0.333333
0    0.333333
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
67,5.8,2.7,4.1,1.0,1
113,5.7,2.5,5.0,2.0,2
81,5.5,2.4,3.7,1.0,1
136,6.3,3.4,5.6,2.4,2
90,5.5,2.6,4.4,1.2,1
0,5.1,3.5,1.4,0.2,0
99,5.7,2.8,4.1,1.3,1
139,6.9,3.1,5.4,2.1,2
3,4.6,3.1,1.5,0.2,0
129,7.2,3.0,5.8,1.6,2


In [13]:
# Test part of the stratified split.
get_data_summary(df_test)

<class 'pandas.core.frame.DataFrame'>
Index: 15 entries, 100 to 19
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  15 non-null     float64
 1   sepal width (cm)   15 non-null     float64
 2   petal length (cm)  15 non-null     float64
 3   petal width (cm)   15 non-null     float64
 4   class              15 non-null     int64  
dtypes: float64(4), int64(1)
memory usage: 720.0 bytes


None





class
2    0.333333
1    0.333333
0    0.333333
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
100,6.3,3.3,6.0,2.5,2
134,6.1,2.6,5.6,1.4,2
147,6.5,3.0,5.2,2.0,2
86,6.7,3.1,4.7,1.5,1
47,4.6,3.2,1.4,0.2,0
50,7.0,3.2,4.7,1.4,1
55,5.7,2.8,4.5,1.3,1
36,5.5,3.5,1.3,0.2,0
65,6.7,3.1,4.4,1.4,1
46,5.1,3.8,1.6,0.2,0


## K-Fold Cross-Validation

### Random Split

In [14]:
# Set aside 20% as the final test set (random, not stratified); the other
# 80% (df_cv) is what the cross-validation folds are drawn from.
df_cv, df_test = train_test_split(
    df,
    test_size=0.2,
    random_state=0,
)

In [15]:
# Cross-validation pool from the random hold-out split.
get_data_summary(df_cv)

<class 'pandas.core.frame.DataFrame'>
Index: 120 entries, 137 to 47
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  120 non-null    float64
 1   sepal width (cm)   120 non-null    float64
 2   petal length (cm)  120 non-null    float64
 3   petal width (cm)   120 non-null    float64
 4   class              120 non-null    int64  
dtypes: float64(4), int64(1)
memory usage: 5.6 KB


None





class
2    0.366667
0    0.325000
1    0.308333
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
137,6.4,3.1,5.5,1.8,2
84,5.4,3.0,4.5,1.5,1
27,5.2,3.5,1.5,0.2,0
127,6.1,3.0,4.9,1.8,2
132,6.4,2.8,5.6,2.2,2
...,...,...,...,...,...
9,4.9,3.1,1.5,0.1,0
103,6.3,2.9,5.6,1.8,2
67,5.8,2.7,4.1,1.0,1
117,7.7,3.8,6.7,2.2,2


In [18]:
# Plain K-Fold: df_cv is cut into 4 folds without shuffling (KFold's
# default), and each fold serves exactly once as the validation set.
kf = KFold(n_splits=4)

# enumerate(..., start=1) numbers the iterations, so no hand-maintained
# counter is needed.
for iter_num, (train_index, val_index) in enumerate(kf.split(df_cv), start=1):
    # split() yields positional indices, hence .iloc rather than .loc.
    df_train, df_val = df_cv.iloc[train_index], df_cv.iloc[val_index]

    print("="*60)
    print(f"ITER {iter_num}\n")
    print("Train Set")
    get_data_summary(df_train)
    print("Val Set")
    get_data_summary(df_val)

    #perform training or hyperparameter tuning here

ITER 1

Train Set
<class 'pandas.core.frame.DataFrame'>
Index: 90 entries, 85 to 47
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  90 non-null     float64
 1   sepal width (cm)   90 non-null     float64
 2   petal length (cm)  90 non-null     float64
 3   petal width (cm)   90 non-null     float64
 4   class              90 non-null     int64  
dtypes: float64(4), int64(1)
memory usage: 4.2 KB


None





class
0    0.377778
2    0.322222
1    0.300000
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
85,6.0,3.4,4.5,1.6,1
30,4.8,3.1,1.6,0.2,0
101,5.8,2.7,5.1,1.9,2
94,5.6,2.7,4.2,1.3,1
64,5.6,2.9,3.6,1.3,1
...,...,...,...,...,...
9,4.9,3.1,1.5,0.1,0
103,6.3,2.9,5.6,1.8,2
67,5.8,2.7,4.1,1.0,1
117,7.7,3.8,6.7,2.2,2


Val Set
<class 'pandas.core.frame.DataFrame'>
Index: 30 entries, 137 to 147
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  30 non-null     float64
 1   sepal width (cm)   30 non-null     float64
 2   petal length (cm)  30 non-null     float64
 3   petal width (cm)   30 non-null     float64
 4   class              30 non-null     int64  
dtypes: float64(4), int64(1)
memory usage: 1.4 KB


None





class
2    0.500000
1    0.333333
0    0.166667
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
137,6.4,3.1,5.5,1.8,2
84,5.4,3.0,4.5,1.5,1
27,5.2,3.5,1.5,0.2,0
127,6.1,3.0,4.9,1.8,2
132,6.4,2.8,5.6,2.2,2
59,5.2,2.7,3.9,1.4,1
18,5.7,3.8,1.7,0.3,0
83,6.0,2.7,5.1,1.6,1
61,5.9,3.0,4.2,1.5,1
92,5.8,2.6,4.0,1.2,1


ITER 2

Train Set
<class 'pandas.core.frame.DataFrame'>
Index: 90 entries, 137 to 47
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  90 non-null     float64
 1   sepal width (cm)   90 non-null     float64
 2   petal length (cm)  90 non-null     float64
 3   petal width (cm)   90 non-null     float64
 4   class              90 non-null     int64  
dtypes: float64(4), int64(1)
memory usage: 4.2 KB


None





class
2    0.377778
0    0.322222
1    0.300000
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
137,6.4,3.1,5.5,1.8,2
84,5.4,3.0,4.5,1.5,1
27,5.2,3.5,1.5,0.2,0
127,6.1,3.0,4.9,1.8,2
132,6.4,2.8,5.6,2.2,2
...,...,...,...,...,...
9,4.9,3.1,1.5,0.1,0
103,6.3,2.9,5.6,1.8,2
67,5.8,2.7,4.1,1.0,1
117,7.7,3.8,6.7,2.2,2


Val Set
<class 'pandas.core.frame.DataFrame'>
Index: 30 entries, 85 to 110
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  30 non-null     float64
 1   sepal width (cm)   30 non-null     float64
 2   petal length (cm)  30 non-null     float64
 3   petal width (cm)   30 non-null     float64
 4   class              30 non-null     int64  
dtypes: float64(4), int64(1)
memory usage: 1.4 KB


None





class
1    0.333333
0    0.333333
2    0.333333
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
85,6.0,3.4,4.5,1.6,1
30,4.8,3.1,1.6,0.2,0
101,5.8,2.7,5.1,1.9,2
94,5.6,2.7,4.2,1.3,1
64,5.6,2.9,3.6,1.3,1
89,5.5,2.5,4.0,1.3,1
91,6.1,3.0,4.6,1.4,1
125,7.2,3.2,6.0,1.8,2
48,5.3,3.7,1.5,0.2,0
13,4.3,3.0,1.1,0.1,0


ITER 3

Train Set
<class 'pandas.core.frame.DataFrame'>
Index: 90 entries, 137 to 47
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  90 non-null     float64
 1   sepal width (cm)   90 non-null     float64
 2   petal length (cm)  90 non-null     float64
 3   petal width (cm)   90 non-null     float64
 4   class              90 non-null     int64  
dtypes: float64(4), int64(1)
memory usage: 4.2 KB


None





class
2    0.377778
1    0.344444
0    0.277778
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
137,6.4,3.1,5.5,1.8,2
84,5.4,3.0,4.5,1.5,1
27,5.2,3.5,1.5,0.2,0
127,6.1,3.0,4.9,1.8,2
132,6.4,2.8,5.6,2.2,2
...,...,...,...,...,...
9,4.9,3.1,1.5,0.1,0
103,6.3,2.9,5.6,1.8,2
67,5.8,2.7,4.1,1.0,1
117,7.7,3.8,6.7,2.2,2


Val Set
<class 'pandas.core.frame.DataFrame'>
Index: 30 entries, 124 to 138
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  30 non-null     float64
 1   sepal width (cm)   30 non-null     float64
 2   petal length (cm)  30 non-null     float64
 3   petal width (cm)   30 non-null     float64
 4   class              30 non-null     int64  
dtypes: float64(4), int64(1)
memory usage: 1.4 KB


None





class
0    0.466667
2    0.333333
1    0.200000
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
124,6.7,3.3,5.7,2.1,2
41,4.5,2.3,1.3,0.3,0
148,6.2,3.4,5.4,2.3,2
1,4.9,3.0,1.4,0.2,0
113,5.7,2.5,5.0,2.0,2
139,6.9,3.1,5.4,2.1,2
42,4.4,3.2,1.3,0.2,0
4,5.0,3.6,1.4,0.2,0
129,7.2,3.0,5.8,1.6,2
17,5.1,3.5,1.4,0.3,0


ITER 4

Train Set
<class 'pandas.core.frame.DataFrame'>
Index: 90 entries, 137 to 138
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  90 non-null     float64
 1   sepal width (cm)   90 non-null     float64
 2   petal length (cm)  90 non-null     float64
 3   petal width (cm)   90 non-null     float64
 4   class              90 non-null     int64  
dtypes: float64(4), int64(1)
memory usage: 4.2 KB


None





class
2    0.388889
0    0.322222
1    0.288889
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
137,6.4,3.1,5.5,1.8,2
84,5.4,3.0,4.5,1.5,1
27,5.2,3.5,1.5,0.2,0
127,6.1,3.0,4.9,1.8,2
132,6.4,2.8,5.6,2.2,2
...,...,...,...,...,...
57,4.9,2.4,3.3,1.0,1
131,7.9,3.8,6.4,2.0,2
65,6.7,3.1,4.4,1.4,1
32,5.2,4.1,1.5,0.1,0


Val Set
<class 'pandas.core.frame.DataFrame'>
Index: 30 entries, 14 to 47
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  30 non-null     float64
 1   sepal width (cm)   30 non-null     float64
 2   petal length (cm)  30 non-null     float64
 3   petal width (cm)   30 non-null     float64
 4   class              30 non-null     int64  
dtypes: float64(4), int64(1)
memory usage: 1.4 KB


None





class
1    0.366667
0    0.333333
2    0.300000
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
14,5.8,4.0,1.2,0.2,0
122,7.7,2.8,6.7,2.0,2
19,5.1,3.8,1.5,0.3,0
29,4.7,3.2,1.6,0.2,0
130,7.4,2.8,6.1,1.9,2
49,5.0,3.3,1.4,0.2,0
136,6.3,3.4,5.6,2.4,2
99,5.7,2.8,4.1,1.3,1
82,5.8,2.7,3.9,1.2,1
79,5.7,2.6,3.5,1.0,1


### Stratified Split

In [19]:
# Set aside 20% as the final test set, stratified on the label column so
# the cross-validation pool (df_cv) keeps the original class proportions.
df_cv, df_test = train_test_split(
    df,
    test_size=0.2,
    random_state=0,
    stratify=df['class'],
)

In [20]:
# Cross-validation pool from the stratified hold-out split.
get_data_summary(df_cv)

<class 'pandas.core.frame.DataFrame'>
Index: 120 entries, 45 to 106
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  120 non-null    float64
 1   sepal width (cm)   120 non-null    float64
 2   petal length (cm)  120 non-null    float64
 3   petal width (cm)   120 non-null    float64
 4   class              120 non-null    int64  
dtypes: float64(4), int64(1)
memory usage: 5.6 KB


None





class
0    0.333333
1    0.333333
2    0.333333
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
45,4.8,3.0,1.4,0.3,0
1,4.9,3.0,1.4,0.2,0
38,4.4,3.0,1.3,0.2,0
7,5.0,3.4,1.5,0.2,0
82,5.8,2.7,3.9,1.2,1
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
34,4.9,3.1,1.5,0.2,0
117,7.7,3.8,6.7,2.2,2
128,6.4,2.8,5.6,2.1,2


In [21]:
# Stratified K-Fold: 4 folds, each built to preserve the class proportions
# of the labels passed to split().
skf = StratifiedKFold(n_splits=4)

# enumerate(..., start=1) numbers the iterations, so no hand-maintained
# counter is needed. The labels are passed as the second argument because
# the stratification is computed from them.
for iter_num, (train_index, val_index) in enumerate(
    skf.split(df_cv, df_cv['class']), start=1
):
    # split() yields positional indices, hence .iloc rather than .loc.
    df_train, df_val = df_cv.iloc[train_index], df_cv.iloc[val_index]

    print("="*60)
    print(f"ITER {iter_num}\n")
    print("Train Set")
    get_data_summary(df_train)
    print("Val Set")
    get_data_summary(df_val)

    #perform training or hyperparameter tuning here

ITER 1

Train Set
<class 'pandas.core.frame.DataFrame'>
Index: 90 entries, 26 to 106
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  90 non-null     float64
 1   sepal width (cm)   90 non-null     float64
 2   petal length (cm)  90 non-null     float64
 3   petal width (cm)   90 non-null     float64
 4   class              90 non-null     int64  
dtypes: float64(4), int64(1)
memory usage: 4.2 KB


None





class
0    0.333333
2    0.333333
1    0.333333
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
26,5.0,3.4,1.6,0.4,0
125,7.2,3.2,6.0,1.8,2
31,5.4,3.4,1.5,0.4,0
110,6.5,3.2,5.1,2.0,2
126,6.2,2.8,4.8,1.8,2
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
34,4.9,3.1,1.5,0.2,0
117,7.7,3.8,6.7,2.2,2
128,6.4,2.8,5.6,2.1,2


Val Set
<class 'pandas.core.frame.DataFrame'>
Index: 30 entries, 45 to 91
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  30 non-null     float64
 1   sepal width (cm)   30 non-null     float64
 2   petal length (cm)  30 non-null     float64
 3   petal width (cm)   30 non-null     float64
 4   class              30 non-null     int64  
dtypes: float64(4), int64(1)
memory usage: 1.4 KB


None





class
0    0.333333
1    0.333333
2    0.333333
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
45,4.8,3.0,1.4,0.3,0
1,4.9,3.0,1.4,0.2,0
38,4.4,3.0,1.3,0.2,0
7,5.0,3.4,1.5,0.2,0
82,5.8,2.7,3.9,1.2,1
37,4.9,3.6,1.4,0.1,0
108,6.7,2.5,5.8,1.8,2
124,6.7,3.3,5.7,2.1,2
72,6.3,2.5,4.9,1.5,1
127,6.1,3.0,4.9,1.8,2


ITER 2

Train Set
<class 'pandas.core.frame.DataFrame'>
Index: 90 entries, 45 to 106
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  90 non-null     float64
 1   sepal width (cm)   90 non-null     float64
 2   petal length (cm)  90 non-null     float64
 3   petal width (cm)   90 non-null     float64
 4   class              90 non-null     int64  
dtypes: float64(4), int64(1)
memory usage: 4.2 KB


None





class
0    0.333333
1    0.333333
2    0.333333
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
45,4.8,3.0,1.4,0.3,0
1,4.9,3.0,1.4,0.2,0
38,4.4,3.0,1.3,0.2,0
7,5.0,3.4,1.5,0.2,0
82,5.8,2.7,3.9,1.2,1
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
34,4.9,3.1,1.5,0.2,0
117,7.7,3.8,6.7,2.2,2
128,6.4,2.8,5.6,2.1,2


Val Set
<class 'pandas.core.frame.DataFrame'>
Index: 30 entries, 26 to 33
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  30 non-null     float64
 1   sepal width (cm)   30 non-null     float64
 2   petal length (cm)  30 non-null     float64
 3   petal width (cm)   30 non-null     float64
 4   class              30 non-null     int64  
dtypes: float64(4), int64(1)
memory usage: 1.4 KB


None





class
0    0.333333
2    0.333333
1    0.333333
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
26,5.0,3.4,1.6,0.4,0
125,7.2,3.2,6.0,1.8,2
31,5.4,3.4,1.5,0.4,0
110,6.5,3.2,5.1,2.0,2
126,6.2,2.8,4.8,1.8,2
89,5.5,2.5,4.0,1.3,1
122,7.7,2.8,6.7,2.0,2
79,5.7,2.6,3.5,1.0,1
132,6.4,2.8,5.6,2.2,2
78,6.0,2.9,4.5,1.5,1


ITER 3

Train Set
<class 'pandas.core.frame.DataFrame'>
Index: 90 entries, 45 to 106
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  90 non-null     float64
 1   sepal width (cm)   90 non-null     float64
 2   petal length (cm)  90 non-null     float64
 3   petal width (cm)   90 non-null     float64
 4   class              90 non-null     int64  
dtypes: float64(4), int64(1)
memory usage: 4.2 KB


None





class
0    0.333333
1    0.333333
2    0.333333
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
45,4.8,3.0,1.4,0.3,0
1,4.9,3.0,1.4,0.2,0
38,4.4,3.0,1.3,0.2,0
7,5.0,3.4,1.5,0.2,0
82,5.8,2.7,3.9,1.2,1
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
34,4.9,3.1,1.5,0.2,0
117,7.7,3.8,6.7,2.2,2
128,6.4,2.8,5.6,2.1,2


Val Set
<class 'pandas.core.frame.DataFrame'>
Index: 30 entries, 97 to 20
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  30 non-null     float64
 1   sepal width (cm)   30 non-null     float64
 2   petal length (cm)  30 non-null     float64
 3   petal width (cm)   30 non-null     float64
 4   class              30 non-null     int64  
dtypes: float64(4), int64(1)
memory usage: 1.4 KB


None





class
1    0.333333
0    0.333333
2    0.333333
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
97,6.2,2.9,4.3,1.3,1
15,5.7,4.4,1.5,0.4,0
61,5.9,3.0,4.2,1.5,1
77,6.7,3.0,5.0,1.7,1
105,7.6,3.0,6.6,2.1,2
66,5.6,3.0,4.5,1.5,1
119,6.0,2.2,5.0,1.5,2
12,4.8,3.0,1.4,0.1,0
41,4.5,2.3,1.3,0.3,0
137,6.4,3.1,5.5,1.8,2


ITER 4

Train Set
<class 'pandas.core.frame.DataFrame'>
Index: 90 entries, 45 to 20
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  90 non-null     float64
 1   sepal width (cm)   90 non-null     float64
 2   petal length (cm)  90 non-null     float64
 3   petal width (cm)   90 non-null     float64
 4   class              90 non-null     int64  
dtypes: float64(4), int64(1)
memory usage: 4.2 KB


None





class
0    0.333333
1    0.333333
2    0.333333
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
45,4.8,3.0,1.4,0.3,0
1,4.9,3.0,1.4,0.2,0
38,4.4,3.0,1.3,0.2,0
7,5.0,3.4,1.5,0.2,0
82,5.8,2.7,3.9,1.2,1
...,...,...,...,...,...
40,5.0,3.5,1.3,0.3,0
49,5.0,3.3,1.4,0.2,0
148,6.2,3.4,5.4,2.3,2
133,6.3,2.8,5.1,1.5,2


Val Set
<class 'pandas.core.frame.DataFrame'>
Index: 30 entries, 76 to 106
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  30 non-null     float64
 1   sepal width (cm)   30 non-null     float64
 2   petal length (cm)  30 non-null     float64
 3   petal width (cm)   30 non-null     float64
 4   class              30 non-null     int64  
dtypes: float64(4), int64(1)
memory usage: 1.4 KB


None





class
1    0.333333
2    0.333333
0    0.333333
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
76,6.8,2.8,4.8,1.4,1
107,7.3,2.9,6.3,1.8,2
30,4.8,3.1,1.6,0.2,0
25,5.0,3.0,1.6,0.2,0
42,4.4,3.2,1.3,0.2,0
14,5.8,4.0,1.2,0.2,0
58,6.6,2.9,4.6,1.3,1
68,6.2,2.2,4.5,1.5,1
85,6.0,3.4,4.5,1.6,1
143,6.8,3.2,5.9,2.3,2


## Repeated K-Fold Cross-Validation

### Random Split

In [22]:
# Same random 80/20 hold-out as in the K-Fold section (identical arguments
# and seed): df_test is kept aside, df_cv feeds the repeated K-Fold below.
df_cv, df_test = train_test_split(
    df,
    test_size=0.2,
    random_state=0,
)

In [23]:
# Cross-validation pool used by the repeated K-Fold loop.
get_data_summary(df_cv)

<class 'pandas.core.frame.DataFrame'>
Index: 120 entries, 137 to 47
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  120 non-null    float64
 1   sepal width (cm)   120 non-null    float64
 2   petal length (cm)  120 non-null    float64
 3   petal width (cm)   120 non-null    float64
 4   class              120 non-null    int64  
dtypes: float64(4), int64(1)
memory usage: 5.6 KB


None





class
2    0.366667
0    0.325000
1    0.308333
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
137,6.4,3.1,5.5,1.8,2
84,5.4,3.0,4.5,1.5,1
27,5.2,3.5,1.5,0.2,0
127,6.1,3.0,4.9,1.8,2
132,6.4,2.8,5.6,2.2,2
...,...,...,...,...,...
9,4.9,3.1,1.5,0.1,0
103,6.3,2.9,5.6,1.8,2
67,5.8,2.7,4.1,1.0,1
117,7.7,3.8,6.7,2.2,2


In [24]:
# Repeated K-Fold: 4-fold cross-validation run 3 times, i.e. 12 train/val
# iterations in total. random_state fixes the fold assignment so the
# results are reproducible.
rkf = RepeatedKFold(n_splits=4, n_repeats=3, random_state=0)

# enumerate(..., start=1) numbers the iterations, so no hand-maintained
# counter is needed.
for iter_num, (train_index, val_index) in enumerate(rkf.split(df_cv), start=1):
    # split() yields positional indices, hence .iloc rather than .loc.
    df_train, df_val = df_cv.iloc[train_index], df_cv.iloc[val_index]

    print("="*60)
    print(f"ITER {iter_num}\n")
    print("Train Set")
    get_data_summary(df_train)
    print("Val Set")
    get_data_summary(df_val)

    #perform training or hyperparameter tuning here

ITER 1

Train Set
<class 'pandas.core.frame.DataFrame'>
Index: 90 entries, 137 to 117
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  90 non-null     float64
 1   sepal width (cm)   90 non-null     float64
 2   petal length (cm)  90 non-null     float64
 3   petal width (cm)   90 non-null     float64
 4   class              90 non-null     int64  
dtypes: float64(4), int64(1)
memory usage: 4.2 KB


None





class
2    0.355556
1    0.322222
0    0.322222
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
137,6.4,3.1,5.5,1.8,2
84,5.4,3.0,4.5,1.5,1
127,6.1,3.0,4.9,1.8,2
132,6.4,2.8,5.6,2.2,2
59,5.2,2.7,3.9,1.4,1
...,...,...,...,...,...
21,5.1,3.7,1.5,0.4,0
9,4.9,3.1,1.5,0.1,0
103,6.3,2.9,5.6,1.8,2
67,5.8,2.7,4.1,1.0,1


Val Set
<class 'pandas.core.frame.DataFrame'>
Index: 30 entries, 27 to 47
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  30 non-null     float64
 1   sepal width (cm)   30 non-null     float64
 2   petal length (cm)  30 non-null     float64
 3   petal width (cm)   30 non-null     float64
 4   class              30 non-null     int64  
dtypes: float64(4), int64(1)
memory usage: 1.4 KB


None





class
2    0.400000
0    0.333333
1    0.266667
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
27,5.2,3.5,1.5,0.2,0
83,6.0,2.7,5.1,1.6,1
61,5.9,3.0,4.2,1.5,1
112,6.8,3.0,5.5,2.1,2
43,5.0,3.5,1.6,0.6,0
116,6.5,3.0,5.5,1.8,2
56,6.3,3.3,4.7,1.6,1
123,6.3,2.7,4.9,1.8,2
106,4.9,2.5,4.5,1.7,2
85,6.0,3.4,4.5,1.6,1


ITER 2

Train Set
<class 'pandas.core.frame.DataFrame'>
Index: 90 entries, 137 to 47
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  90 non-null     float64
 1   sepal width (cm)   90 non-null     float64
 2   petal length (cm)  90 non-null     float64
 3   petal width (cm)   90 non-null     float64
 4   class              90 non-null     int64  
dtypes: float64(4), int64(1)
memory usage: 4.2 KB


None





class
1    0.344444
2    0.333333
0    0.322222
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
137,6.4,3.1,5.5,1.8,2
27,5.2,3.5,1.5,0.2,0
59,5.2,2.7,3.9,1.4,1
83,6.0,2.7,5.1,1.6,1
61,5.9,3.0,4.2,1.5,1
...,...,...,...,...,...
9,4.9,3.1,1.5,0.1,0
103,6.3,2.9,5.6,1.8,2
67,5.8,2.7,4.1,1.0,1
117,7.7,3.8,6.7,2.2,2


Val Set
<class 'pandas.core.frame.DataFrame'>
Index: 30 entries, 84 to 36
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  30 non-null     float64
 1   sepal width (cm)   30 non-null     float64
 2   petal length (cm)  30 non-null     float64
 3   petal width (cm)   30 non-null     float64
 4   class              30 non-null     int64  
dtypes: float64(4), int64(1)
memory usage: 1.4 KB


None





class
2    0.466667
0    0.333333
1    0.200000
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
84,5.4,3.0,4.5,1.5,1
127,6.1,3.0,4.9,1.8,2
132,6.4,2.8,5.6,2.2,2
18,5.7,3.8,1.7,0.3,0
2,4.7,3.2,1.3,0.2,0
60,5.0,2.0,3.5,1.0,1
144,6.7,3.3,5.7,2.5,2
119,6.0,2.2,5.0,1.5,2
146,6.3,2.5,5.0,1.9,2
95,5.7,3.0,4.2,1.2,1


ITER 3

Train Set
<class 'pandas.core.frame.DataFrame'>
Index: 90 entries, 84 to 47
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  90 non-null     float64
 1   sepal width (cm)   90 non-null     float64
 2   petal length (cm)  90 non-null     float64
 3   petal width (cm)   90 non-null     float64
 4   class              90 non-null     int64  
dtypes: float64(4), int64(1)
memory usage: 4.2 KB


None





class
2    0.377778
0    0.344444
1    0.277778
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
84,5.4,3.0,4.5,1.5,1
27,5.2,3.5,1.5,0.2,0
127,6.1,3.0,4.9,1.8,2
132,6.4,2.8,5.6,2.2,2
18,5.7,3.8,1.7,0.3,0
...,...,...,...,...,...
87,6.3,2.3,4.4,1.3,1
36,5.5,3.5,1.3,0.2,0
9,4.9,3.1,1.5,0.1,0
67,5.8,2.7,4.1,1.0,1


Val Set
<class 'pandas.core.frame.DataFrame'>
Index: 30 entries, 137 to 117
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  30 non-null     float64
 1   sepal width (cm)   30 non-null     float64
 2   petal length (cm)  30 non-null     float64
 3   petal width (cm)   30 non-null     float64
 4   class              30 non-null     int64  
dtypes: float64(4), int64(1)
memory usage: 1.4 KB


None





class
1    0.400000
2    0.333333
0    0.266667
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
137,6.4,3.1,5.5,1.8,2
59,5.2,2.7,3.9,1.4,1
10,5.4,3.7,1.5,0.2,0
108,6.7,2.5,5.8,1.8,2
80,5.5,2.4,3.8,1.1,1
50,7.0,3.2,4.7,1.4,1
147,6.5,3.0,5.2,2.0,2
30,4.8,3.1,1.6,0.2,0
101,5.8,2.7,5.1,1.9,2
64,5.6,2.9,3.6,1.3,1


ITER 4

Train Set
<class 'pandas.core.frame.DataFrame'>
Index: 90 entries, 137 to 47
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  90 non-null     float64
 1   sepal width (cm)   90 non-null     float64
 2   petal length (cm)  90 non-null     float64
 3   petal width (cm)   90 non-null     float64
 4   class              90 non-null     int64  
dtypes: float64(4), int64(1)
memory usage: 4.2 KB


None





class
2    0.400000
0    0.311111
1    0.288889
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
137,6.4,3.1,5.5,1.8,2
84,5.4,3.0,4.5,1.5,1
27,5.2,3.5,1.5,0.2,0
127,6.1,3.0,4.9,1.8,2
132,6.4,2.8,5.6,2.2,2
...,...,...,...,...,...
36,5.5,3.5,1.3,0.2,0
21,5.1,3.7,1.5,0.4,0
103,6.3,2.9,5.6,1.8,2
117,7.7,3.8,6.7,2.2,2


Val Set
<class 'pandas.core.frame.DataFrame'>
Index: 30 entries, 92 to 67
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  30 non-null     float64
 1   sepal width (cm)   30 non-null     float64
 2   petal length (cm)  30 non-null     float64
 3   petal width (cm)   30 non-null     float64
 4   class              30 non-null     int64  
dtypes: float64(4), int64(1)
memory usage: 1.4 KB


None





class
1    0.366667
0    0.366667
2    0.266667
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
92,5.8,2.6,4.0,1.2,1
141,6.9,3.1,5.1,2.3,2
69,5.6,2.5,3.9,1.1,1
135,7.7,3.0,6.1,2.3,2
133,6.3,2.8,5.1,1.5,2
91,6.1,3.0,4.6,1.4,1
125,7.2,3.2,6.0,1.8,2
13,4.3,3.0,1.1,0.1,0
52,6.9,3.1,4.9,1.5,1
149,5.9,3.0,5.1,1.8,2


ITER 5

Train Set
<class 'pandas.core.frame.DataFrame'>
Index: 90 entries, 137 to 47
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  90 non-null     float64
 1   sepal width (cm)   90 non-null     float64
 2   petal length (cm)  90 non-null     float64
 3   petal width (cm)   90 non-null     float64
 4   class              90 non-null     int64  
dtypes: float64(4), int64(1)
memory usage: 4.2 KB


None





class
2    0.400000
0    0.344444
1    0.255556
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
137,6.4,3.1,5.5,1.8,2
84,5.4,3.0,4.5,1.5,1
27,5.2,3.5,1.5,0.2,0
127,6.1,3.0,4.9,1.8,2
132,6.4,2.8,5.6,2.2,2
...,...,...,...,...,...
21,5.1,3.7,1.5,0.4,0
9,4.9,3.1,1.5,0.1,0
103,6.3,2.9,5.6,1.8,2
117,7.7,3.8,6.7,2.2,2


Val Set
<class 'pandas.core.frame.DataFrame'>
Index: 30 entries, 59 to 67
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  30 non-null     float64
 1   sepal width (cm)   30 non-null     float64
 2   petal length (cm)  30 non-null     float64
 3   petal width (cm)   30 non-null     float64
 4   class              30 non-null     int64  
dtypes: float64(4), int64(1)
memory usage: 1.4 KB


None





class
1    0.466667
2    0.266667
0    0.266667
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
59,5.2,2.7,3.9,1.4,1
61,5.9,3.0,4.2,1.5,1
92,5.8,2.6,4.0,1.2,1
141,6.9,3.1,5.1,2.3,2
144,6.7,3.3,5.7,2.5,2
50,7.0,3.2,4.7,1.4,1
64,5.6,2.9,3.6,1.3,1
125,7.2,3.2,6.0,1.8,2
13,4.3,3.0,1.1,0.1,0
52,6.9,3.1,4.9,1.5,1


ITER 6

Train Set
<class 'pandas.core.frame.DataFrame'>
Index: 90 entries, 137 to 47
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  90 non-null     float64
 1   sepal width (cm)   90 non-null     float64
 2   petal length (cm)  90 non-null     float64
 3   petal width (cm)   90 non-null     float64
 4   class              90 non-null     int64  
dtypes: float64(4), int64(1)
memory usage: 4.2 KB


None





class
2    0.366667
0    0.333333
1    0.300000
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
137,6.4,3.1,5.5,1.8,2
27,5.2,3.5,1.5,0.2,0
127,6.1,3.0,4.9,1.8,2
132,6.4,2.8,5.6,2.2,2
59,5.2,2.7,3.9,1.4,1
...,...,...,...,...,...
9,4.9,3.1,1.5,0.1,0
103,6.3,2.9,5.6,1.8,2
67,5.8,2.7,4.1,1.0,1
117,7.7,3.8,6.7,2.2,2


Val Set
<class 'pandas.core.frame.DataFrame'>
Index: 30 entries, 84 to 88
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  30 non-null     float64
 1   sepal width (cm)   30 non-null     float64
 2   petal length (cm)  30 non-null     float64
 3   petal width (cm)   30 non-null     float64
 4   class              30 non-null     int64  
dtypes: float64(4), int64(1)
memory usage: 1.4 KB


None





class
2    0.366667
1    0.333333
0    0.300000
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
84,5.4,3.0,4.5,1.5,1
18,5.7,3.8,1.7,0.3,0
83,6.0,2.7,5.1,1.6,1
60,5.0,2.0,3.5,1.0,1
116,6.5,3.0,5.5,1.8,2
119,6.0,2.2,5.0,1.5,2
135,7.7,3.0,6.1,2.3,2
56,6.3,3.3,4.7,1.6,1
133,6.3,2.8,5.1,1.5,2
147,6.5,3.0,5.2,2.0,2


ITER 7

Train Set
<class 'pandas.core.frame.DataFrame'>
Index: 90 entries, 137 to 117
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  90 non-null     float64
 1   sepal width (cm)   90 non-null     float64
 2   petal length (cm)  90 non-null     float64
 3   petal width (cm)   90 non-null     float64
 4   class              90 non-null     int64  
dtypes: float64(4), int64(1)
memory usage: 4.2 KB


None





class
1    0.366667
2    0.344444
0    0.288889
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
137,6.4,3.1,5.5,1.8,2
84,5.4,3.0,4.5,1.5,1
27,5.2,3.5,1.5,0.2,0
59,5.2,2.7,3.9,1.4,1
18,5.7,3.8,1.7,0.3,0
...,...,...,...,...,...
21,5.1,3.7,1.5,0.4,0
9,4.9,3.1,1.5,0.1,0
103,6.3,2.9,5.6,1.8,2
67,5.8,2.7,4.1,1.0,1


Val Set
<class 'pandas.core.frame.DataFrame'>
Index: 30 entries, 127 to 47
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  30 non-null     float64
 1   sepal width (cm)   30 non-null     float64
 2   petal length (cm)  30 non-null     float64
 3   petal width (cm)   30 non-null     float64
 4   class              30 non-null     int64  
dtypes: float64(4), int64(1)
memory usage: 1.4 KB


None





class
2    0.433333
0    0.433333
1    0.133333
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
127,6.1,3.0,4.9,1.8,2
132,6.4,2.8,5.6,2.2,2
43,5.0,3.5,1.6,0.6,0
108,6.7,2.5,5.8,1.8,2
123,6.3,2.7,4.9,1.8,2
106,4.9,2.5,4.5,1.7,2
85,6.0,3.4,4.5,1.6,1
101,5.8,2.7,5.1,1.9,2
48,5.3,3.7,1.5,0.2,0
111,6.4,2.7,5.3,1.9,2


ITER 8

Train Set
<class 'pandas.core.frame.DataFrame'>
Index: 90 entries, 84 to 47
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  90 non-null     float64
 1   sepal width (cm)   90 non-null     float64
 2   petal length (cm)  90 non-null     float64
 3   petal width (cm)   90 non-null     float64
 4   class              90 non-null     int64  
dtypes: float64(4), int64(1)
memory usage: 4.2 KB


None





class
2    0.355556
0    0.333333
1    0.311111
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
84,5.4,3.0,4.5,1.5,1
127,6.1,3.0,4.9,1.8,2
132,6.4,2.8,5.6,2.2,2
59,5.2,2.7,3.9,1.4,1
18,5.7,3.8,1.7,0.3,0
...,...,...,...,...,...
88,5.6,3.0,4.1,1.3,1
70,5.9,3.2,4.8,1.8,1
36,5.5,3.5,1.3,0.2,0
67,5.8,2.7,4.1,1.0,1


Val Set
<class 'pandas.core.frame.DataFrame'>
Index: 30 entries, 137 to 117
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  30 non-null     float64
 1   sepal width (cm)   30 non-null     float64
 2   petal length (cm)  30 non-null     float64
 3   petal width (cm)   30 non-null     float64
 4   class              30 non-null     int64  
dtypes: float64(4), int64(1)
memory usage: 1.4 KB


None





class
2    0.4
0    0.3
1    0.3
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
137,6.4,3.1,5.5,1.8,2
27,5.2,3.5,1.5,0.2,0
112,6.8,3.0,5.5,2.1,2
2,4.7,3.2,1.3,0.2,0
10,5.4,3.7,1.5,0.2,0
69,5.6,2.5,3.9,1.1,1
80,5.5,2.4,3.8,1.1,1
146,6.3,2.5,5.0,1.9,2
89,5.5,2.5,4.0,1.3,1
91,6.1,3.0,4.6,1.4,1


ITER 9

Train Set
<class 'pandas.core.frame.DataFrame'>
Index: 90 entries, 137 to 47
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  90 non-null     float64
 1   sepal width (cm)   90 non-null     float64
 2   petal length (cm)  90 non-null     float64
 3   petal width (cm)   90 non-null     float64
 4   class              90 non-null     int64  
dtypes: float64(4), int64(1)
memory usage: 4.2 KB


None





class
2    0.344444
0    0.344444
1    0.311111
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
137,6.4,3.1,5.5,1.8,2
84,5.4,3.0,4.5,1.5,1
27,5.2,3.5,1.5,0.2,0
127,6.1,3.0,4.9,1.8,2
132,6.4,2.8,5.6,2.2,2
...,...,...,...,...,...
21,5.1,3.7,1.5,0.4,0
9,4.9,3.1,1.5,0.1,0
103,6.3,2.9,5.6,1.8,2
67,5.8,2.7,4.1,1.0,1


Val Set
<class 'pandas.core.frame.DataFrame'>
Index: 30 entries, 43 to 117
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  30 non-null     float64
 1   sepal width (cm)   30 non-null     float64
 2   petal length (cm)  30 non-null     float64
 3   petal width (cm)   30 non-null     float64
 4   class              30 non-null     int64  
dtypes: float64(4), int64(1)
memory usage: 1.4 KB


None





class
2    0.433333
1    0.300000
0    0.266667
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
43,5.0,3.5,1.6,0.6,0
10,5.4,3.7,1.5,0.2,0
60,5.0,2.0,3.5,1.0,1
123,6.3,2.7,4.9,1.8,2
106,4.9,2.5,4.5,1.7,2
125,7.2,3.2,6.0,1.8,2
48,5.3,3.7,1.5,0.2,0
20,5.4,3.4,1.7,0.2,0
68,6.2,2.2,4.5,1.5,1
12,4.8,3.0,1.4,0.1,0


ITER 10

Train Set
<class 'pandas.core.frame.DataFrame'>
Index: 90 entries, 137 to 47
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  90 non-null     float64
 1   sepal width (cm)   90 non-null     float64
 2   petal length (cm)  90 non-null     float64
 3   petal width (cm)   90 non-null     float64
 4   class              90 non-null     int64  
dtypes: float64(4), int64(1)
memory usage: 4.2 KB


None





class
2    0.377778
0    0.333333
1    0.288889
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
137,6.4,3.1,5.5,1.8,2
84,5.4,3.0,4.5,1.5,1
27,5.2,3.5,1.5,0.2,0
127,6.1,3.0,4.9,1.8,2
59,5.2,2.7,3.9,1.4,1
...,...,...,...,...,...
9,4.9,3.1,1.5,0.1,0
103,6.3,2.9,5.6,1.8,2
67,5.8,2.7,4.1,1.0,1
117,7.7,3.8,6.7,2.2,2


Val Set
<class 'pandas.core.frame.DataFrame'>
Index: 30 entries, 132 to 70
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  30 non-null     float64
 1   sepal width (cm)   30 non-null     float64
 2   petal length (cm)  30 non-null     float64
 3   petal width (cm)   30 non-null     float64
 4   class              30 non-null     int64  
dtypes: float64(4), int64(1)
memory usage: 1.4 KB


None





class
1    0.366667
2    0.333333
0    0.300000
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
132,6.4,2.8,5.6,2.2,2
18,5.7,3.8,1.7,0.3,0
83,6.0,2.7,5.1,1.6,1
2,4.7,3.2,1.3,0.2,0
116,6.5,3.0,5.5,1.8,2
108,6.7,2.5,5.8,1.8,2
135,7.7,3.0,6.1,2.3,2
56,6.3,3.3,4.7,1.6,1
50,7.0,3.2,4.7,1.4,1
101,5.8,2.7,5.1,1.9,2


ITER 11

Train Set
<class 'pandas.core.frame.DataFrame'>
Index: 90 entries, 127 to 47
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  90 non-null     float64
 1   sepal width (cm)   90 non-null     float64
 2   petal length (cm)  90 non-null     float64
 3   petal width (cm)   90 non-null     float64
 4   class              90 non-null     int64  
dtypes: float64(4), int64(1)
memory usage: 4.2 KB


None





class
2    0.344444
1    0.333333
0    0.322222
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
127,6.1,3.0,4.9,1.8,2
132,6.4,2.8,5.6,2.2,2
18,5.7,3.8,1.7,0.3,0
83,6.0,2.7,5.1,1.6,1
92,5.8,2.6,4.0,1.2,1
...,...,...,...,...,...
70,5.9,3.2,4.8,1.8,1
87,6.3,2.3,4.4,1.3,1
36,5.5,3.5,1.3,0.2,0
117,7.7,3.8,6.7,2.2,2


Val Set
<class 'pandas.core.frame.DataFrame'>
Index: 30 entries, 137 to 67
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  30 non-null     float64
 1   sepal width (cm)   30 non-null     float64
 2   petal length (cm)  30 non-null     float64
 3   petal width (cm)   30 non-null     float64
 4   class              30 non-null     int64  
dtypes: float64(4), int64(1)
memory usage: 1.4 KB


None





class
2    0.433333
0    0.333333
1    0.233333
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
137,6.4,3.1,5.5,1.8,2
84,5.4,3.0,4.5,1.5,1
27,5.2,3.5,1.5,0.2,0
59,5.2,2.7,3.9,1.4,1
61,5.9,3.0,4.2,1.5,1
141,6.9,3.1,5.1,2.3,2
144,6.7,3.3,5.7,2.5,2
119,6.0,2.2,5.0,1.5,2
133,6.3,2.8,5.1,1.5,2
85,6.0,3.4,4.5,1.6,1


ITER 12

Train Set
<class 'pandas.core.frame.DataFrame'>
Index: 90 entries, 137 to 117
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  90 non-null     float64
 1   sepal width (cm)   90 non-null     float64
 2   petal length (cm)  90 non-null     float64
 3   petal width (cm)   90 non-null     float64
 4   class              90 non-null     int64  
dtypes: float64(4), int64(1)
memory usage: 4.2 KB


None





class
2    0.4
1    0.3
0    0.3
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
137,6.4,3.1,5.5,1.8,2
84,5.4,3.0,4.5,1.5,1
27,5.2,3.5,1.5,0.2,0
132,6.4,2.8,5.6,2.2,2
59,5.2,2.7,3.9,1.4,1
...,...,...,...,...,...
21,5.1,3.7,1.5,0.4,0
9,4.9,3.1,1.5,0.1,0
103,6.3,2.9,5.6,1.8,2
67,5.8,2.7,4.1,1.0,1


Val Set
<class 'pandas.core.frame.DataFrame'>
Index: 30 entries, 127 to 47
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  30 non-null     float64
 1   sepal width (cm)   30 non-null     float64
 2   petal length (cm)  30 non-null     float64
 3   petal width (cm)   30 non-null     float64
 4   class              30 non-null     int64  
dtypes: float64(4), int64(1)
memory usage: 1.4 KB


None





class
0    0.400000
1    0.333333
2    0.266667
Name: proportion, dtype: float64




Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
127,6.1,3.0,4.9,1.8,2
92,5.8,2.6,4.0,1.2,1
112,6.8,3.0,5.5,2.1,2
69,5.6,2.5,3.9,1.1,1
80,5.5,2.4,3.8,1.1,1
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
30,4.8,3.1,1.6,0.2,0
94,5.6,2.7,4.2,1.3,1
64,5.6,2.9,3.6,1.3,1


### R-Stratified Split