# In [None]:
from pandas import read_csv
from matplotlib import pyplot
from pandas.plotting import scatter_matrix
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.svm import SVC

# Part I
def load_dataset():
    """Load ``iris.csv`` and return its contents.

    The path is relative, so the file is looked up from the current
    working directory.

    Returns:
        Whatever ``read_csv`` produces for ``iris.csv`` (a pandas DataFrame).
    """
    csv_path = "iris.csv"
    return read_csv(csv_path)

# Part II
def summarize_dataset(dataset):
    """Print a four-part textual overview of ``dataset``.

    The sections, in order, are: the shape, the first ten rows, the
    ``describe()`` summary, and the row count per value of the ``'class'``
    column.

    Args:
        dataset: A DataFrame-like object providing ``shape``, ``head``,
            ``describe`` and ``groupby``; it must contain a ``'class'`` column.
    """
    # Each section is computed lazily so that a heading is always printed
    # before the value beneath it is evaluated.
    sections = (
        ("Dataset dimension:", lambda: dataset.shape),
        ("\nFirst 10 rows of dataset:", lambda: dataset.head(10)),
        ("\nStatistical summary:", lambda: dataset.describe()),
        ("\nClass Distribution:", lambda: dataset.groupby('class').size()),
    )
    for heading, compute in sections:
        print(heading)
        print(compute())

# Part III
def print_plot_univariate(dataset):
    """Draw histograms for ``dataset`` and display them.

    Args:
        dataset: Object exposing a ``hist()`` method (presumably the iris
            DataFrame loaded elsewhere in this file).
    """
    # hist() only draws; nothing appears until pyplot.show() is called.
    dataset.hist()
    pyplot.show()

def print_plot_multivariate(dataset):
    """Draw a scatter-plot matrix for ``dataset`` and display it.

    Args:
        dataset: Data passed straight to ``pandas.plotting.scatter_matrix``
            (presumably the iris DataFrame loaded elsewhere in this file).
    """
    # scatter_matrix() only draws; pyplot.show() renders the figure.
    scatter_matrix(dataset)
    pyplot.show()

# Part IV
def my_print_and_test_models(dataset):
    """Cross-validate six classifiers on a training split and print the scores.

    Columns 0-3 of ``dataset.values`` are used as features and column 4 as
    the target. The rows are split 80/20 with ``random_state=1``; only the
    80% training portion is evaluated here. Each model is scored with
    10-fold shuffled cross-validation on accuracy, and one line of the form
    ``name: mean (std)`` is printed per model.

    Args:
        dataset: Object whose ``.values`` is a 2-D array with at least five
            columns (presumably the iris DataFrame loaded elsewhere in this
            file).
    """
    data = dataset.values
    features = data[:, 0:4]
    target = data[:, 4]
    # The held-out 20% is not used by this function, so it is discarded
    # rather than bound to unused names.
    X_train, _, Y_train, _ = train_test_split(
        features, target, test_size=0.20, random_state=1
    )

    models = [
        ('DecisionTree', DecisionTreeClassifier()),
        ('GaussianNB', GaussianNB()),
        ('KNeighbors', KNeighborsClassifier()),
        ('LogisticRegression', LogisticRegression(max_iter=200)),
        ('LinearDiscriminant', LinearDiscriminantAnalysis()),
        ('SVM', SVC(gamma='auto')),
    ]

    # The splitter is loop-invariant: with an integer random_state every
    # split() call reseeds, so one instance gives each model the same folds.
    kfold = KFold(n_splits=10, random_state=1, shuffle=True)

    print("\nModel Evaluation Results:")
    for name, model in models:
        cv_results = cross_val_score(
            model, X_train, Y_train, cv=kfold, scoring='accuracy'
        )
        print(f"{name}: {cv_results.mean():.6f} ({cv_results.std():.6f})")