In [None]:
# Let's start by processing the Iris dataset
# We will use the pandas library to load the data
import pandas as pd
import numpy as np
from typing import List, Tuple

# Read the raw Iris CSV. header=None tells pandas not to treat the first
# record as column names, so the columns are labelled by integer position.
data = pd.read_csv('iris.csv', header=None)

# Print, in order: a preview of the first rows, the (rows, columns) shape,
# and the dtype pandas inferred for every column.
print(data.head(), data.shape, data.dtypes, sep='\n')

In [None]:
# Count the missing entries in every column
print(data.isna().sum())
# Show the column at position 4 as the cell's displayed result
data.iloc[:, 4]

In [None]:
# Map the target values to integers for the classification.
# The lookup goes through a callable with a pass-through default instead of a
# plain dict: a plain dict mapping turns every value it does not contain into
# NaN, so re-running this cell (when column 4 already holds 0/1/2) would wipe
# the labels. With the pass-through, values that are already integer codes, or
# that are not one of the three known labels, are left unchanged.
species_codes = {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}
data[4] = data[4].map(lambda label: species_codes.get(label, label))
print(data.head())

In [None]:
# Switch to the white-wine quality data. The file is semicolon-separated.
# Note: this rebinds `data`, so the Iris frame loaded earlier is no longer
# reachable under that name.
data = pd.read_csv('winequality-white.csv', delimiter=';')
print(data.head(5))

In [18]:
# Report the number of missing values per column, followed by the dtype of
# each column.
print(data.isna().sum(), data.dtypes, sep='\n')

fixed acidity           0
volatile acidity        0
citric acid             0
residual sugar          0
chlorides               0
free sulfur dioxide     0
total sulfur dioxide    0
density                 0
pH                      0
sulphates               0
alcohol                 0
quality                 0
dtype: int64


In [None]:
# Define data handling functions
def load_iris() -> Tuple[np.ndarray, np.ndarray]:
    """Load the Iris dataset and return the features and one-hot labels.

    Reads ``iris.csv`` from the current working directory. Two file layouts
    are accepted:

    * a file with a header row containing a ``species`` column whose values
      are ``setosa`` / ``versicolor`` / ``virginica``;
    * a headerless file whose last column holds the label, either bare or
      with an ``Iris-`` prefix (e.g. ``Iris-setosa``).

    Returns:
        A tuple ``(X, y)``. ``X`` holds every column except the last one.
        ``y`` is the one-hot encoding (via ``pd.get_dummies``) of the last
        column after the labels were replaced by the integer codes
        setosa=0, versicolor=1, virginica=2; it has one column per code that
        actually occurs in the file.

    Raises:
        ValueError: If the label column contains a value that is not one of
            the recognised species names.
    """
    species_codes = {
        'setosa': 0, 'versicolor': 1, 'virginica': 2,
        'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2,
    }

    data = pd.read_csv("iris.csv")
    if 'species' in data.columns:
        label_column = 'species'
    else:
        # No 'species' header was found, so the first line was most likely a
        # data record that pandas consumed as column names. Re-read without a
        # header so that record is kept, and use the last column as the label.
        data = pd.read_csv("iris.csv", header=None)
        label_column = data.columns[-1]

    # Map the target values to integers for the classification. Unknown
    # labels come back as NaN; fail loudly instead of letting get_dummies
    # silently encode them as all-zero rows.
    codes = data[label_column].map(species_codes)
    unknown = data.loc[codes.isna(), label_column]
    if not unknown.empty:
        raise ValueError(
            "Unrecognised species label(s) in iris.csv: "
            + ", ".join(sorted(set(map(str, unknown))))
        )
    data[label_column] = codes

    X = data.iloc[:, :-1].values
    y = pd.get_dummies(data.iloc[:, -1]).values
    return X, y


def load_and_normalize_wine_quality() -> Tuple[np.ndarray, np.ndarray]:
    """Load the Wine Quality dataset and min-max normalize the features.

    Reads the semicolon-separated ``winequality-white.csv`` from the current
    working directory. Every column except the last is treated as a feature
    and rescaled column-wise with ``(x - min) / (max - min)``. A column whose
    values are all identical has a zero range; it is scaled to all zeros
    instead of producing NaN from a division by zero.

    Returns:
        A tuple ``(X, y)``. ``X`` is the array of normalized features and
        ``y`` is the one-hot encoding (via ``pd.get_dummies``) of the last
        column, with one column per distinct value found in it.
    """
    data = pd.read_csv("winequality-white.csv", delimiter=';')

    features = data.iloc[:, :-1]
    col_min = features.min()
    col_range = features.max() - col_min
    # Constant columns have range 0; divide by 1 instead so they map to 0.0
    # rather than NaN.
    col_range = col_range.mask(col_range == 0, 1)
    normalized_data = (features - col_min) / col_range

    y = pd.get_dummies(data.iloc[:, -1]).values
    return normalized_data.values, y


def custom_train_test_split(X: np.ndarray, y: np.ndarray, test_size=0.2, random_state=42) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Shuffle the samples and split them into training and testing sets.

    Args:
        X: Feature array; samples are indexed along the first axis.
        y: Label array with the same number of samples as ``X``.
        test_size: Fraction of the samples (between 0 and 1 inclusive) placed
            in the test set. The test set size is ``int(n_samples * test_size)``,
            i.e. rounded down.
        random_state: Seed passed to ``np.random.seed``. Note that this
            reseeds NumPy's global random generator as a side effect.

    Returns:
        ``(X_train, X_test, y_train, y_test)``.

    Raises:
        ValueError: If ``X`` and ``y`` have a different number of samples, or
            if ``test_size`` is not within ``[0, 1]``.
    """
    n_samples = X.shape[0]
    if len(y) != n_samples:
        # Without this check a longer y would be indexed without error and
        # the extra labels silently ignored.
        raise ValueError(
            f"X and y must have the same number of samples, got {n_samples} and {len(y)}"
        )
    if not 0 <= test_size <= 1:
        raise ValueError(f"test_size must be between 0 and 1, got {test_size}")

    np.random.seed(random_state)
    indices = np.random.permutation(n_samples)
    # The first n_test shuffled indices form the test set, the rest the train set.
    n_test = int(n_samples * test_size)
    test_indices = indices[:n_test]
    train_indices = indices[n_test:]
    return X[train_indices], X[test_indices], y[train_indices], y[test_indices]


In [None]:
# Call the loader; as the last expression in the cell, the returned (X, y)
# tuple is shown as the cell's output.
load_iris()