In [10]:
import os

import pandas as pd
from sklearn.preprocessing import StandardScaler
from ucimlrepo import fetch_ucirepo

In [None]:
def get_ionosphere_data(csv_path="ionosphere_data.csv", scale=True):
    """Load the UCI Ionosphere dataset, using a local CSV file as a cache.

    If ``csv_path`` exists it is read with ``pd.read_csv``; otherwise the
    dataset is fetched with ``fetch_ucirepo(id=52)`` and written to
    ``csv_path``. The cache stores the *unscaled* features followed by the
    label column, so the same file serves both ``scale=True`` and
    ``scale=False`` calls.

    Parameters
    ----------
    csv_path : str
        Location of the cached CSV (read if present, created otherwise).
    scale : bool
        If True (default), the features are passed through
        ``StandardScaler().fit_transform`` before being returned.
        If False, the features are returned exactly as stored/fetched.

    Returns
    -------
    X : numpy.ndarray
        Feature matrix, one row per sample.
    y : numpy.ndarray
        Labels. On download, the "Class" column mapped ``"g" -> 1`` and
        ``"b" -> -1``; on a cache hit, the last column of the CSV as stored.

    Notes
    -----
    A cache file that already contains standardized features (e.g. one
    written before this function honored ``scale``) cannot be unscaled:
    ``scale=False`` returns whatever the file holds. Delete the file to
    re-download raw data.
    """
    if os.path.exists(csv_path):
        print(f"Loading data from existing file: {csv_path}")
        cached = pd.read_csv(csv_path)

        # Layout written by the download branch: features first, label last.
        features = cached.iloc[:, :-1]
        labels = cached.iloc[:, -1]
    else:
        print(f"File {csv_path} not found. Downloading from UCI repository...")
        ionosphere = fetch_ucirepo(id=52)

        # Reset both indices so the column-wise concat below pairs rows by
        # position rather than by whatever index labels the fetch produced.
        features = ionosphere.data.features.reset_index(drop=True)
        labels = (
            ionosphere.data.targets["Class"]
            .map({"g": 1, "b": -1})
            .reset_index(drop=True)
        )

        # Cache the raw features; scaling is applied on the way out so that
        # the `scale` flag is honored regardless of which branch ran.
        pd.concat([features, labels], axis=1).to_csv(csv_path, index=False)
        print(f"Downloaded data and saved unscaled features to {csv_path}")

    X = features.to_numpy()
    if scale:
        X = StandardScaler().fit_transform(X)

    return X, labels.to_numpy()


In [14]:
# Usage: read the cached CSV (or download it on first run), then report the shape
X, y = get_ionosphere_data(csv_path="ionosphere_data.csv", scale=True)
print(f"Loaded data with {X.shape[0]} samples and {X.shape[1]} features")

File ionosphere_data.csv not found. Downloading from UCI repository...
Downloaded data, scaled features, and saved to ionosphere_data.csv
Loaded data with 351 samples and 34 features


numpy.float64