In [None]:
%pip install ucimlrepo

Collecting ucimlrepo
  Downloading ucimlrepo-0.0.7-py3-none-any.whl.metadata (5.5 kB)
Downloading ucimlrepo-0.0.7-py3-none-any.whl (8.0 kB)
Installing collected packages: ucimlrepo
Successfully installed ucimlrepo-0.0.7


In [None]:
from ucimlrepo import fetch_ucirepo

# fetch dataset
concrete_compressive_strength = fetch_ucirepo(id=165)

# data (as pandas dataframes)
X = concrete_compressive_strength.data.features
y = concrete_compressive_strength.data.targets

# metadata
print(concrete_compressive_strength.metadata)

# variable information
print(concrete_compressive_strength.variables)


{'uci_id': 165, 'name': 'Concrete Compressive Strength', 'repository_url': 'https://archive.ics.uci.edu/dataset/165/concrete+compressive+strength', 'data_url': 'https://archive.ics.uci.edu/static/public/165/data.csv', 'abstract': 'Concrete is the most important material in civil engineering. The concrete compressive strength is a highly nonlinear function of age and ingredients. ', 'area': 'Physics and Chemistry', 'tasks': ['Regression'], 'characteristics': ['Multivariate'], 'num_instances': 1030, 'num_features': 8, 'feature_types': ['Real'], 'demographics': [], 'target_col': ['Concrete compressive strength'], 'index_col': None, 'has_missing_values': 'no', 'missing_values_symbol': None, 'year_of_dataset_creation': 1998, 'last_updated': 'Sun Feb 11 2024', 'dataset_doi': '10.24432/C5PK67', 'creators': ['I-Cheng Yeh'], 'intro_paper': {'ID': 383, 'type': 'NATIVE', 'title': 'Modeling of strength of high-performance concrete using artificial neural networks', 'authors': 'I. Yeh', 'venue': 'C

In [None]:
from ANN_PSO import ANN_PSO



In [None]:
print(y.values[500:])

[[57.92]
 [25.61]
 [33.49]
 [59.59]
 [29.55]
 [37.92]
 [61.86]
 [62.05]
 [32.01]
 [72.1 ]
 [39.  ]
 [65.7 ]
 [32.11]
 [40.29]
 [74.36]
 [21.97]
 [ 9.85]
 [15.07]
 [23.25]
 [43.73]
 [13.4 ]
 [24.13]
 [44.52]
 [62.94]
 [59.49]
 [25.12]
 [23.64]
 [35.75]
 [38.61]
 [68.75]
 [66.78]
 [23.85]
 [32.07]
 [11.65]
 [19.2 ]
 [48.85]
 [39.6 ]
 [43.94]
 [34.57]
 [54.32]
 [24.4 ]
 [15.62]
 [21.86]
 [10.22]
 [14.6 ]
 [18.75]
 [31.97]
 [23.4 ]
 [25.57]
 [41.68]
 [27.74]
 [ 8.2 ]
 [ 9.62]
 [25.42]
 [15.69]
 [27.94]
 [32.63]
 [17.24]
 [19.77]
 [39.44]
 [25.75]
 [33.08]
 [24.07]
 [21.82]
 [21.07]
 [14.84]
 [32.05]
 [11.96]
 [25.45]
 [22.49]
 [25.22]
 [39.7 ]
 [13.09]
 [38.7 ]
 [ 7.51]
 [17.58]
 [21.18]
 [18.2 ]
 [17.2 ]
 [22.63]
 [21.86]
 [12.37]
 [25.73]
 [37.81]
 [21.92]
 [33.04]
 [14.54]
 [26.91]
 [ 8.  ]
 [31.9 ]
 [10.34]
 [19.77]
 [37.44]
 [11.48]
 [24.44]
 [17.6 ]
 [10.73]
 [31.38]
 [13.22]
 [20.97]
 [27.04]
 [32.04]
 [35.17]
 [36.45]
 [38.89]
 [ 6.47]
 [12.84]
 [18.42]
 [21.95]
 [24.1 ]
 [25.08]
 

In [None]:
from sklearn.preprocessing import StandardScaler

# 1. standardize the data
scaler_x = StandardScaler()
scaler_y = StandardScaler()

x_scaled = scaler_x.fit_transform(x.values)

y_scaled = scaler_y.fit_transform(y.values.reshape(-1,1))

x_train_scaled = x_scaled[:800]
x_test_scaled  = x_scaled[800:]

y_train_scaled = y_scaled[:800]
y_test         = y.values[800:]

# 2. initialize the model
ann_pso = ANN_PSO(
    [50,25,1],
    "tanh", "linear",
    60, 700,
    0.8, 1.6, 0.95, 0.2, 0.0005,
    -1.2, 1.2
)

# 3. train the model
ann_pso.fit(x_train_scaled, y_train_scaled)

# 4. predict and transform back to original scale
y_pred_scaled = ann_pso.predict(x_test_scaled)
y_pred = scaler_y.inverse_transform(y_pred_scaled)


It took a long time to train, and in colab it need to train again if we close the page. So let's save the model


In [None]:
import pickle
import os

# save standardliser
model_data = {
    'ann_pso': ann_pso,
    'scaler_X': scaler_X,
    'scaler_y': scaler_y
}

with open('/content/concrete_model.pkl', 'wb') as f:
    pickle.dump(model_data, f)

print("Model saving success")


Model saving success


And then let's load the model

In [None]:
def load_model():
    try:
        with open('/content/concrete_model.pkl', 'rb') as f:
            model_data = pickle.load(f)

        print("Data load successfully")
        return model_data['ann_pso'], model_data['scaler_X'], model_data['scaler_y']

    except FileNotFoundError:
        print("File not found")
        return None
    except Exception as e:
        print(f"Fail to load: {e}")
        return None

# load model
ann_pso_loaded, scaler_X_loaded, scaler_y_loaded = load_model()

if ann_pso_loaded is not None:
    print("Model loaded successfully")
else:
    print("Failed to load the model")

Data load successfully
Model loaded successfully


In [None]:
print("pred:", y_pred[:10])
print("true:", y_test[:10])

pred: [[10.03779966]
 [ 2.08302778]
 [21.65145428]
 [64.82472119]
 [62.62714602]
 [60.89249991]
 [45.92268457]
 [43.49303126]
 [11.1892558 ]
 [ 2.08302778]]
true: [[13.71]
 [19.69]
 [31.65]
 [19.11]
 [39.58]
 [48.79]
 [24.  ]
 [37.42]
 [11.47]
 [19.69]]


In [None]:
import numpy as np
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import KFold


def cross_validate_ann_pso(X, y, model_params, n_splits=5):
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)

    mae_scores = []
    rmse_scores = []

    fold = 1
    for train_idx, test_idx in kf.split(X):

        print(f"\n===== FOLD {fold}/{n_splits} =====")

        X_train, X_test = X[train_idx], X[test_idx]
        y_train, y_test = y[train_idx], y[test_idx]

        # Standardisation
        scaler_X = StandardScaler()
        scaler_y = StandardScaler()

        X_train_scaled = scaler_X.fit_transform(X_train)
        X_test_scaled = scaler_X.transform(X_test)

        y_train_scaled = scaler_y.fit_transform(y_train.reshape(-1,1))
        y_test_true = y_test  # valeurs réelles non-scalées

        # Initialiser un nouveau modèle ANN_PSO
        ann = ANN_PSO(*model_params)

        # Entraîner
        ann.fit(X_train_scaled, y_train_scaled)

        # Prédiction
        y_pred_scaled = ann.predict(X_test_scaled)
        y_pred = scaler_y.inverse_transform(y_pred_scaled)

        # Metrics
        mae  = mean_absolute_error(y_test_true, y_pred)


        mae_scores.append(mae)


        print(f"MAE  = {mae:.4f}")


        fold += 1

    print("\n========== Résultats finaux ==========")
    print(f"MAE moyen  = {np.mean(mae_scores):.4f}  (± {np.std(mae_scores):.4f})")
    print(f"RMSE moyen = {np.mean(rmse_scores):.4f}  (± {np.std(rmse_scores):.4f})")

    return mae_scores, rmse_scores
model_params = (
    [50,25,1],         # architecture
    "tanh", "linear",  # activations
    60, 700,           # swarmsize, n_iter
    0.8, 1.6, 0.95,    # alpha, beta, gamma
    0.2, 0.0005,       # sigma, epsilon
    -1.2, 1.2          # min_bound, max_bound
)
mae_scores, rmse_scores = cross_validate_ann_pso(
    X.values,
    y.values,
    model_params,
    n_splits=5
)



===== FOLD 1/5 =====
MAE  = 23.1960

===== FOLD 2/5 =====
MAE  = 22.2191

===== FOLD 3/5 =====
MAE  = 21.0435

===== FOLD 4/5 =====
MAE  = 20.4427

===== FOLD 5/5 =====


KeyboardInterrupt: 

Q1 : ANN Architecture Effects (with 5-Fold Cross Validation)



In [18]:
import pickle
import os
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import KFold
import time

def study_ann_architecture_with_cv():

    print("STUDY 1: ANN Architecture Effects (with 5-Fold Cross Validation)")

    # Load data
    from ucimlrepo import fetch_ucirepo
    concrete_compressive_strength = fetch_ucirepo(id=165)
    X = concrete_compressive_strength.data.features.values
    y = concrete_compressive_strength.data.targets.values

    # Create folder
    study_dir = '/content/study1_architecture_cv'
    os.makedirs(study_dir, exist_ok=True)

    # Architectures set
    architectures = [
        ([10, 1], "1hl_10n"),
        ([20, 10, 1], "2hl_20_10n"),
        ([50, 25, 1], "3hl_50_25n"),
        ([30, 20, 10, 1], "4hl_30_20_10n")
    ]

    all_results = []

    for arch, arch_name in architectures:
        print(f"\n Testing architecture: {arch_name} - {arch}")

        # Parameters
        model_params = (arch, "tanh", "linear", 30, 100, 0.8, 1.6, 0.95, 0.2, 0.0005, -1.2, 1.2)

        # 10 times independent run & 5 fold cv
        run_results = execute_independent_runs_with_cv(X, y, model_params, f"arch_{arch_name}",
                                                      study_dir, n_runs=10, n_splits=5)
        all_results.extend(run_results)

    # save data
    results_df = pd.DataFrame(all_results)
    results_file = f'{study_dir}/architecture_study_cv_results.csv'
    results_df.to_csv(results_file, index=False)

    print(f"\n Completed! ")
    print(f" Results saved to: {results_file}")

    display_architecture_cv_results(results_df)
    return results_df

10 times independent run & 5 fold cv

In [17]:
def execute_independent_runs_with_cv(X, y, model_params, config_name, study_dir, n_runs=10, n_splits=5):
# 10 times independent run & 5 fold cv
    results = []

    for run in range(n_runs):
        run_name = f"{config_name}_run{run+1:02d}"
        print(f" {run_name} (with {n_splits}-fold CV)...")

        # If result exist?
        result_file = f"{study_dir}/cv_results_{run_name}.pkl"

        if os.path.exists(result_file):
            # Load exist result
            with open(result_file, 'rb') as f:
                run_result = pickle.load(f)
            print(f"  Loaded existing CV results - Avg MAE: {run_result['cv_mae_mean']:.4f}")
        else:
            # execute new one
            run_result = execute_single_cv_run(X, y, model_params, run, run_name, n_splits)

            # save result
            with open(result_file, 'wb') as f:
                pickle.dump(run_result, f)

            print(f"CV - Avg MAE: {run_result['cv_mae_mean']:.4f} ± {run_result['cv_mae_std']:.4f}")

        results.append(run_result)

    return results

Single run with 5 folds

In [19]:
def execute_single_cv_run(X, y, model_params, run_seed, run_name, n_splits=5):
# Single run with 5 folds
    start_time = time.time()

    # random seeds
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=42 + run_seed * 13)

    fold_mae_scores = []
    fold_rmse_scores = []
    fold_times = []
    all_fold_results = []

    for fold, (train_idx, test_idx) in enumerate(kf.split(X)):
        fold_start = time.time()

        # Data preparation
        X_train, X_test = X[train_idx], X[test_idx]
        y_train, y_test = y[train_idx], y[test_idx]

        # Standardisation
        scaler_X = StandardScaler()
        scaler_y = StandardScaler()
        X_train_scaled = scaler_X.fit_transform(X_train)
        X_test_scaled = scaler_X.transform(X_test)
        y_train_scaled = scaler_y.fit_transform(y_train.reshape(-1, 1))

        # Modle train
        ann = ANN_PSO(*model_params)
        ann.fit(X_train_scaled, y_train_scaled)

        # predictation
        y_pred_scaled = ann.predict(X_test_scaled)
        y_pred = scaler_y.inverse_transform(y_pred_scaled)

        # Evaluation
        mae = mean_absolute_error(y_test, y_pred)
        rmse = np.sqrt(mean_squared_error(y_test, y_pred))

        fold_time = time.time() - fold_start

        # Save all result
        fold_result = {
            'fold': fold + 1,
            'mae': mae,
            'rmse': rmse,
            'training_time': fold_time,
            'y_test': y_test,
            'y_pred': y_pred
        }
        all_fold_results.append(fold_result)

        fold_mae_scores.append(mae)
        fold_rmse_scores.append(rmse)
        fold_times.append(fold_time)

    total_time = time.time() - start_time

    result = {
        'run_name': run_name,
        'config_name': run_name.split('_run')[0],
        'cv_mae_mean': np.mean(fold_mae_scores),
        'cv_mae_std': np.std(fold_mae_scores),
        'cv_rmse_mean': np.mean(fold_rmse_scores),
        'cv_rmse_std': np.std(fold_rmse_scores),
        'cv_time_mean': np.mean(fold_times),
        'cv_time_std': np.std(fold_times),
        'total_training_time': total_time,
        'model_params': model_params,
        'run_seed': run_seed,
        'n_splits': n_splits,
        'fold_results': all_fold_results,
        'timestamp': time.time()
    }

    return result

Display results

In [20]:
def display_architecture_cv_results(results_df):

    print("\n ARCHITECTURE STUDY - CROSS VALIDATION RESULTS")
    print("=" * 65)

    summary = results_df.groupby('config_name').agg({
        'cv_mae_mean': ['mean', 'std', 'min', 'max'],
        'cv_rmse_mean': ['mean', 'std'],
        'cv_time_mean': ['mean', 'std'],
        'total_training_time': 'mean'
    }).round(4)

    print("Cross-Validation Performance (Mean ± Std across 10 independent runs):")
    print(summary)

    # Find Best run
    best_run = results_df.loc[results_df['cv_mae_mean'].idxmin()]
    print(f"\n BEST CONFIGURATION: {best_run['run_name']}")
    print(f"   CV MAE: {best_run['cv_mae_mean']:.4f} ± {best_run['cv_mae_std']:.4f}")
    print(f"   Architecture: {best_run['model_params'][0]}")



In [None]:
architecture_cv_results = study_ann_architecture_with_cv()

STUDY 1: ANN Architecture Effects (with 5-Fold Cross Validation)

 Testing architecture: 1hl_10n - [10, 1]
 arch_1hl_10n_run01 (with 5-fold CV)...
  Loaded existing CV results - Avg MAE: 15.2909
 arch_1hl_10n_run02 (with 5-fold CV)...
  Loaded existing CV results - Avg MAE: 14.4590
 arch_1hl_10n_run03 (with 5-fold CV)...
  Loaded existing CV results - Avg MAE: 15.9271
 arch_1hl_10n_run04 (with 5-fold CV)...
  Loaded existing CV results - Avg MAE: 15.4454
 arch_1hl_10n_run05 (with 5-fold CV)...
  Loaded existing CV results - Avg MAE: 16.9292
 arch_1hl_10n_run06 (with 5-fold CV)...
  Loaded existing CV results - Avg MAE: 15.1520
 arch_1hl_10n_run07 (with 5-fold CV)...
  Loaded existing CV results - Avg MAE: 15.6284
 arch_1hl_10n_run08 (with 5-fold CV)...
  Loaded existing CV results - Avg MAE: 13.2228
 arch_1hl_10n_run09 (with 5-fold CV)...
  Loaded existing CV results - Avg MAE: 15.3313
 arch_1hl_10n_run10 (with 5-fold CV)...
  Loaded existing CV results - Avg MAE: 14.7720

 Testing arc