In [1]:
%pip install ucimlrepo

Note: you may need to restart the kernel to use updated packages.


In [2]:
from ucimlrepo import fetch_ucirepo 
  
# fetch dataset 
concrete_compressive_strength = fetch_ucirepo(id=165) 
  
# data (as pandas dataframes) 
X = concrete_compressive_strength.data.features 
y = concrete_compressive_strength.data.targets 
  
# metadata 
print(concrete_compressive_strength.metadata) 
  
# variable information 
print(concrete_compressive_strength.variables) 


{'uci_id': 165, 'name': 'Concrete Compressive Strength', 'repository_url': 'https://archive.ics.uci.edu/dataset/165/concrete+compressive+strength', 'data_url': 'https://archive.ics.uci.edu/static/public/165/data.csv', 'abstract': 'Concrete is the most important material in civil engineering. The concrete compressive strength is a highly nonlinear function of age and ingredients. ', 'area': 'Physics and Chemistry', 'tasks': ['Regression'], 'characteristics': ['Multivariate'], 'num_instances': 1030, 'num_features': 8, 'feature_types': ['Real'], 'demographics': [], 'target_col': ['Concrete compressive strength'], 'index_col': None, 'has_missing_values': 'no', 'missing_values_symbol': None, 'year_of_dataset_creation': 1998, 'last_updated': 'Sun Feb 11 2024', 'dataset_doi': '10.24432/C5PK67', 'creators': ['I-Cheng Yeh'], 'intro_paper': {'ID': 383, 'type': 'NATIVE', 'title': 'Modeling of strength of high-performance concrete using artificial neural networks', 'authors': 'I. Yeh', 'venue': 'C

In [3]:
from ANN_PSO import ANN_PSO



In [4]:
print(y.values[500:])

[[57.92]
 [25.61]
 [33.49]
 [59.59]
 [29.55]
 [37.92]
 [61.86]
 [62.05]
 [32.01]
 [72.1 ]
 [39.  ]
 [65.7 ]
 [32.11]
 [40.29]
 [74.36]
 [21.97]
 [ 9.85]
 [15.07]
 [23.25]
 [43.73]
 [13.4 ]
 [24.13]
 [44.52]
 [62.94]
 [59.49]
 [25.12]
 [23.64]
 [35.75]
 [38.61]
 [68.75]
 [66.78]
 [23.85]
 [32.07]
 [11.65]
 [19.2 ]
 [48.85]
 [39.6 ]
 [43.94]
 [34.57]
 [54.32]
 [24.4 ]
 [15.62]
 [21.86]
 [10.22]
 [14.6 ]
 [18.75]
 [31.97]
 [23.4 ]
 [25.57]
 [41.68]
 [27.74]
 [ 8.2 ]
 [ 9.62]
 [25.42]
 [15.69]
 [27.94]
 [32.63]
 [17.24]
 [19.77]
 [39.44]
 [25.75]
 [33.08]
 [24.07]
 [21.82]
 [21.07]
 [14.84]
 [32.05]
 [11.96]
 [25.45]
 [22.49]
 [25.22]
 [39.7 ]
 [13.09]
 [38.7 ]
 [ 7.51]
 [17.58]
 [21.18]
 [18.2 ]
 [17.2 ]
 [22.63]
 [21.86]
 [12.37]
 [25.73]
 [37.81]
 [21.92]
 [33.04]
 [14.54]
 [26.91]
 [ 8.  ]
 [31.9 ]
 [10.34]
 [19.77]
 [37.44]
 [11.48]
 [24.44]
 [17.6 ]
 [10.73]
 [31.38]
 [13.22]
 [20.97]
 [27.04]
 [32.04]
 [35.17]
 [36.45]
 [38.89]
 [ 6.47]
 [12.84]
 [18.42]
 [21.95]
 [24.1 ]
 [25.08]
 

In [7]:
from sklearn.preprocessing import StandardScaler

# 1. STANDARDISER
scaler_X = StandardScaler()
scaler_y = StandardScaler()

X_scaled = scaler_X.fit_transform(X.values)


y_scaled = scaler_y.fit_transform(y.values.reshape(-1,1))

X_train_scaled = X_scaled[:800]
X_test_scaled  = X_scaled[800:]

y_train_scaled = y_scaled[:800]
y_test         = y.values[800:]


# 2. AJUSTER LES BORNES
ann_pso = ANN_PSO(
    [50,25,1],
    "tanh", "linear",
    60, 700,
    0.8, 1.6, 0.95, 0.2, 0.0005,
    -1.2, 1.2
)

# 3. ENTRAÎNER
ann_pso.fit(X_train_scaled, y_train_scaled)

# 4. PRÉDIRE ET RETOUR À L'ÉCHELLE ORIGINALE
y_pred_scaled = ann_pso.predict(X_test_scaled)
y_pred = scaler_y.inverse_transform(y_pred_scaled)





In [8]:
print("pred:", y_pred[:10])
print("true:", y_test[:10])

pred: [[21.72019134]
 [28.431094  ]
 [26.41314401]
 [ 9.39649438]
 [ 4.71984764]
 [17.14847264]
 [24.84287148]
 [32.901977  ]
 [20.63830833]
 [28.431094  ]]
true: [[13.71]
 [19.69]
 [31.65]
 [19.11]
 [39.58]
 [48.79]
 [24.  ]
 [37.42]
 [11.47]
 [19.69]]


In [9]:
import numpy as np
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import KFold


def cross_validate_ann_pso(X, y, model_params, n_splits=5):
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)

    mae_scores = []
    rmse_scores = []

    fold = 1
    for train_idx, test_idx in kf.split(X):

        print(f"\n===== FOLD {fold}/{n_splits} =====")

        X_train, X_test = X[train_idx], X[test_idx]
        y_train, y_test = y[train_idx], y[test_idx]

        # Standardisation
        scaler_X = StandardScaler()
        scaler_y = StandardScaler()

        X_train_scaled = scaler_X.fit_transform(X_train)
        X_test_scaled = scaler_X.transform(X_test)

        y_train_scaled = scaler_y.fit_transform(y_train.reshape(-1,1))
        y_test_true = y_test  # valeurs réelles non-scalées

        # Initialiser un nouveau modèle ANN_PSO
        ann = ANN_PSO(*model_params)

        # Entraîner
        ann.fit(X_train_scaled, y_train_scaled)

        # Prédiction
        y_pred_scaled = ann.predict(X_test_scaled)
        y_pred = scaler_y.inverse_transform(y_pred_scaled)

        # Metrics
        mae  = mean_absolute_error(y_test_true, y_pred)
        

        mae_scores.append(mae)
        

        print(f"MAE  = {mae:.4f}")


        fold += 1

    print("\n========== Résultats finaux ==========")
    print(f"MAE moyen  = {np.mean(mae_scores):.4f}  (± {np.std(mae_scores):.4f})")
    print(f"RMSE moyen = {np.mean(rmse_scores):.4f}  (± {np.std(rmse_scores):.4f})")

    return mae_scores, rmse_scores
model_params = (
    [50,25,1],         # architecture
    "tanh", "linear",  # activations
    60, 700,           # swarmsize, n_iter
    0.8, 1.6, 0.95,    # alpha, beta, gamma
    0.2, 0.0005,       # sigma, epsilon
    -1.2, 1.2          # min_bound, max_bound
)
mae_scores, rmse_scores = cross_validate_ann_pso(
    X.values,
    y.values,
    model_params,
    n_splits=5
)



===== FOLD 1/5 =====
MAE  = 23.1960

===== FOLD 2/5 =====
MAE  = 22.2191

===== FOLD 3/5 =====
MAE  = 21.0435

===== FOLD 4/5 =====
MAE  = 20.4427

===== FOLD 5/5 =====


KeyboardInterrupt: 