In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from pyod.models.deep_svdd import DeepSVDD


In [2]:
# Read the raw table from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer='dataset.csv')

In [3]:
# Descriptor columns fed to the anomaly detector. Order matters: it fixes the
# column order of the feature matrix built from this list.
feature_columns = [
    # chem_* columns
    'chem_num_atoms', 'chem_volume', 'chem_density', 'chem_avg_atomic_mass',
    'chem_avg_electronegativity', 'chem_electronegativity_variance',
    'chem_metal_fraction', 'chem_num_unique_elements',
    'chem_metal_atom_count', 'chem_volume_per_atom',
    # geo_* columns
    'geo_surface_area_m2g', 'geo_surface_area_m2cm3', 'geo_void_fraction',
    'geo_pld', 'geo_lcd',
    # link_* columns
    'link_linker_atom_fraction', 'link_linker_bond_length_mean',
    'link_linker_bond_length_std', 'link_metal_coord_number_mean',
    # topo_* columns
    'topo_avg_node_connectivity', 'topo_avg_ring_size',
    'topo_coordination_number_mean', 'topo_degree_assortativity',
    'topo_degree_centrality_mean', 'topo_graph_density',
    'topo_graph_entropy', 'topo_graph_transitivity',
    'topo_largest_cc_fraction', 'topo_node_connectivity_std',
    'topo_num_connected_components', 'topo_num_edges', 'topo_num_nodes',
]

# Drop only rows that are missing a value the analysis actually uses (the
# modelled features and the MOF identifier). A bare `df.dropna()` would also
# discard rows whose only gap is in some unrelated column of the CSV.
# NOTE(review): if gaps in other columns are meant to disqualify a row,
# extend `subset` accordingly.
df_cleaned = df.dropna(subset=[*feature_columns, 'MOF_ID'])

In [4]:
# Feature matrix: the selected descriptor columns of the cleaned frame.
X = df_cleaned[feature_columns]

# Fit the scaler on X and standardise X in a single step; the fitted scaler
# is kept in `scaler_X` so the same transformation can be reapplied later.
scaler_X = StandardScaler()
X_scaled = scaler_X.fit_transform(X)


In [7]:
# Independent copy of the identifier column; it keeps df_cleaned's row index.
mof_ids = df_cleaned.loc[:, 'MOF_ID'].copy()

In [None]:

# Expected share of outliers in the data, handed to the detector's
# `contamination` argument.
contamination_rate = 0.05

# Fixed seed so that a Restart & Run All reproduces the same model and hence
# the same anomaly ranking; without it every run can yield a different top-N.
random_seed = 42

# NOTE(review): newer PyOD releases (2.x) appear to require
# `n_features=X_scaled.shape[1]` as an extra argument — confirm against the
# installed version before upgrading.
clf = DeepSVDD(
    use_ae=True,
    hidden_neurons=[16, 8],
    epochs=100,
    batch_size=32,
    dropout_rate=0.1,
    contamination=contamination_rate,
    random_state=random_seed,
    verbose=1
)

# Train the model on the scaled data
clf.fit(X_scaled)




In [8]:
# Score the same samples the model was trained on, and get the detector's
# binary label for each of them.
anomaly_scores = clf.decision_function(X_scaled)
anomaly_labels = clf.predict(X_scaled)

# One row per sample. The frame takes its index from `mof_ids`; the score and
# label arrays are placed positionally, so they must be in the same row order
# as `mof_ids`.
results_df = pd.DataFrame(
    dict(
        mof_id=mof_ids,
        deep_svdd_score=anomaly_scores,
        deep_svdd_anomaly_label=anomaly_labels,
    )
)



In [9]:
# Rank samples from highest to lowest Deep SVDD score.
results_df_sorted = results_df.sort_values('deep_svdd_score', ascending=False)

In [11]:
# Banner followed by the ten highest-scoring rows, each on its own line.
print(
    "\n--- Top 10 Most Anomalous MOFs (according to Deep SVDD) ---",
    results_df_sorted.head(10),
    sep="\n",
)


--- Top 10 Most Anomalous MOFs (according to Deep SVDD) ---
                             mof_id  deep_svdd_score  deep_svdd_anomaly_label
6504                   hMOF-1002818         3.275599                        1
936    c6ce00407e_c6ce00407e5_clean         1.451180                        1
937    c6ce00407e_c6ce00407e6_clean         0.387846                        1
26101                  XICYUF_clean         0.251571                        1
19010                  ICIKIP_clean         0.237343                        1
2265                   ERANAO_clean         0.233862                        1
23016                  QOYYOU_clean         0.217134                        1
19549        ja403810k_si_003_clean         0.205295                        1
19790                  JONKEE_clean         0.188737                        1
23349                  RIVDIL_clean         0.182875                        1
