# Getting The Data

In [1]:
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from nuc_data_tool.db.fetch_data import fetch_files_by_name, fetch_data_by_filename_and_physical_quantities
files = fetch_files_by_name()

In [2]:
import pandas as pd

nuc_data = pd.DataFrame()

for file in files:
    dict_nuc_data = fetch_data_by_filename_and_physical_quantities(file, 'all', False)

    for pq in dict_nuc_data:
        if dict_nuc_data[pq].empty:
            continue

        nuc_data = nuc_data.append(dict_nuc_data[pq], ignore_index=True)

nuc_data.info

<bound method DataFrame.info of          nuc_ix     name first_step last_step
0         10010       H1          0         0
1         10020       H2          0         0
2         10030       H3          0         0
3         10040       H4          0         0
4         10050       H5          0         0
...         ...      ...        ...       ...
948143  1102720    Ds272          0         0
948144  1102730    Ds273          0         0
948145  1102791  Ds279m1          0         0
948146  1112720    Rg272          0         0
948147  1112990   Pseudo          0         0

[948148 rows x 4 columns]>

# Setting up Environment

In [3]:
unnecessary_columns = ['nuc_ix', 'name']
numeric_columns = [col for col in nuc_data.columns.tolist() if col not in unnecessary_columns]

In [4]:
from pycaret.anomaly import *

exp_ano = setup(nuc_data, normalize = True, 
                ignore_features = unnecessary_columns,
                numeric_features = numeric_columns,
                session_id = 123)

Setup Succesfully Completed!


Unnamed: 0,Description,Value
0,session_id,123
1,Original Data,"(948148, 4)"
2,Missing Values,False
3,Numeric Features,2
4,Categorical Features,2
5,Ordinal Features,False
6,High Cardinality Features,False
7,Transformed Data,"(948148, 2)"
8,Numeric Imputer,mean
9,Categorical Imputer,constant


# Create a Model

In [5]:
iforest = create_model('iforest')

In [6]:
iforest

IForest(behaviour='new', bootstrap=False, contamination=0.05,
    max_features=1.0, max_samples='auto', n_estimators=100, n_jobs=1,
    random_state=123, verbose=0)

# Assign a Model

In [7]:
iforest_results = assign_model(iforest)

In [8]:
iforest_results.head()

Unnamed: 0,nuc_ix,name,first_step,last_step,Label,Score
0,10010,H1,0,0,0,-0.0
1,10020,H2,0,0,0,-0.0
2,10030,H3,0,0,0,-0.0
3,10040,H4,0,0,0,-0.0
4,10050,H5,0,0,0,-0.0


In [9]:
iforest_results[iforest_results.Label == 1]

Unnamed: 0,nuc_ix,name,first_step,last_step,Label,Score
3513,922350,U235,77,68,1,0.028834
3517,922380,U238,2455,2448,1,0.032346
7334,922350,U235,77,68,1,0.028834
7338,922380,U238,2455,2448,1,0.032346
11155,922350,U235,77,68,1,0.028834
...,...,...,...,...,...,...
940202,922380,U238,2455,2454,1,0.032346
944019,922350,U235,77,76,1,0.028834
944023,922380,U238,2455,2454,1,0.032346
947840,922350,U235,77,71,1,0.028834


# Plot a Model

In [None]:
plot_model(iforest)

In [None]:
plot_model(iforest, plot = 'umap')

# Saving the Model

In [10]:
save_model(iforest,'NUC IForest Model')

Transformation Pipeline and Model Succesfully Saved


# Predict on Unseen Data

In [11]:
predictions = predict_model(iforest, data=nuc_data)

In [12]:
predictions.head()

Unnamed: 0,nuc_ix,name,first_step,last_step,Label,Score
0,10010,H1,0,0,0,-0.0
1,10020,H2,0,0,0,-0.0
2,10030,H3,0,0,0,-0.0
3,10040,H4,0,0,0,-0.0
4,10050,H5,0,0,0,-0.0


In [13]:
predictions[predictions.Label == 1]

Unnamed: 0,nuc_ix,name,first_step,last_step,Label,Score
3513,922350,U235,77,68,1,0.028834
3517,922380,U238,2455,2448,1,0.032346
7334,922350,U235,77,68,1,0.028834
7338,922380,U238,2455,2448,1,0.032346
11155,922350,U235,77,68,1,0.028834
...,...,...,...,...,...,...
940202,922380,U238,2455,2454,1,0.032346
944019,922350,U235,77,76,1,0.028834
944023,922380,U238,2455,2454,1,0.032346
947840,922350,U235,77,71,1,0.028834


# Loading the Saved Model

In [14]:
saved_iforest = load_model('NUC IForest Model')

Transformation Pipeline and Model Successfully Loaded


In [15]:
new_prediction = predict_model(saved_iforest, data=nuc_data)

In [16]:
new_prediction.head()

Unnamed: 0,nuc_ix,name,first_step,last_step,Label,Score
0,10010,H1,0,0,0,-0.0
1,10020,H2,0,0,0,-0.0
2,10030,H3,0,0,0,-0.0
3,10040,H4,0,0,0,-0.0
4,10050,H5,0,0,0,-0.0


In [17]:
new_prediction[new_prediction.Label == 1]

Unnamed: 0,nuc_ix,name,first_step,last_step,Label,Score
3513,922350,U235,77,68,1,0.028834
3517,922380,U238,2455,2448,1,0.032346
7334,922350,U235,77,68,1,0.028834
7338,922380,U238,2455,2448,1,0.032346
11155,922350,U235,77,68,1,0.028834
...,...,...,...,...,...,...
940202,922380,U238,2455,2454,1,0.032346
944019,922350,U235,77,76,1,0.028834
944023,922380,U238,2455,2454,1,0.032346
947840,922350,U235,77,71,1,0.028834
