# Getting The Data

In [1]:
import os
import sys
# Put the parent of the current working directory on the import path (once),
# so the project import in this cell resolves when the notebook runs from a
# sub-directory.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

from nuc_data_tool.db.fetch_data import fetch_files_by_name, fetch_data_by_filename_and_physical_quantities
# Fetch the collection of file records iterated by the next cell (each item is
# used there via its `.name` attribute). Called with no arguments.
# NOTE(review): presumably this returns every stored file — confirm against
# fetch_files_by_name.
files = fetch_files_by_name()

In [2]:
import pandas as pd

def _prefix_step_columns(frame, prefix):
    """Return a renamed copy of ``frame`` with its step columns prefixed.

    Columns named exactly ``first_step`` / ``last_step`` and every column whose
    name contains ``middle_step`` become ``f'{prefix}_{column}'``. All other
    columns (such as the ``nuc_ix`` / ``name`` merge keys) keep their names.
    The input frame itself is not modified.
    """
    def prefixed(col):
        if col in ('first_step', 'last_step') or 'middle_step' in col:
            return f'{prefix}_{col}'
        return col

    return frame.rename(columns=prefixed)


# Start from an empty frame that only has the key columns, so the first outer
# merge below has something to join on.
nuc_data = pd.DataFrame(columns=['nuc_ix', 'name'])

for file in files:
    dict_nuc_data = fetch_data_by_filename_and_physical_quantities(file, 'isotope', True)

    for pq in dict_nuc_data:
        pq_frame = dict_nuc_data[pq]
        if pq_frame.empty:
            continue

        # Prefix the step columns with the file name so columns coming from
        # different files do not collide, then outer-join on the nuclide keys.
        nuc_data = pd.merge(nuc_data,
                            _prefix_step_columns(pq_frame, file.name),
                            how='outer',
                            on=['nuc_ix', 'name'])

In [3]:
# Print the merged frame's summary; verbose=True requests the full per-column
# listing rather than a shortened one.
nuc_data.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3821 entries, 0 to 3820
Data columns (total 10535 columns):
 #      Column                   Dtype 
---     ------                   ----- 
 0      nuc_ix                   int64 
 1      name                     object
 2      001_first_step           object
 3      001_last_step            object
 4      001_middle_step_1        object
 5      001_middle_step_2        object
 6      001_middle_step_3        object
 7      001_middle_step_4        object
 8      001_middle_step_5        object
 9      001_middle_step_6        object
 10     001_middle_step_7        object
 11     001_middle_step_8        object
 12     001_middle_step_9        object
 13     001_middle_step_10       object
 14     001_middle_step_11       object
 15     001_middle_step_12       object
 16     001_middle_step_13       object
 17     001_middle_step_14       object
 18     001_middle_step_15       object
 19     001_middle_step_16       object
 20     00

# Setting up Environment

In [4]:
# Key columns that identify a nuclide; they are not used as model features.
unnecessary_columns = ['nuc_ix', 'name']
# Every other column of the merged frame, in frame order.
numeric_columns = list(filter(lambda col: col not in unnecessary_columns,
                              nuc_data.columns.tolist()))

In [32]:
from decimal import Decimal

# Walk the merged frame value by value and print
# "<column position>,<row position>: <value>" for everything that is not a
# Decimal instance.
for y, row in enumerate(nuc_data.itertuples(index=False)):
    for x, ele in enumerate(row):
        if isinstance(ele, Decimal):
            # Decimal values are the expected case; nothing to report.
            continue
        print(f'{x},{y}: {ele}')

0,0: 10010
1,0: H1
0,1: 10020
1,1: H2
0,2: 10030
1,2: H3
0,3: 10040
1,3: H4
0,4: 10050
1,4: H5
0,5: 10060
1,5: H6
0,6: 10070
1,6: H7
0,7: 20030
1,7: He3
0,8: 20040
1,8: He4
0,9: 20050
1,9: He5
0,10: 20060
1,10: He6
0,11: 20070
1,11: He7
0,12: 20080
1,12: He8
0,13: 20090
1,13: He9
0,14: 20100
1,14: He10
0,15: 30040
1,15: Li4
0,16: 30050
1,16: Li5
0,17: 30060
1,17: Li6
0,18: 30070
1,18: Li7
0,19: 30080
1,19: Li8
0,20: 30090
1,20: Li9
0,21: 30100
1,21: Li10
0,22: 30110
1,22: Li11
0,23: 30120
1,23: Li12
0,24: 40050
1,24: Be5
0,25: 40060
1,25: Be6
0,26: 40070
1,26: Be7
0,27: 40080
1,27: Be8
0,28: 40090
1,28: Be9
0,29: 40100
1,29: Be10
0,30: 40110
1,30: Be11
0,31: 40120
1,31: Be12
0,32: 40130
1,32: Be13
0,33: 40140
1,33: Be14
0,34: 40150
1,34: Be15
0,35: 40160
1,35: Be16
0,36: 50060
1,36: B6
0,37: 50070
1,37: B7
0,38: 50080
1,38: B8
0,39: 50090
1,39: B9
0,40: 50100
1,40: B10
0,41: 50110
1,41: B11
0,42: 50120
1,42: B12
0,43: 50130
1,43: B13
0,44: 50140
1,44: B14
0,45: 50150
1,45: B15
0,46: 50

1,610: Cu54
0,611: 290550
1,611: Cu55
0,612: 290560
1,612: Cu56
0,613: 290570
1,613: Cu57
0,614: 290580
1,614: Cu58
0,615: 290590
1,615: Cu59
0,616: 290600
1,616: Cu60
0,617: 290610
1,617: Cu61
0,618: 290620
1,618: Cu62
0,619: 290630
1,619: Cu63
0,620: 290640
1,620: Cu64
0,621: 290650
1,621: Cu65
0,622: 290660
1,622: Cu66
0,623: 290670
1,623: Cu67
0,624: 290680
1,624: Cu68
0,625: 290681
1,625: Cu68m1
0,626: 290690
1,626: Cu69
0,627: 290700
1,627: Cu70
0,628: 290701
1,628: Cu70m1
0,629: 290702
1,629: Cu70m2
0,630: 290710
1,630: Cu71
0,631: 290720
1,631: Cu72
0,632: 290730
1,632: Cu73
0,633: 290740
1,633: Cu74
0,634: 290750
1,634: Cu75
0,635: 290760
1,635: Cu76
0,636: 290761
1,636: Cu76m1
0,637: 290770
1,637: Cu77
0,638: 290780
1,638: Cu78
0,639: 290790
1,639: Cu79
0,640: 290800
1,640: Cu80
0,641: 290810
1,641: Cu81
0,642: 300540
1,642: Zn54
0,643: 300550
1,643: Zn55
0,644: 300560
1,644: Zn56
0,645: 300570
1,645: Zn57
0,646: 300580
1,646: Zn58
0,647: 300590
1,647: Zn59
0,648: 300600
1,64

1,1297: Pd104
0,1298: 461050
1,1298: Pd105
0,1299: 461060
1,1299: Pd106
0,1300: 461070
1,1300: Pd107
0,1301: 461071
1,1301: Pd107m1
0,1302: 461080
1,1302: Pd108
0,1303: 461090
1,1303: Pd109
0,1304: 461091
1,1304: Pd109m1
0,1305: 461100
1,1305: Pd110
0,1306: 461110
1,1306: Pd111
0,1307: 461111
1,1307: Pd111m1
0,1308: 461120
1,1308: Pd112
0,1309: 461130
1,1309: Pd113
0,1310: 461131
1,1310: Pd113m1
0,1311: 461140
1,1311: Pd114
0,1312: 461150
1,1312: Pd115
0,1313: 461151
1,1313: Pd115m1
0,1314: 461160
1,1314: Pd116
0,1315: 461170
1,1315: Pd117
0,1316: 461171
1,1316: Pd117m1
0,1317: 461180
1,1317: Pd118
0,1318: 461190
1,1318: Pd119
0,1319: 461200
1,1319: Pd120
0,1320: 461210
1,1320: Pd121
0,1321: 461220
1,1321: Pd122
0,1322: 461230
1,1322: Pd123
0,1323: 461240
1,1323: Pd124
0,1324: 461250
1,1324: Pd125
0,1325: 461260
1,1325: Pd126
0,1326: 470930
1,1326: Ag93
0,1327: 470940
1,1327: Ag94
0,1328: 470941
1,1328: Ag94m1
0,1329: 470942
1,1329: Ag94m2
0,1330: 470950
1,1330: Ag95
0,1331: 470951
1,1

0,1858: 561490
1,1858: Ba149
0,1859: 561500
1,1859: Ba150
0,1860: 561510
1,1860: Ba151
0,1861: 561520
1,1861: Ba152
0,1862: 561530
1,1862: Ba153
0,1863: 571170
1,1863: La117
0,1864: 571171
1,1864: La117m1
0,1865: 571180
1,1865: La118
0,1866: 571190
1,1866: La119
0,1867: 571200
1,1867: La120
0,1868: 571210
1,1868: La121
0,1869: 571220
1,1869: La122
0,1870: 571230
1,1870: La123
0,1871: 571240
1,1871: La124
0,1872: 571241
1,1872: La124m1
0,1873: 571250
1,1873: La125
0,1874: 571251
1,1874: La125m1
0,1875: 571260
1,1875: La126
0,1876: 571261
1,1876: La126m1
0,1877: 571270
1,1877: La127
0,1878: 571271
1,1878: La127m1
0,1879: 571280
1,1879: La128
0,1880: 571281
1,1880: La128m1
0,1881: 571290
1,1881: La129
0,1882: 571291
1,1882: La129m1
0,1883: 571300
1,1883: La130
0,1884: 571310
1,1884: La131
0,1885: 571320
1,1885: La132
0,1886: 571321
1,1886: La132m1
0,1887: 571330
1,1887: La133
0,1888: 571340
1,1888: La134
0,1889: 571350
1,1889: La135
0,1890: 571360
1,1890: La136
0,1891: 571361
1,1891: La13

0,2343: 671520
1,2343: Ho152
0,2344: 671521
1,2344: Ho152m1
0,2345: 671530
1,2345: Ho153
0,2346: 671531
1,2346: Ho153m1
0,2347: 671540
1,2347: Ho154
0,2348: 671541
1,2348: Ho154m1
0,2349: 671550
1,2349: Ho155
0,2350: 671551
1,2350: Ho155m1
0,2351: 671560
1,2351: Ho156
0,2352: 671561
1,2352: Ho156m1
0,2353: 671562
1,2353: Ho156m2
0,2354: 671570
1,2354: Ho157
0,2355: 671580
1,2355: Ho158
0,2356: 671581
1,2356: Ho158m1
0,2357: 671582
1,2357: Ho158m2
0,2358: 671590
1,2358: Ho159
0,2359: 671591
1,2359: Ho159m1
0,2360: 671600
1,2360: Ho160
0,2361: 671601
1,2361: Ho160m1
0,2362: 671602
1,2362: Ho160m2
0,2363: 671610
1,2363: Ho161
0,2364: 671611
1,2364: Ho161m1
0,2365: 671620
1,2365: Ho162
0,2366: 671621
1,2366: Ho162m1
0,2367: 671630
1,2367: Ho163
0,2368: 671631
1,2368: Ho163m1
0,2369: 671640
1,2369: Ho164
0,2370: 671641
1,2370: Ho164m1
0,2371: 671650
1,2371: Ho165
0,2372: 671660
1,2372: Ho166
0,2373: 671661
1,2373: Ho166m1
0,2374: 671670
1,2374: Ho167
0,2375: 671680
1,2375: Ho168
0,2376: 671

0,2823: 771770
1,2823: Ir177
0,2824: 771780
1,2824: Ir178
0,2825: 771790
1,2825: Ir179
0,2826: 771800
1,2826: Ir180
0,2827: 771810
1,2827: Ir181
0,2828: 771820
1,2828: Ir182
0,2829: 771830
1,2829: Ir183
0,2830: 771840
1,2830: Ir184
0,2831: 771850
1,2831: Ir185
0,2832: 771860
1,2832: Ir186
0,2833: 771861
1,2833: Ir186m1
0,2834: 771870
1,2834: Ir187
0,2835: 771871
1,2835: Ir187m1
0,2836: 771880
1,2836: Ir188
0,2837: 771881
1,2837: Ir188m1
0,2838: 771890
1,2838: Ir189
0,2839: 771891
1,2839: Ir189m1
0,2840: 771892
1,2840: Ir189m2
0,2841: 771900
1,2841: Ir190
0,2842: 771901
1,2842: Ir190m1
0,2843: 771902
1,2843: Ir190m2
0,2844: 771910
1,2844: Ir191
0,2845: 771911
1,2845: Ir191m1
0,2846: 771912
1,2846: Ir191m2
0,2847: 771920
1,2847: Ir192
0,2848: 771921
1,2848: Ir192m1
0,2849: 771922
1,2849: Ir192m2
0,2850: 771930
1,2850: Ir193
0,2851: 771931
1,2851: Ir193m1
0,2852: 771940
1,2852: Ir194
0,2853: 771941
1,2853: Ir194m1
0,2854: 771942
1,2854: Ir194m2
0,2855: 771950
1,2855: Ir195
0,2856: 771951


0,3402: 892070
1,3402: Ac207
0,3403: 892080
1,3403: Ac208
0,3404: 892081
1,3404: Ac208m1
0,3405: 892090
1,3405: Ac209
0,3406: 892100
1,3406: Ac210
0,3407: 892110
1,3407: Ac211
0,3408: 892120
1,3408: Ac212
0,3409: 892130
1,3409: Ac213
0,3410: 892140
1,3410: Ac214
0,3411: 892150
1,3411: Ac215
0,3412: 892160
1,3412: Ac216
0,3413: 892161
1,3413: Ac216m1
0,3414: 892170
1,3414: Ac217
0,3415: 892180
1,3415: Ac218
0,3416: 892190
1,3416: Ac219
0,3417: 892200
1,3417: Ac220
0,3418: 892210
1,3418: Ac221
0,3419: 892220
1,3419: Ac222
0,3420: 892221
1,3420: Ac222m1
0,3421: 892230
1,3421: Ac223
0,3422: 892240
1,3422: Ac224
0,3423: 892250
1,3423: Ac225
0,3424: 892260
1,3424: Ac226
0,3425: 892270
1,3425: Ac227
0,3426: 892280
1,3426: Ac228
0,3427: 892290
1,3427: Ac229
0,3428: 892300
1,3428: Ac230
0,3429: 892310
1,3429: Ac231
0,3430: 892320
1,3430: Ac232
0,3431: 892330
1,3431: Ac233
0,3432: 892340
1,3432: Ac234
0,3433: 892350
1,3433: Ac235
0,3434: 892360
1,3434: Ac236
0,3435: 902090
1,3435: Th209
0,3436: 

In [5]:
# Import the PyCaret entry points used by the following cells by name rather
# than with `*`, so it is clear where each function comes from.
from pycaret.anomaly import (assign_model, create_model, plot_model,
                             predict_model, save_model, setup)

# Initialise the anomaly-detection experiment on the merged frame:
#   - the key columns (`unnecessary_columns`) are ignored,
#   - every other column is declared numeric,
#   - features are normalised with the 'robust' method,
#   - session_id fixes the random seed for the experiment.
exp_ano = setup(nuc_data,
                normalize=True,
                normalize_method='robust',
                ignore_features=unnecessary_columns,
                numeric_features=numeric_columns,
                session_id=123)

Unnamed: 0,Description,Value
0,session_id,123
1,Original Data,"(3821, 10535)"
2,Missing Values,False
3,Numeric Features,10533
4,Categorical Features,0
5,Ordinal Features,False
6,High Cardinality Features,False
7,High Cardinality Method,
8,Transformed Data,"(3821, 10533)"
9,CPU Jobs,-1


# Create a Model

In [22]:
# Create an 'iforest' model with 256 estimators, using all CPU cores
# (n_jobs=-1) and an explicit fraction of 0.005.
iforest = create_model('iforest', n_jobs=-1, fraction=0.005, n_estimators=256)

In [33]:
# Create the 'iforest' model again, this time without an explicit `fraction`.
# NOTE(review): this rebinds `iforest`, replacing the fraction=0.005 model from
# the previous cell. On a top-to-bottom run every later cell (assign, plot,
# save) uses this one — confirm which model is intended.
iforest = create_model('iforest', n_jobs=-1, n_estimators=256)

In [34]:
# Display the model object's repr to inspect its parameters.
iforest

IForest(behaviour='new', bootstrap=False, contamination=0.05,
    max_features=1.0, max_samples='auto', n_estimators=256, n_jobs=-1,
    random_state=123, verbose=0)

# Assign a Model

In [35]:
# Attach the model's output to the data; the frame shown in the following
# cells carries the added `Anomaly` and `Anomaly_Score` columns.
iforest_results = assign_model(iforest)

In [36]:
# Preview the first rows of the labelled frame.
iforest_results.head()

Unnamed: 0,nuc_ix,name,001_first_step,001_last_step,001_middle_step_1,001_middle_step_2,001_middle_step_3,001_middle_step_4,001_middle_step_5,001_middle_step_6,...,R15_073_middle_step_112,R15_073_middle_step_113,R15_073_middle_step_114,R15_073_middle_step_115,R15_073_middle_step_116,R15_073_middle_step_117,R15_073_middle_step_118,R15_073_middle_step_119,Anomaly,Anomaly_Score
0,10010,H1,0,0,4.646090694e-06,9.3445449e-06,1.410187475e-05,1.891756544e-05,2.379071329e-05,2.872040805e-05,...,9.970577562e-05,9.970577562e-05,9.970577562e-05,9.970577562e-05,9.970577562e-05,9.970577562e-05,9.970577562e-05,9.970577562e-05,0,-0.0
1,10020,H2,0,0,2.187007232e-06,4.388726293e-06,6.607321145e-06,8.842630832e-06,1.10943665e-05,1.336223707e-05,...,4.550146851e-05,4.550146851e-05,4.550146851e-05,4.550146851e-05,4.550146851e-05,4.550146851e-05,4.550146851e-05,4.550146851e-05,0,-0.0
2,10030,H3,0,0,2.829193536e-05,5.667838403e-05,8.518316066e-05,0.0001138057392,0.0001425441966,0.0001713965614,...,0.0005725769899,0.0005724887987,0.0005724006212,0.0005723124572,0.0005722243068,0.00057213617,0.0005720480467,0.000571959937,0,-0.0
3,10040,H4,0,0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,-0.0
4,10050,H5,0,0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,-0.0


In [37]:
# Show only the rows whose `Anomaly` flag equals 1.
iforest_results[iforest_results.Anomaly == 1]

Unnamed: 0,nuc_ix,name,001_first_step,001_last_step,001_middle_step_1,001_middle_step_2,001_middle_step_3,001_middle_step_4,001_middle_step_5,001_middle_step_6,...,R15_073_middle_step_112,R15_073_middle_step_113,R15_073_middle_step_114,R15_073_middle_step_115,R15_073_middle_step_116,R15_073_middle_step_117,R15_073_middle_step_118,R15_073_middle_step_119,Anomaly,Anomaly_Score
954,380890,Sr89,0,0,0.01127190399,0.02110350471,0.02966289133,0.03711307994,0.0435960354,0.04923552073,...,0.05877960535,0.057978799,0.05718890274,0.05640976793,0.05564124797,0.05488319822,0.05413547606,0.05339794078,1,0.292271
955,380900,Sr90,0,0,0.01426213088,0.02850174688,0.04271733607,0.05690883381,0.07107596849,0.08521846343,...,0.2787089509,0.27869058,0.2786722104,0.2786538419,0.2786354746,0.2786171086,0.2785987437,0.2785803801,1,0.292271
997,390910,Y91,0,0,0.01344078096,0.02619049337,0.03750595932,0.04754734108,0.05645665854,0.06436005211,...,0.08984491797,0.08878683688,0.08774121655,0.08670791022,0.08568677289,0.08467766123,0.08368043363,0.08269495013,1,0.292271
1041,400920,Zr92,0,0,0.01498925883,0.03055848608,0.04612817303,0.06169813976,0.07726788199,0.09283688277,...,0.3083929513,0.3083929513,0.3083929513,0.3083929513,0.3083929513,0.3083929513,0.3083929513,0.3083929513,1,0.292271
1042,400930,Zr93,0,1,0.01567020577,0.0323789401,0.04909905654,0.06583030157,0.08257198923,0.09932341517,...,0.3321695562,0.3321695558,0.3321695554,0.332169555,0.3321695546,0.3321695541,0.3321695537,0.3321695533,1,0.343759
1043,400940,Zr94,0,1,0.01652437607,0.03309697935,0.04969207063,0.06630927205,0.08294776395,0.09960671603,...,0.33079079,0.33079079,0.33079079,0.33079079,0.33079079,0.33079079,0.33079079,0.33079079,1,0.343759
1044,400950,Zr95,0,0,0.01592146794,0.03024490317,0.04312570679,0.05471150592,0.06513433114,0.07451265407,...,0.1118427778,0.1106386077,0.1094474025,0.1082690226,0.1071033299,0.1059501877,0.104809461,0.1036810161,1,0.292271
1045,400960,Zr96,0,0,0.01623614081,0.03249765778,0.04879229278,0.0651195523,0.08147845889,0.09786802842,...,0.3258379639,0.3258379639,0.3258379639,0.3258379639,0.3258379639,0.3258379639,0.3258379639,0.3258379639,1,0.321188
1127,420970,Mo97,0,0,0.01402338193,0.02977023076,0.04555896001,0.06138912776,0.0772597058,0.09316963308,...,0.3166617184,0.3166617184,0.3166617184,0.3166617184,0.3166617184,0.3166617184,0.3166617184,0.3166617184,1,0.321188
1128,420980,Mo98,0,0,0.01512925818,0.03029767463,0.04551239921,0.06077290218,0.07607812152,0.09142698496,...,0.3057040036,0.3057040036,0.3057040036,0.3057040036,0.3057040036,0.3057040036,0.3057040036,0.3057040036,1,0.321188


# Plot a Model

In [27]:
# Plot the model with plot_model's default plot type (no `plot` argument given).
plot_model(iforest)

In [28]:
# Plot the model again, this time requesting the 'umap' plot type.
plot_model(iforest, plot = 'umap')

# Saving the Model

In [29]:
# Persist the current `iforest` object under the given file name.
# NOTE(review): the name encodes "0_005", but on a top-to-bottom run `iforest`
# was last created without fraction=0.005 — confirm the file name matches the
# model actually being saved.
save_model(iforest,'nuc_all_steps_isotope_iforest_flat_0_005_model')

Transformation Pipeline and Model Succesfully Saved


(Pipeline(memory=None,
          steps=[('dtypes',
                  DataTypes_Auto_infer(categorical_features=[],
                                       display_types=True,
                                       features_todrop=['nuc_ix', 'name'],
                                       id_columns=[], ml_usecase='regression',
                                       numerical_features=['001_first_step',
                                                           '001_last_step',
                                                           '001_middle_step_1',
                                                           '001_middle_step_2',
                                                           '001_middle_step_3',
                                                           '001_middle_step_4',
                                                           '001_middle_step_5',
                                                           '001_middle_step_6',
                                       

# Predict on Unseen Data

In [None]:
# Run the in-memory model over a data frame to obtain predictions.
# NOTE(review): `nuc_data` is the same frame that was passed to setup(), not
# held-out data, despite this section's title — confirm this is intended.
predictions = predict_model(iforest, data=nuc_data)

In [None]:
# Preview the first rows of the prediction frame.
predictions.head()

In [None]:
# Show only the rows flagged as anomalies.
# NOTE(review): assumes predict_model adds an `Anomaly` column like the
# assign_model output shown earlier; the last cell of this notebook filters a
# predict_model result on `Label` instead — confirm which name is correct.
predictions[predictions.Anomaly == 1]

# Loading the Saved Model

In [None]:
# Import by name (instead of `*`) the two functions this section needs.
from pycaret.anomaly import load_model, predict_model

# Load the model under the same file name the "Saving the Model" cell writes.
# The previous name ('nuc_all_steps_isotope_model') is not saved anywhere in
# this notebook, so loading it fails unless that file already exists from
# some other run.
saved_iforest = load_model('nuc_all_steps_isotope_iforest_flat_0_005_model')

In [None]:
# Score `nuc_data` with the model that was loaded from disk.
new_prediction = predict_model(saved_iforest, data=nuc_data)

In [None]:
# Preview the first rows of the reloaded model's predictions.
new_prediction.head()

In [None]:
# Show only the rows flagged as anomalies.
# Earlier cells filter on `Anomaly`, while this cell originally used `Label`;
# which column predict_model emits may depend on the PyCaret version (TODO
# confirm). Use whichever is present so the filter works either way.
flag_column = 'Anomaly' if 'Anomaly' in new_prediction.columns else 'Label'
new_prediction[new_prediction[flag_column] == 1]