In [2]:
# Data stuffs
import pickle 
import numpy as np
import pandas as pd
import pathlib 
import os, sys
from moxie.data.utils_ import load_data, standardize, de_standardize, normalize_profiles


# Make it look pretty
from tqdm.notebook import tqdm  

# ML Stuff
from scipy.stats import truncnorm


In [3]:
train_data, val_data, test_data = load_data(dataset_choice='SANDBOX_NO_VARIATIONS', file_loc='../../../moxie/data/processed/pedestal_profiles_ML_READY_ak_5052022_uncerts_mask.pickle')
(train_X, train_y, train_mask, train_radii, train_real_space_radii, train_ids, train_uncert), (val_X, val_y, val_mask, val_radii, val_real_space_radii, val_ids, val_uncert), (test_X, test_y, test_mask, test_radii, test_real_space_radii, test_ids, test_uncert) = train_data, val_data, test_data

with open('../../data/raw/new_elm_timings.pickle', 'rb') as file: 
    elm_timings = pickle.load(file)

machine_param_order = ['Q95', 'RGEO', 'CR0', 'VOLM', 'TRIU', 'TRIL', 'ELON', 'POHM', 'IPLA', 'BVAC', 'NBI', 'ICRH', 'ELER']

train_pulse_order = [int(x.split('/')[0]) for x in train_ids]
train_pulses = list(set(train_pulse_order))
train_pulse_idxs = [[index for index in range(len(train_pulse_order)) if train_pulse_order[index] == pulse] for pulse in train_pulses]

val_pulse_order = [int(x.split('/')[0]) for x in val_ids]
val_pulses = list(set(val_pulse_order))
val_pulse_idxs = [[index for index in range(len(val_pulse_order)) if val_pulse_order[index] == pulse] for pulse in val_pulses]

test_pulse_order = [int(x.split('/')[0]) for x in test_ids]
test_pulses = list(set(test_pulse_order))
test_pulse_idxs = [[index for index in range(len(test_pulse_order)) if test_pulse_order[index] == pulse] for pulse in test_pulses]



Collect neseps for each slice in train-val-test, and store and save as smaller subset. 

```
    dict_supervsied = {'train': {'mps': np.array((13, N)), 'neseps': np.array(N))}, 
                        'val': {'mps': np.array((13, N)), 'neseps': np.array(N))}, 
                        'test': {'mps': np.array((13, N)), 'neseps': np.array(N))}}
```

In [6]:
def calculate_neseps_in_set(set_ids, set_profiles, set_uncerts, set_masks, set_mps, set_shot_numbers_by_slice, set_pulse_time_ids): 
    iterator = tqdm(range(len(set_ids)))
    
    hrts_timings = np.array([float(x.split('/')[1]) for x in set_pulse_time_ids])
    nesep_means_by_pulse = []
    neseps_means = []
    mps_means = []
    set_elm_timings = []
    for k in iterator: 
        slice_loc_in_set = set_ids[k]
        
        pulse_number = np.array(set_shot_numbers_by_slice)[set_ids[k]][0]
        
        if pulse_number != 91962: 
            continue
        
        sample_timings = hrts_timings[slice_loc_in_set]
        
        sample_profiles, sample_uncerts, sample_masks, sample_mps = set_profiles[slice_loc_in_set], set_uncerts[slice_loc_in_set], set_masks[slice_loc_in_set], set_mps[slice_loc_in_set]
        try: 
            pulse_global_elm_timings = np.array(elm_timings[pulse_number])
            sample_elm_percents = calculate_elm_timing(pulse_global_elm_timings, sample_timings)
        except KeyError as e: 
            print('No ELM timings here', )
            sample_elm_percents = [np.nan]*len(sample_timings)
        # print(len(sample_timings), sample_timings, len(sample_profiles))
        
        
        pulse_neseps = calculate_nesep_for_pulse(sample_profiles, sample_uncerts, sample_masks, lbound_n = 0, ubound_n = 0.5e21, lbound_t=0, ubound_t=2000)
        nesep_means_by_pulse.extend(pulse_neseps)
        
        neseps_means.append(pulse_neseps.mean())
        mps_means.append(sample_mps.mean(0))
        set_elm_timings.extend(sample_elm_percents)
        iterator.set_description_str(str(pulse_number))   
    return np.array(nesep_means_by_pulse), np.array(neseps_means), np.array(mps_means), np.array(set_elm_timings)


def calculate_elm_timing(pulse_elm_times, hrts_timings): 
    print(pulse_elm_times, hrts_timings)
    # (time_hrts_scan - time_last_elm) / (time_next_elm - time_last_elm)
    elm_percents = np.full_like(hrts_timings, np.nan)
    # print(len(elm_percents))
    for p, time in enumerate(hrts_timings): 
        # print(time)
        diff =pulse_elm_times - time
        # print(time, pulse_elm_times)
        try: 
            time_last_elm = pulse_elm_times[diff < 0][-1]
            assert time > time_last_elm
        except IndexError as e: 
            print(e, 'Time falls before recorded elms', time, pulse_elm_times[0])
            continue
        
        try: 
            time_next_elm = pulse_elm_times[diff > 0][0]
            assert time < time_next_elm 
        except IndexError as e: 
            print(e, 'Time falls after recorded elms', time, pulse_elm_times[-1])
            continue
        
        
        percent = (time - time_last_elm) / (time_next_elm - time_last_elm) 
        
        
        elm_percents[p] = percent
        # print(diff, pulse_elm_times[diff > 0][0], pulse_elm_times[diff < 0][-1])
              
    print(elm_percents)
    return elm_percents

def calculate_nesep_for_pulse(both_profiles, both_uncertanties, integer_masks, lbound_n, ubound_n, lbound_t, ubound_t, conditional_prediction=False):
    """
    This will return the neseps predicted for each time slice in the pulse. 
    """
    
    pulse_neseps = np.zeros(len(both_profiles))
    pulse_teseps = np.zeros(len(both_profiles))
    if conditional_prediction: 
        integer_masks = np.ones_like(integer_masks, dtype=bool)
        both_uncertanties = np.ones_like(both_uncertanties, dtype=bool)*200
    
    for n, (both_profiles_slice, both_uncertanties_slice, mask_int_slice) in enumerate(zip(both_profiles, both_uncertanties, integer_masks)):
        bool_mask = mask_int_slice > 0
        slice_ne, slice_te = both_profiles_slice[0, :][bool_mask], both_profiles_slice[1, :][bool_mask]
        slice_ne_uncert, slice_te_uncert = both_uncertanties_slice[0, :][bool_mask], both_uncertanties_slice[1, :][bool_mask]
        
        tes_gaussians = np.array([np.linspace(truncnorm.ppf(0.0001, (lbound_t - mu) / var, (ubound_t - mu) / var, mu, var), 
                                     truncnorm.ppf(0.9999, (lbound_t - mu) / var, (ubound_t - mu) / var, mu, var), 10000)
                        for mu, var in zip(slice_te, slice_te_uncert)])
        
        separatrix_loc = np.logical_and(tes_gaussians > 90, tes_gaussians < 110)
        tesep_in_separatrix = tes_gaussians[separatrix_loc]
        
        
        
        
        nes_gaussians = np.array([np.linspace(truncnorm.ppf(0.1, (lbound_n - mu) / var, (ubound_n - mu) / var, mu, var), 
                                     truncnorm.ppf(0.9, (lbound_n - mu) / var, (ubound_n - mu) / var, mu, var), 10000)
                        for mu, var in zip(slice_ne, slice_ne_uncert)])

        nes_in_separatrix = nes_gaussians[separatrix_loc]
        if len(nes_in_separatrix) == 0: 
            print(slice_ne, slice_te, slice_ne_uncert, slice_te_uncert)
        slice_nesep = np.mean(nes_in_separatrix)
        slice_tesep = np.mean(tesep_in_separatrix)
        pulse_neseps[n] = slice_nesep
        pulse_teseps[n] = slice_tesep
    
    return pulse_neseps
                    

In [7]:
calculate_neseps_in_set(train_pulse_idxs, train_X, train_uncert, train_mask, train_y, train_pulse_order, train_ids)
# train_neseps, train_neseps_mean, train_mps_mean, train_elm_percents = calculate_neseps_in_set(train_pulse_idxs, train_X, train_uncert, train_mask, train_y, train_pulse_order, train_ids)
# val_neseps, val_neseps_mean, val_mps_mean, val_elm_percents = calculate_neseps_in_set(val_pulse_idxs, val_X, val_uncert, val_mask, val_y, val_pulse_order, val_ids)
# test_neseps, test_neseps_mean, test_mps_mean, test_elm_percents = calculate_neseps_in_set(test_pulse_idxs, test_X, test_uncert, test_mask, test_y, test_pulse_order, test_ids)

  0%|          | 0/533 [00:00<?, ?it/s]

[52.46910095 52.5060997  52.5685997  52.60929871 52.65940094 52.70569992
 52.7901001  52.81869888 52.8628006  52.90620041 52.94580078 52.99380112
 53.05500031 53.10179901 53.19910049 53.26250076 53.32740021 53.35179901
 53.39839935 53.42829895] [52.48 52.53 52.58 52.63 52.68 52.73 52.78 52.83 52.88 52.93 52.98 53.03
 53.08 53.13 53.18 53.23 53.28 53.33 53.38 53.43]
index 0 is out of bounds for axis 0 with size 0 Time falls after recorded elms 53.43 53.42829895019531
[0.29457882 0.38240479 0.28011248 0.41318106 0.4449139  0.28791503
 0.88033085 0.25625119 0.39630131 0.60099412 0.71247874 0.59149286
 0.53419628 0.28983103 0.80369781 0.48737184 0.26963616 0.1065541
 0.60516699        nan]


(array([2.89339342e+19, 3.19565887e+19, 3.75938945e+19, 2.65557737e+19,
        2.59180198e+19, 2.35529191e+19, 3.47892837e+19, 3.76948380e+19,
        1.91092159e+19, 2.90442845e+19, 2.52842904e+19, 2.32760154e+19,
        4.16874274e+19, 3.55587545e+19, 3.47982168e+19, 2.42152839e+19,
        3.13155560e+19, 2.62405613e+19, 3.12336679e+19, 2.91429193e+19]),
 array([2.98950722e+19]),
 array([[ 2.8009982e+00,  2.8965344e+00,  9.3927974e-01,  7.5136436e+01,
          1.6933377e-01,  4.7922367e-01,  1.6192951e+00,  1.1461519e+06,
         -3.2159130e+06, -3.0414681e+00,  2.3516410e+07,  2.2271638e+06,
          1.5861258e+22]], dtype=float32),
 array([0.29457882, 0.38240479, 0.28011248, 0.41318106, 0.4449139 ,
        0.28791503, 0.88033085, 0.25625119, 0.39630131, 0.60099412,
        0.71247874, 0.59149286, 0.53419628, 0.28983103, 0.80369781,
        0.48737184, 0.26963616, 0.1065541 , 0.60516699,        nan]))

In [64]:

train_mps = np.concatenate((train_y, train_elm_percents.reshape(-1, 1)), axis=1)
val_mps = np.concatenate((val_y, val_elm_percents.reshape(-1, 1)), axis=1)
test_mps = np.concatenate((test_y, test_elm_percents.reshape(-1, 1)), axis=1)


In [71]:
with open('../../../moxie/data/processed/pedestal_profiles_ML_READY_ak_5052022_uncerts_mask.pickle', 'rb') as file:
    massive_dict = pickle.load(file)

massive_dict['SANDBOX_NO_VARIATIONS']['train_dict']['padded']['elm_timings_frass'] = train_elm_percents
massive_dict['SANDBOX_NO_VARIATIONS']['val_dict']['padded']['elm_timings_frass'] = val_elm_percents
massive_dict['SANDBOX_NO_VARIATIONS']['test_dict']['padded']['elm_timings_frass'] = test_elm_percents
with open('../../../moxie/data/processed/pedestal_profiles_ML_READY_ak_1052022_elm_timings.pickle', 'wb') as file:
    pickle.dump(massive_dict, file)
# val_X, val_y, val_mask, val_radii, val_real_space_radii, val_ids, val_uncert = full_dict['val_dict']['padded']['profiles'],full_dict['val_dict']['padded']['controls'], full_dict['val_dict']['padded']['masks'], full_dict['val_dict']['padded']['radii'], full_dict['val_dict']['padded']['real_space_radii'], full_dict['val_dict']['padded']['pulse_time_ids'], full_dict['val_dict']['padded']['uncerts']
# test_X, test_y, test_mask, test_radii, test_real_space_radii, test_ids, test_uncert = full_dict['test_dict']['padded']['profiles'],full_dict['test_dict']['padded']['controls'], full_dict['test_dict']['padded']['masks'], full_dict['test_dict']['padded']['radii'], full_dict['test_dict']['padded']['real_space_radii'], full_dict['test_dict']['padded']['pulse_time_ids'], full_dict['test_dict']['padded']['uncerts']
# return (train_X, train_y, train_mask, train_radii, train_real_space_radii, train_ids, train_uncert), (val_X, val_y, val_mask, val_radii, val_real_space_radii, val_ids, val_uncert), (test_X, test_y, test_mask, test_radii, test_real_space_radii, test_ids, test_uncert)

In [65]:
supervised_dict = {'train': {'mps': train_mps, 'neseps': train_neseps, 'mps_pulse': train_mps_mean, 'neseps_pulse': train_neseps_mean}, 
                    'val': {'mps': val_mps, 'neseps': val_neseps, 'mps_pulse': val_mps_mean, 'neseps_pulse': val_neseps_mean}, 
                    'test': {'mps': test_mps, 'neseps': test_neseps, 'mps_pulse': test_mps_mean, 'neseps_pulse': test_neseps_mean}, 
                  'MP_ORDER': machine_param_order}

In [66]:
PERSONAL_DATA_DIR_PROC = '/home/kitadam/ENR_Sven/moxie/data/processed/'
with open(PERSONAL_DATA_DIR_PROC + 'supervised_set_w_elm_timings.pickle', 'wb') as file:
    pickle.dump(supervised_dict, file)
with open(PERSONAL_DATA_DIR_PROC + 'supervised_set_w_elm_timings.pickle', 'rb') as file:
    supervised_dict = pickle.load(file)

In [None]:
supervised_dict

In [None]:
def calculate_nesep_for_pulse(both_profiles, both_uncertanties, integer_masks, pulse_rmids, lbound_n, ubound_n, lbound_t, ubound_t, conditional_prediction=False, local_pulse_number=None):
    """
    This will return the neseps predicted for each time slice in the pulse. 
    """
    
    lbound_r, ubound_r rad_var, SOL_width = 3.7, 3.9, 0.05, 0.02
    pulse_neseps = np.zeros(len(both_profiles))
    pulse_teseps = np.zeros(len(both_profiles))
    if conditional_prediction: 
        integer_masks = np.ones_like(integer_masks, dtype=bool)
        both_uncertanties = np.ones_like(both_uncertanties, dtype=bool)*200
    
    for n, (both_profiles_slice, both_uncertanties_slice, mask_int_slice, slice_rmid) in enumerate(zip(both_profiles, both_uncertanties, integer_masks, pulse_rmids)):
        bool_mask = mask_int_slice > 0
        slice_ne, slice_te = both_profiles_slice[0, :][bool_mask], both_profiles_slice[1, :][bool_mask]
        slice_ne_uncert, slice_te_uncert = both_uncertanties_slice[0, :][bool_mask], both_uncertanties_slice[1, :][bool_mask]
        slice_rmid_mask = slice_rmid[bool_mask]
        
        # Get the SOL radius first
        radii_gaussians = np.array([np.linspace(truncnorm.ppf(0.01, (lbound_r - mu) / var, (ubound_r - mu) / var, mu, var), 
                                     truncnorm.ppf(0.99, (lbound_r - mu) / var, (ubound_r - mu) / var, mu, var), 50000)
                        for mu, var in zip(slice_rmid_mask, [rad_var]*len(slice_rmid_mask))])
        
        tes_gaussians = np.array([np.linspace(truncnorm.ppf(0.01, (lbound_t - mu) / var, (ubound_t - mu) / var, mu, var), 
                                     truncnorm.ppf(0.99, (lbound_t - mu) / var, (ubound_t - mu) / var, mu, var), 50000)
                        for mu, var in zip(slice_te, slice_te_uncert)])
        
        separatrix_loc = np.logical_and(tes_gaussians > 80, tes_gaussians < 120)
        tesep_in_separatrix = tes_gaussians[separatrix_loc]
        rsep = radii_gaussians[separatrix_loc].mean()
        
        # Update the boolean mask to include SOL 
        
        SOL_mask = np.logical_and(slice_rmid_mask > rsep - SOL_width, slice_rmid_mask < rsep + SOL_width)
        
        slice_ne_SOL_masked, slice_ne_uncert_SOL_masked = slice_ne[SOL_mask], slice_ne_uncert[SOL_mask]
        nes_gaussians = np.array([np.linspace(truncnorm.ppf(0.01, (lbound_n - mu) / var, (ubound_n - mu) / var, mu, var), 
                                     truncnorm.ppf(0.99, (lbound_n - mu) / var, (ubound_n - mu) / var, mu, var), 50000)
                        for mu, var in zip(slice_ne_SOL_masked, slice_ne_uncert_SOL_masked)])

        nes_in_separatrix = nes_gaussians[separatrix_loc]
        if len(nes_in_separatrix) == 0: 
            print(slice_ne, slice_te, slice_ne_uncert, slice_te_uncert)
        slice_nesep = np.mean(nes_in_separatrix)
        slice_tesep = np.mean(tesep_in_separatrix)
        
        pulse_neseps[n] = slice_nesep
        pulse_teseps[n] = slice_tesep
    
    return pulse_neseps, pulse_teseps
