In [9]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules
# before each cell is executed, so edits to .py files are picked up live.
%load_ext autoreload
%autoreload 2

In [107]:
import numpy as np
import pickle as pkl
import matplotlib.pyplot as plt
from tqdm import tqdm

In [39]:
def _load_pickle(path):
    """Load one pickle file and close its file handle afterwards.

    NOTE(review): unpickling can execute arbitrary code; only load files
    that come from a trusted source.
    """
    with open(path, 'rb') as pickle_file:
        # 'latin1' is kept from the original loading code.
        return pkl.load(pickle_file, encoding='latin1')


train_dis = _load_pickle('train_dis.pkl')
test_dis = _load_pickle('test_dis.pkl')
train_nondis = _load_pickle('train_nondis.pkl')
test_nondis = _load_pickle('test_nondis.pkl')
train_datasets = [train_dis, train_nondis]
test_datasets = [test_dis, test_nondis]

In [139]:
# Windowing configuration. Lengths are expressed in the units of each shot's
# time array (presumably milliseconds — TODO confirm).
WINDOW_LEN = 100           # length of the state window
WINDOW_SAMPLING_LEN = 10   # number of columns sampled from each window
PRED_INTERVAL = 250        # gap between the window end and the prediction time

# Signal used as the action and signal used as the prediction target.
ACT_SIGNAL = ['pinj']
TARGET_SIGNAL = ['efsbetan']

# Signal names; their positions here are used as row indices into the data.
SIGNAL_LIST = [
    'R0',
    'aminor',
    'dssdenest',
    'efsbetan',
    'efsli',
    'efsvolume',
    'ip',
    'kappa',
    'tribot',
    'tritop',
    'pinj',
]

In [140]:
# The original referenced WINDOW_LENGTH, which is never defined in this
# notebook; the configured constant is WINDOW_LEN.
assert WINDOW_LEN % WINDOW_SAMPLING_LEN == 0, \
    'WINDOW_LEN must be a multiple of WINDOW_SAMPLING_LEN'

In [146]:
# Positions of the action and target signals within SIGNAL_LIST; these are
# later used to index the first axis of each shot's value array.
act_idx = list(map(SIGNAL_LIST.index, ACT_SIGNAL))
target_idx = list(map(SIGNAL_LIST.index, TARGET_SIGNAL))
print(act_idx, target_idx)

[10] [3]


In [42]:
# SIGNAL_LIST = ['R0', 'aminor', 'dssdenest', 'efsbetan', 'efsli', 'efsvolume', 'ip', 'kappa', 'tribot', 'tritop',
#  'pinj', 'pinj_15l', 'pinj_15r', 'pinj_21l', 'pinj_21r', 'pinj_30l', 'pinj_30r', 'pinj_33l', 'pinj_33r']

### State: the last 100 ms window of all signals. Action: mean injected power (pinj) over the next 250 ms. Target (next state): efsbetan 250 ms after the window ends.

In [147]:
def find_tidx(target_t, t_array):
    """Return the index of the first sample at or after ``target_t``.

    The result is the smallest ``i`` such that
    ``t_array[i - 1] < target_t <= t_array[i]`` (or ``0`` when ``target_t``
    equals the first entry). The comparison is vectorised with NumPy instead
    of scanning the array in a Python loop.

    Parameters
    ----------
    target_t : scalar
        Time to locate.
    t_array : array-like
        Time stamps; flattened before searching.

    Returns
    -------
    int
        Index into the flattened ``t_array``.

    Raises
    ------
    RuntimeError
        If ``t_array`` is empty, if ``target_t`` lies beyond the last entry,
        or if no bracketing pair of samples exists (e.g. ``target_t`` is
        before the first entry). Previously the last case printed a message
        and returned ``None``, which callers would silently accept as a
        slice bound or index.
    """
    t_array = np.asarray(t_array).flatten()
    if t_array.size == 0:
        raise RuntimeError('Time array is empty')
    if target_t > t_array[-1]:
        raise RuntimeError('Target time exceeded array')

    if target_t == t_array[0]:
        return 0

    # Positions k where t_array[k] < target_t <= t_array[k + 1].
    brackets = np.flatnonzero((t_array[:-1] < target_t) & (target_t <= t_array[1:]))
    if brackets.size == 0:
        raise RuntimeError(
            'Error in finding time index: target {} in time arr from {} to {}'.format(
                target_t, t_array[0], t_array[-1]))
    return int(brackets[0]) + 1

In [148]:
def make_prediction_data(dataset_list, window_stride=10):
    """Build (state window, action) -> target samples from shot datasets.

    For every shot, a window of length ``WINDOW_LEN`` is slid along the time
    axis in steps of ``window_stride``. Each position yields:

    * the window of all signal rows, sub-sampled to ``WINDOW_SAMPLING_LEN``
      columns,
    * the mean of the ``act_idx`` rows between the window end and the
      prediction time (``PRED_INTERVAL`` after the window end),
    * the ``target_idx`` rows at the prediction time.

    Parameters
    ----------
    dataset_list : list of dict
        Each dict maps a shot identifier to a pair ``v`` where ``v[0]`` is
        the time array and ``v[1]`` the value array indexed as
        ``values[signal, time_index]``.
        NOTE(review): rows are presumably ordered as ``SIGNAL_LIST`` — confirm.
    window_stride : scalar, optional
        Step by which the window start advances, in the same units as the
        time array. Defaults to 10, the value previously hard-coded.

    Returns
    -------
    X_npy : numpy.ndarray
        One row per window: flattened window sample followed by the action.
    y_npy : numpy.ndarray
        One row per window: the flattened target.
    sar_list : list of tuple
        ``(window_sample, action, target)`` for every window.

    Raises
    ------
    ValueError
        If ``window_stride`` is not positive (the loop would never end), or,
        from ``np.concatenate``, if no window was produced at all.
    RuntimeError
        If slicing a shot's values fails; the original error is chained.
    """
    if window_stride <= 0:
        raise ValueError('window_stride must be positive')

    sar_list = []
    X_list = []
    y_list = []

    # Loop-invariant column positions used to sub-sample each window.
    # Assumes a window spans WINDOW_LEN columns — TODO confirm sampling rate.
    sample_cols = np.linspace(0, WINDOW_LEN - 1, WINDOW_SAMPLING_LEN).astype(int)

    for dataset in dataset_list:
        for shot, v in tqdm(dataset.items()):
            times = v[0].flatten()
            if times.size == 0:
                # Shots without any time samples are reported and skipped.
                print(shot)
                continue
            values = v[1]
            window_beg_time = times[0]

            while True:
                # The original used the undefined name WINDOW_LENGTH here.
                window_end_time = window_beg_time + WINDOW_LEN
                pred_time = window_end_time + PRED_INTERVAL
                if pred_time > times[-1]:
                    break

                window_beg_idx = find_tidx(window_beg_time, times)
                window_end_idx = find_tidx(window_end_time, times)
                pred_time_idx = find_tidx(pred_time, times)

                try:
                    window = values[:, window_beg_idx:window_end_idx]
                    action = np.mean(values[act_idx, window_end_idx:pred_time_idx], axis=1)
                    target = values[target_idx, pred_time_idx]
                except Exception as err:
                    print(shot, values.shape, window_beg_idx, window_end_idx, pred_time_idx)
                    raise RuntimeError(
                        'Failed to slice values for shot {}'.format(shot)) from err
                window_sample = window[:, sample_cols]

                curr_sar = (window_sample, action, target)
                curr_X = np.concatenate([window_sample.flatten(), action.flatten()]).reshape(1, -1)
                curr_y = target.flatten().reshape(1, -1)

                sar_list.append(curr_sar)
                X_list.append(curr_X)
                y_list.append(curr_y)

                window_beg_time += window_stride

    X_npy = np.concatenate(X_list, axis=0)
    y_npy = np.concatenate(y_list, axis=0)

    return X_npy, y_npy, sar_list

In [132]:
# train_datasets is the [train_dis, train_nondis] list built when loading.
train_X, train_y, train_sar = make_prediction_data(train_datasets)

100%|██████████| 240/240 [00:43<00:00,  5.48it/s]
  3%|▎         | 8/240 [00:01<00:36,  6.29it/s]

161420


 97%|█████████▋| 233/240 [00:43<00:01,  6.18it/s]

162300


100%|██████████| 240/240 [00:45<00:00,  5.32it/s]


In [149]:
# test_datasets is the [test_dis, test_nondis] list built when loading.
test_X, test_y, test_sar = make_prediction_data(test_datasets)

100%|██████████| 60/60 [00:10<00:00,  5.49it/s]
100%|██████████| 60/60 [00:12<00:00,  4.88it/s]


In [137]:
# Persist the feature/target arrays and the raw (window, action, target)
# tuples. NOTE(review): data_dir must already exist; np.save/open will fail
# otherwise.
data_dir = 'data_1/'
np.save(data_dir+'train_X.npy', train_X)
np.save(data_dir+'train_y.npy', train_y)
np.save(data_dir+'test_X.npy', test_X)
np.save(data_dir+'test_y.npy', test_y)

# Use context managers so the pickle files are flushed and closed promptly
# instead of relying on garbage collection of the anonymous file objects.
with open(data_dir+'train_sar.pkl', 'wb') as train_sar_file:
    pkl.dump(train_sar, train_sar_file)
with open(data_dir+'test_sar.pkl', 'wb') as test_sar_file:
    pkl.dump(test_sar, test_sar_file)

In [151]:
# Each row is one window: the flattened sub-sampled window
# (signal rows x WINDOW_SAMPLING_LEN columns) followed by the action values.
test_X.shape

(19082, 111)