# Creating a database of scalar values for ICRH coupling analysis

In [1]:
# assume working in Jupyter Lab
%matplotlib inline 

%load_ext autoreload
%autoreload 2

In [2]:
import sys
sys.path.append('C:\\Users\\JH218595\\Documents\\pywed')

In [3]:
import numpy as np
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt 
plt.rcParams['figure.figsize'] = (10,6)

from control_room import *
from pulse_database import PulseDB
from tqdm import tqdm

import pandas as pd
import seaborn as sns
sns.set_style('whitegrid')

pradwest only available on linux machines


The database has been created in another notebook. Importing database : 

In [4]:
hdf5_filename = 'databases/WEST_C4_ICRH_pulse_data.hdf5'
db = PulseDB(hdf5_filename)
print(f'Database contains {len(db.pulse_list)} shots, from #{db.pulse_list[0]} to #{db.pulse_list[-1]} ')

Database contains 225 shots, from #54404 to #55227 


## Creating a meaningfull database with pandas
The idea is to split time in small pieces and to calculate scalar values for each of them

In [5]:
def split_in_pieces(y, t, nb_pieces):
    """Split a time signel y(t) into smaller piece of length dt, and return t, average, min, max and std of each of them"""
    y_mean_min_max, t_pieces = [], []
    if nb_pieces > 0: 
        ts = np.array_split(np.squeeze(t), nb_pieces)
        ys = np.array_split(np.squeeze(y), nb_pieces)
        for (_y, _t) in zip(ys, ts):
            t_pieces.append(np.mean(_t))
            y_mean_min_max.append(mean_min_max(_y))
        return np.array(y_mean_min_max), np.array(t_pieces)    
    else:
        return np.array([np.nan, np.nan, np.nan]), np.array([np.nan])
    

In [6]:
data = pd.DataFrame()
dt = 0.05 # s

for pulse in tqdm(db.pulse_list):
    # start as ip > 100 kA
    ip, t_ip = db.get_signal(pulse, 'Ip')
    
    t_start = t_ip[(ip > 100).squeeze()][0]
    t_end = t_ip[(ip > 100).squeeze()][-1]

    nb_pieces = int(np.round((t_end - t_start)/dt))
    
    rows = {'pulse': pulse}
    for signame in db.list_signal(pulse):           
        try:
            y, t = db.get_signal(pulse, signame)
            _y, _t = in_between(y, t, t_start, t_end)
            ys, ts = split_in_pieces(_y, _t, nb_pieces)

        except IndexError as e:
            y, t = db.get_signal(pulse, signame)
            # deals with resumed data (like IC frequencies):
            # replicate the data for the number of pieces
            if y.ndim == 1:
                ys = np.tile(y, (int(nb_pieces),1))
                
        except ValueError as e:
            ys = np.zeros(nb_pieces)
        
        # add data points into the Dataframe
        if signame == 'IC_Frequencies':
            rows['freq_Q1'] = ys[:,0]
            rows['freq_Q2'] = ys[:,1]
            rows['freq_Q4'] = ys[:,2]
        if signame == 'IC_Positions':
            rows['R_Q1'] = ys[:,0]
            rows['R_Q2'] = ys[:,1]
            rows['R_Q4'] = ys[:,2]            
        else:
            
            if ys.ndim > 1:  # if to deal with the case of zeros (ValueError previously)
                ys = ys[:,0]  # keep only mean (yet)
            rows[signame] = np.squeeze(ys)
            
        _df = pd.DataFrame(rows)
    # append data from each pulse to the final DF
    data = data.append(_df)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)
100%|████████████████████████████████████████| 225/225 [02:13<00:00,  2.02it/s]


In [7]:
data.query('pulse == 55215')['Prad']

0      0.198305
1      0.191809
2      0.215402
3      0.213503
4      0.185092
5      0.209027
6      0.246419
7      0.270550
8      0.274964
9      0.353263
10     0.347268
11     0.341945
12     0.330268
13     0.325691
14     0.325049
15     0.315841
16     0.313601
17     0.313779
18     0.318004
19     0.319862
20     0.318982
21     0.316751
22     0.309419
23     0.307076
24     0.307739
25     0.308788
26     0.308984
27     0.305351
28     0.300045
29     0.292635
         ...   
176    0.463673
177    0.463348
178    0.461452
179    0.464318
180    0.464583
181    0.466293
182    0.465569
183    0.467330
184    0.469582
185    0.465859
186    0.470538
187    0.477654
188    0.603772
189    0.643318
190    0.633606
191    0.763611
192    0.928220
193    0.952980
194    0.951186
195    0.947821
196    0.944316
197    0.857655
198    0.709856
199    0.853262
200    0.867225
201    1.013832
202    0.776838
203    0.823715
204    1.011851
205    0.962115
Name: Prad, Length: 206,

In [8]:
pulse = 55215

In [9]:
    ip, t_ip = db.get_signal(pulse, 'Ip')
    
    t_start = t_ip[(ip > 100).squeeze()][0]
    t_end = t_ip[(ip > 100).squeeze()][-1]

In [10]:
nb_pieces = int(np.round((t_end - t_start)/dt))

In [11]:
_y, _t = db.get_signal(pulse, 'Prad')

In [12]:
 ys, ts = split_in_pieces(_y, _t, nb_pieces)

In [13]:
ys[:,0]

array([4.83331213e-03, 4.80829784e-03, 4.80839737e-03, 4.80671772e-03,
       4.89999280e-03, 4.86018198e-03, 4.91723005e-03, 4.94691209e-03,
       5.03203120e-03, 5.16148520e-03, 5.16350490e-03, 5.12332049e-03,
       5.21074123e-03, 5.18968100e-03, 5.20835427e-03, 5.23343015e-03,
       5.26485388e-03, 4.96795041e-03, 6.71250235e-03, 6.89079171e-03,
       3.91230623e-03, 4.61171281e-03, 5.85905545e-03, 6.33918042e-03,
       7.02167087e-03, 7.50835642e-03, 6.43931735e-03, 6.92749132e-03,
       7.30129402e-03, 7.81501664e-03, 7.30011668e-03, 9.00953534e-03,
       6.57057921e-03, 6.86132460e-03, 7.30693329e-03, 8.51097508e-03,
       8.87993668e-03, 9.08290187e-03, 8.86453511e-03, 8.56498117e-03,
       8.93583709e-03, 9.73946255e-03, 9.32835012e-03, 9.51052153e-03,
       9.71111715e-03, 9.61911431e-03, 1.09953815e-02, 1.03736983e-02,
       1.05297966e-02, 1.01201415e-02, 1.06016808e-02, 1.08454984e-02,
       1.14650428e-02, 1.12685562e-02, 1.18090576e-02, 1.14892660e-02,
      

In [14]:
data.query('pulse == 55215')

Unnamed: 0,IC_Frequencies,IC_P_Q1,IC_P_Q2,IC_P_Q4,IC_P_tot,IC_Rc_Q1_avg,IC_Rc_Q2_avg,IC_Rc_Q4_avg,IC_Vacuum_Q1_left,IC_Vacuum_Q1_right,...,Prad_bulk,R_Q1,R_Q2,R_Q4,Rext_median,freq_Q1,freq_Q2,freq_Q4,nl,pulse
0,55.5,0.0,0.0,0.100000,0.000000,0.079,0.214,1.498000,,,...,0.197864,2.9501,2.9501,2.9499,2924.040000,55.5,55.900002,56.049999,0.878695,55215
1,55.5,0.0,0.0,0.100000,0.000000,0.079,0.214,1.498000,,,...,0.191614,2.9501,2.9501,2.9499,2932.960000,55.5,55.900002,56.049999,1.210232,55215
2,55.5,0.0,0.0,0.100000,0.000000,0.079,0.214,1.498000,,,...,0.214594,2.9501,2.9501,2.9499,2935.640000,55.5,55.900002,56.049999,1.417556,55215
3,55.5,0.0,0.0,0.100000,0.000000,0.079,0.214,1.498000,,,...,0.209370,2.9501,2.9501,2.9499,2900.480000,55.5,55.900002,56.049999,1.511277,55215
4,55.5,0.0,0.0,0.100000,0.000000,0.079,0.214,1.498000,,,...,0.183207,2.9501,2.9501,2.9499,2925.840000,55.5,55.900002,56.049999,1.605372,55215
5,55.5,0.0,0.0,0.100000,0.000000,0.079,0.214,1.498000,,,...,0.208398,2.9501,2.9501,2.9499,2937.680000,55.5,55.900002,56.049999,1.706567,55215
6,55.5,0.0,0.0,0.100000,0.000000,0.079,0.214,1.498000,,,...,0.246236,2.9501,2.9501,2.9499,2936.200000,55.5,55.900002,56.049999,1.863144,55215
7,55.5,0.0,0.0,0.100000,0.000000,0.079,0.214,1.498000,,,...,0.269773,2.9501,2.9501,2.9499,2936.920000,55.5,55.900002,56.049999,1.942142,55215
8,55.5,0.0,0.0,0.100000,0.000000,0.079,0.214,1.498000,,,...,0.274839,2.9501,2.9501,2.9499,2937.120000,55.5,55.900002,56.049999,1.939899,55215
9,55.5,0.0,0.0,0.100000,0.000000,0.079,0.214,1.498000,,,...,0.341543,2.9501,2.9501,2.9499,2937.040000,55.5,55.900002,56.049999,1.913293,55215


In [15]:
data.to_csv('WEST_C4_database_resumed_parameters.csv')

In [16]:
data.size

2378695