# Creating a Database of Scalar Values for WEST C5 Experimental Campaign 

In [32]:
# assume working in Jupyter Lab
%matplotlib inline 

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [33]:
import sys
# WEST libraries
sys.path.append('C:\\Users\\JH218595\\Documents\\pywed')
sys.path.append('C:\\Users\\JH218595\\Documents\\IRFMtb')
sys.path.append('C:\\Users\\JH218595\\Documents\\PPPAT')

In [34]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
plt.rcParams['figure.figsize'] = (10,6)
from tqdm.notebook import tqdm
#from scipy.optimize import curve_fit
#import seaborn as sns
#sns.set_style('whitegrid')
from pulse_database import PulseDB

In [35]:
from pppat.control_room.signals import *

The database has been created in another notebook. Importing database : 

In [36]:
dt = 0.10 # s

hdf5_filename = 'databases/WEST_C5_pulse_data.hdf5'
file_output = 'WEST_C5_database_resumed_parameters.csv'

In [37]:
db = PulseDB(hdf5_filename)
print(f'Database contains {len(db.pulse_list)} shots, from #{db.pulse_list[0]} to #{db.pulse_list[-1]} ')

Database contains 108 shots, from #56287 to #56489 


## Creating a meaningfull database with pandas
The idea is to split time in small pieces and to calculate scalar values for each of them

In [38]:
def split_in_pieces(y, t, nb_pieces):
    """Split a time signel y(t) into smaller piece of length dt, and return t, average, min, max and std of each of them"""
    y_mean_min_max, t_pieces = [], []
    if nb_pieces > 0: 
        ts = np.array_split(np.squeeze(t), nb_pieces)
        ys = np.array_split(np.squeeze(y), nb_pieces)
        for (_y, _t) in zip(ys, ts):
            # Get the mean, min and max values of the data in the time piece
            # Keep only if the mean value is within +/- 5% of the min/max
            # otherwise use NaN
            _mean, _mini, _maxi = mean_min_max(_y)
            
            if _mean == _mini == _maxi:  # no data
                y_mean_min_max.append([_mean, _mini, _maxi])           
                t_pieces.append(np.mean(_t))                
            elif (np.abs(_mean - _mini)/_mean < 30/100) and (np.abs(_mean - _maxi)/_mean < 30/100):
                y_mean_min_max.append([_mean, _mini, _maxi])           
                t_pieces.append(np.mean(_t))
            else:
                y_mean_min_max.append([np.nan, np.nan, np.nan]) 
                t_pieces.append(np.nan)
        return np.array(y_mean_min_max), np.array(t_pieces)    
    else:
        return np.array([np.nan, np.nan, np.nan]), np.array([np.nan])
    

In [39]:
data = pd.DataFrame()


for pulse in tqdm(db.pulse_list):
    # start as ip > 100 kA
    ip, t_ip = db.get_signal(pulse, 'Ip')
    
    t_start = t_ip[(ip > 0.3).squeeze()][0]
    t_end = t_ip[(ip > 0.3).squeeze()][-1]

    nb_pieces = int(np.round((t_end - t_start)/dt))
    
    rows = {'pulse': pulse}
    for signame in db.list_signal(pulse):           
        try:
            y, t = db.get_signal(pulse, signame)
            # Smooth some noisy signals
            if any(sig in signame for sig in ['Cu', 'Ag18', 'Ag19', 'Langmuir', 'Prad', 'Prad_bulk','LH_P_tot', 'IC_P_tot', 'IC_P_Q1', 'IC_P_Q2', 'IC_P_Q4']):
                y = smooth(y, window_length=21)

            # splitting signals in pieces
            _y, _t = in_between(y, t, t_start, t_end)
            ys, ts = split_in_pieces(_y, _t, nb_pieces)

            # Taking reference time from the plasma current
            if signame == 'Ip':
                rows['time'] = np.squeeze(ts)
                
        except IndexError as e:
            y, t = db.get_signal(pulse, signame)
            # deals with resumed data (like IC frequencies):
            # replicate the data for the number of pieces
            if y.ndim == 1:
                ys = np.tile(y, (int(nb_pieces),1))
                
        except ValueError as e:
            ys = np.zeros(nb_pieces)
        
        # add data points into the Dataframe
        if signame == 'IC_Frequencies':
            rows['freq_Q1'] = ys[:,0]
            rows['freq_Q2'] = ys[:,1]
            rows['freq_Q4'] = ys[:,2]
        if signame == 'LH_Positions':
            rows['R_LH1'] = ys[:,0]
            rows['R_LH2'] = ys[:,1]
        if signame == 'IC_Positions':
            rows['R_Q1'] = ys[:,0]
            rows['R_Q2'] = ys[:,1]
            rows['R_Q4'] = ys[:,2]
        if signame == 'Datetime':
            rows['year'] = ys[:,0]
            rows['month']= ys[:,1]
            rows['day']  = ys[:,2]
        else:
            
            if ys.ndim > 1:  # if to deal with the case of zeros (ValueError previously)
                ys = ys[:,0]  # keep only mean (yet)
            rows[signame] = np.squeeze(ys)
            
        _df = pd.DataFrame(rows)
    # append data from each pulse to the final DF
    data = data.append(_df)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=108.0), HTML(value='')))

  app.launch_new_instance()





Exporting the Dataframe:

In [40]:
data.to_csv(file_output)

In [41]:
data

Unnamed: 0,pulse,Ag18,Ag19,Cu,year,month,day,freq_Q1,freq_Q2,freq_Q4,...,LH_P_tot,R_LH1,R_LH2,LH_Positions,MHD,Ohmic_P,Prad,Rext_median,Separatrix_P,nl
0,56287,1.540700,,3.959761,2020,12,10,55.299999,55.799999,55.799999,...,0.000000,3.0152,3.0172,3.0152,0.000000,0.0,0.0,2980.880000,0.000000,1.435414
1,56287,,1.033671,3.550893,2020,12,10,55.299999,55.799999,55.799999,...,0.000000,3.0152,3.0172,3.0152,0.000000,0.0,0.0,2950.440000,0.000000,1.545745
2,56287,,1.235728,,2020,12,10,55.299999,55.799999,55.799999,...,0.000000,3.0152,3.0172,3.0152,0.000000,0.0,0.0,2958.220000,0.000000,1.573494
3,56287,,1.143154,,2020,12,10,55.299999,55.799999,55.799999,...,0.000000,3.0152,3.0172,3.0152,0.000000,0.0,0.0,2943.200000,0.000000,1.567759
4,56287,1.000000,1.000000,2.425662,2020,12,10,55.299999,55.799999,55.799999,...,0.000000,3.0152,3.0172,3.0152,0.000000,0.0,0.0,2950.000000,0.000000,1.566279
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40,56489,14.616149,44.623701,273.192056,2020,12,18,55.650002,55.799999,55.500000,...,0.000000,3.0136,3.0167,3.0136,56.020408,0.0,0.0,2989.224490,,5.709460
41,56489,13.575090,44.145603,215.816313,2020,12,18,55.650002,55.799999,55.500000,...,0.000000,3.0136,3.0167,3.0136,59.285714,0.0,0.0,2989.571429,,5.864504
42,56489,15.615888,44.242105,263.748578,2020,12,18,55.650002,55.799999,55.500000,...,0.000000,3.0136,3.0167,3.0136,59.877551,0.0,0.0,2989.653061,0.000526,6.008412
43,56489,16.383295,47.341942,355.213240,2020,12,18,55.650002,55.799999,55.500000,...,0.000000,3.0136,3.0167,3.0136,61.234694,0.0,0.0,2988.693878,0.000525,6.105347
