# Creating a database of scalar values for ICRH coupling analysis

In [16]:
# assume working in Jupyter Lab
# Render matplotlib figures inline in the notebook output
%matplotlib inline 

# Reload imported modules automatically before each cell execution, so that
# edits to project files are picked up without restarting the kernel
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [17]:
import sys

# Extend the module search path with two local package directories.
# NOTE(review): these are absolute, user-specific Windows paths -- presumably
# where the project-local modules imported by this notebook live; confirm and
# make them configurable before sharing the notebook.
sys.path.extend([
    'C:\\Users\\JH218595\\Documents\\pywed',
    'C:\\Users\\JH218595\\Documents\\IRFMtb',
])

In [18]:
# Numerical and plotting stack
import numpy as np
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt 
# Default figure size applied to every matplotlib figure of this notebook
plt.rcParams['figure.figsize'] = (10,6)

# Project-local helpers.
# NOTE(review): the star import hides where names come from; `smooth`,
# `in_between` and `mean_min_max` used further down are presumably provided
# by control_room -- confirm and import them explicitly.
from control_room import *
from pulse_database import PulseDB
# Progress bar rendered as a notebook widget
from tqdm.notebook import tqdm

import pandas as pd
import seaborn as sns
# Use seaborn's 'whitegrid' style for the figures
sns.set_style('whitegrid')

The pulse database was created in another notebook; here we only load it:

In [19]:
# Input: HDF5 pulse database (created in another notebook)
hdf5_filename = 'databases/WEST_C4_ICRH_pulse_data.hdf5'
# Output: CSV file receiving the table of per-piece scalar values
file_output = 'WEST_C4_database_resumed_parameters.csv'

# Duration of one time piece, in seconds
dt = 0.1

In [20]:
# Open the pulse database and report how many shots it lists, together with
# the first and last entries of its pulse list
db = PulseDB(hdf5_filename)
print(f'Database contains {len(db.pulse_list)} shots, from #{db.pulse_list[0]} to #{db.pulse_list[-1]} ')

Database contains 966 shots, from #54404 to #55809 


In [21]:
# NOTE(review): `db` is already created from the same file in the previous
# cell; this second instantiation looks redundant -- confirm and remove.
db = PulseDB(hdf5_filename)

## Creating a meaningful database with pandas
The idea is to split each pulse into short time pieces (of duration `dt`) and to compute scalar values for each piece.

In [22]:
def split_in_pieces(y, t, nb_pieces, rel_tol=0.30):
    """Split a time signal y(t) into pieces and summarise each of them.

    The squeezed ``y`` and ``t`` arrays are cut into ``nb_pieces`` consecutive
    chunks of (almost) equal length with ``np.array_split``. For each chunk
    the mean, min and max of ``y`` are obtained from ``mean_min_max``.

    A chunk is kept only if the signal is steady over it, i.e. if both its
    min and its max lie within ``rel_tol`` (relative to ``|mean|``) of its
    mean. Chunks where mean, min and max are all equal (constant signal,
    e.g. no data) are always kept. Rejected chunks are replaced by NaN, for
    the values as well as for the time.

    Parameters
    ----------
    y : array-like
        Signal values.
    t : array-like
        Time base associated with ``y``.
    nb_pieces : int
        Number of chunks to split the signal into.
    rel_tol : float, optional
        Maximum relative deviation of min and max from the mean for a chunk
        to be kept. Defaults to 0.30 (30 %).

    Returns
    -------
    y_mean_min_max : numpy.ndarray
        Array of shape (nb_pieces, 3) holding [mean, min, max] per chunk.
    t_pieces : numpy.ndarray
        Array of shape (nb_pieces,) holding the mean time of each chunk.

    If ``nb_pieces`` is not strictly positive, a 1-D ``[nan, nan, nan]``
    array and a ``[nan]`` array are returned instead.
    """
    if nb_pieces <= 0:
        # Nothing to split: NaN placeholders (1-D, kept for compatibility)
        return np.array([np.nan, np.nan, np.nan]), np.array([np.nan])

    y_mean_min_max, t_pieces = [], []
    t_chunks = np.array_split(np.squeeze(t), nb_pieces)
    y_chunks = np.array_split(np.squeeze(y), nb_pieces)

    for _y, _t in zip(y_chunks, t_chunks):
        _mean, _mini, _maxi = mean_min_max(_y)

        if _mean == _mini == _maxi:
            # constant piece (e.g. no data): kept as is
            is_steady = True
        else:
            # Compare against |mean| so that negative-valued signals are
            # filtered like positive ones (dividing by a negative mean made
            # the test always pass). Multiplying instead of dividing also
            # avoids a division by zero when the mean is 0.
            max_deviation = rel_tol * np.abs(_mean)
            is_steady = (np.abs(_mean - _mini) < max_deviation) and \
                        (np.abs(_mean - _maxi) < max_deviation)

        if is_steady:
            y_mean_min_max.append([_mean, _mini, _maxi])
            t_pieces.append(np.mean(_t))
        else:
            y_mean_min_max.append([np.nan, np.nan, np.nan])
            t_pieces.append(np.nan)

    return np.array(y_mean_min_max), np.array(t_pieces)
    

In [23]:
# Signals made of three components, expanded into one column per component
# (signal name -> output column names)
multi_column_signals = {
    'IC_Frequencies': ('freq_Q1', 'freq_Q2', 'freq_Q4'),
    'IC_Positions': ('R_Q1', 'R_Q2', 'R_Q4'),
    'Datetime': ('year', 'month', 'day'),
}
# Noisy signals smoothed before being averaged. Matching is done by
# substring, so any signal whose name contains one of these is smoothed.
noisy_signals = ['Cu', 'Ag18', 'Ag19', 'Langmuir', 'Prad', 'Prad_bulk',
                 'LH_P_tot', 'IC_P_tot', 'IC_P_Q1', 'IC_P_Q2', 'IC_P_Q4']

pulse_frames = []    # one DataFrame per pulse, concatenated once at the end
skipped_pulses = []  # pulses for which Ip never exceeds the threshold

for pulse in tqdm(db.pulse_list):
    # start as ip > 100 kA
    ip, t_ip = db.get_signal(pulse, 'Ip')
    t_window = t_ip[(ip > 100).squeeze()]
    if len(t_window) == 0:
        # No sample above the threshold: no time window can be defined
        skipped_pulses.append(pulse)
        continue
    t_start, t_end = t_window[0], t_window[-1]

    nb_pieces = int(np.round((t_end - t_start)/dt))

    rows = {'pulse': pulse}
    for signame in db.list_signal(pulse):
        try:
            y, t = db.get_signal(pulse, signame)
            # Smooth some noisy signals
            if any(sig in signame for sig in noisy_signals):
                y = smooth(y, window_length=21)

            # splitting signals in pieces
            _y, _t = in_between(y, t, t_start, t_end)
            ys, ts = split_in_pieces(_y, _t, nb_pieces)

            # Taking reference time from the plasma current
            if signame == 'Ip':
                rows['time'] = np.squeeze(ts)

        except IndexError:
            y, t = db.get_signal(pulse, signame)
            # deals with resumed data (like IC frequencies):
            # replicate the data for the number of pieces
            if y.ndim != 1:
                # Unknown layout: skip this signal rather than silently
                # storing the values left over from the previous signal
                continue
            ys = np.tile(y, (nb_pieces, 1))

        except ValueError:
            ys = np.zeros(nb_pieces)

        # add data points into the Dataframe
        if signame in multi_column_signals:
            # Exclusive branch: these signals only produce their component
            # columns (an `if/if/if/else` chain previously let the first two
            # also fall through to the generic branch below)
            for col, colname in enumerate(multi_column_signals[signame]):
                rows[colname] = ys[:, col]
        else:
            if ys.ndim > 1:  # 1-D only in the zeros fallback (ValueError above)
                ys = ys[:, 0]  # keep only mean (yet)
            rows[signame] = np.squeeze(ys)

    # Build the per-pulse frame once, after all its signals are collected
    pulse_frames.append(pd.DataFrame(rows))

if skipped_pulses:
    print(f'Skipped {len(skipped_pulses)} pulse(s) without Ip > 100: {skipped_pulses}')

# Single concatenation instead of growing the DataFrame pulse by pulse
# (DataFrame.append is deprecated/removed in recent pandas and quadratic)
data = pd.concat(pulse_frames) if pulse_frames else pd.DataFrame()

HBox(children=(FloatProgress(value=0.0, max=966.0), HTML(value='')))

  app.launch_new_instance()
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)





Exporting the Dataframe:

In [24]:
# Write the table to CSV; with pandas' defaults the DataFrame index is
# written as the first column
data.to_csv(file_output)

In [25]:
# Display the resulting table (the rendered output below is truncated)
data

Unnamed: 0,pulse,Ag18,Ag19,Cu,year,month,day,freq_Q1,freq_Q2,freq_Q4,...,Prad,Prad_bulk,Rext_lower,Rext_lower_NICE,Rext_median,Rext_median_NICE,Rext_upper,Rext_upper_NICE,Separatrix_P,nl
0,54404,0.000000,0.000000,,2019.0,7.0,16.0,55.5,55.689999,55.700001,...,,,2812.571429,2.808633,2992.938776,2.944822,2931.816327,2.894968,,
1,54404,0.000000,0.000000,,2019.0,7.0,16.0,55.5,55.689999,55.700001,...,,,2857.551020,2.880549,2921.122449,2.959408,2852.285714,2.916914,-0.027970,1.189777
2,54404,0.000000,0.000000,,2019.0,7.0,16.0,55.5,55.689999,55.700001,...,0.130761,0.128938,2871.816327,2.827263,2936.836735,2.906623,2881.877551,2.859345,,1.202949
3,54404,0.000000,0.000000,,2019.0,7.0,16.0,55.5,55.689999,55.700001,...,0.087718,0.085613,2870.448980,2.891742,2940.877551,2.976342,2885.816327,2.943252,,1.255339
4,54404,0.000000,0.000000,1.273619,2019.0,7.0,16.0,55.5,55.689999,55.700001,...,0.076202,0.073030,2866.163265,2.881724,2933.734694,2.961178,2876.367347,2.926255,0.529017,1.281202
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75,55809,113.899019,113.899019,668.422883,2019.0,11.0,7.0,55.5,55.900002,56.049999,...,3.485154,3.196829,,2.862001,2942.229167,2.945885,,2.899803,1.469613,4.866864
76,55809,,,676.237071,2019.0,11.0,7.0,55.5,55.900002,56.049999,...,3.573965,3.338276,,2.843172,2939.250000,2.934550,,2.893063,,5.062341
77,55809,,,,2019.0,11.0,7.0,55.5,55.900002,56.049999,...,2.371503,,,2.854784,2943.041667,2.945176,,2.900379,,4.119820
78,55809,,,343.868944,2019.0,11.0,7.0,55.5,55.900002,56.049999,...,2.208962,1.953566,,2.846469,2942.354167,2.935368,,2.893989,,4.020531
