In [1]:
%matplotlib tk
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd
import xarray as xr 
import scipy.stats as stat
import sys
import os
import glob
import re
sys.path.append('/home/jleland/Coding/Projects/flopter')
import flopter.core.ivdata as iv
import flopter.core.lputils as lp
import flopter.magnum.database as ut
import flopter.core.fitters as fts# Create analysed dataset metadata 

In [4]:
path_to_datasets = '/home/jleland/data/external/magnum/'
# path_to_analysed_datasets = 'analysed_2'
path_to_analysed_datasets = 'analysed_4'
# path_to_analysed_datasets = 'analysed_3_downsampled'
# path_to_analysed_datasets = 'test'
os.chdir(path_to_datasets)

---
## Saving / loading section
The metadata doesn't need to be reconstructed _every_ time, so we can store it in a .csv file output by pandas. The following 2 methods save and load, use whichever is appropriate

In [5]:
metadata_filename = f'{path_to_analysed_datasets}_xr_metadata.csv'

In [6]:
# Loading the metadata .csv file into a pandas dataframe
analysed_infos_df = pd.read_csv(metadata_filename).set_index('adc_index')

In [7]:
# Saving the metadata.csv file from the constructed pandas dataframe
analysed_infos_df.to_csv(metadata_filename)

End of save/load segment

---

## Index choice section

In [8]:
magnum_probes = lp.MagnumProbes()

In [9]:
indices = analysed_infos_df.where(analysed_infos_df.time_len >= 500).dropna().index.values
indices
print(len(indices), len(analysed_infos_df))

440 442


In [6]:
# list(analysed_infos_df.groupby('time_len'))

In [11]:
# Joe's requested shots
indices = [396, 398, 409]

In [8]:
# Axial Scan
indices = [132,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189]

In [7]:
# 100A, 5slm (He), 0.8T
indices = [357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386]

In [24]:
# 100A, 5slm (He), 1.2T
indices = [245, 246, 247, 248, 249, 250, 251, 252, 254, 255, 256, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271]

In [7]:
# 110A, 5slm (H), 1.2T
indices = [320, 321, 322, 323, 324, 325, 327, 328, 329, 330, 331, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346]

In [7]:
indices = [357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386]\
          + [132,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189]\
          + [320, 321, 322, 323, 324, 325, 327, 328, 329, 330, 331, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346]

In [12]:
analysed_infos_df.loc[indices]

Unnamed: 0_level_0,shot_number,shot_timestamp,shot_time,filename,time_len,sweep_len
adc_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
396,398,6699383298543284224,2019-06-06 12:48:45,analysed_4/a398_396_6699383298543284224.nc,5000,1499
398,400,6699384234551673856,2019-06-06 12:52:23,analysed_4/a400_398_6699384234551673856.nc,5000,507
409,411,6699401399545384960,2019-06-06 13:58:59,analysed_4/a411_409_6699401399545384960.nc,5000,301


## Load adc file metadata

In [14]:
os.chdir('/home/jleland/data/external/magnum/')
meta_data_ds = xr.open_dataset('all_meta_data.nc')
print(meta_data_ds)

<xarray.Dataset>
Dimensions:                 (shot_number: 523, ts_radial_pos: 46)
Coordinates:
  * shot_number             (shot_number) int32 0 1 2 3 4 ... 519 520 521 522
    ts_number               (shot_number) float64 ...
    ts_timestamp            (shot_number) float64 ...
    ts_time                 (shot_number) datetime64[ns] ...
    adc_index               (shot_number) float64 ...
    adc_time                (shot_number) datetime64[ns] ...
  * ts_radial_pos           (ts_radial_pos) float64 -36.47 -34.8 ... 37.14 38.82
Data variables:
    adc_filename            (shot_number) object ...
    ts_density              (shot_number, ts_radial_pos) float64 ...
    ts_temperature          (shot_number, ts_radial_pos) float64 ...
    ts_d_density            (shot_number, ts_radial_pos) float64 ...
    ts_d_temperature        (shot_number, ts_radial_pos) float64 ...
    adc_folder              (shot_number) object ...
    adc_calibration_index   (shot_number) object ...
    adc_4_

# Analysis of shot / adc_file index selection

In [15]:
def preprocess_sel(ds, sweep=slice(0,997)):
    return ds.sel(sweep=sweep)

def preprocess_average(ds):
    ds = ds.reset_index('time', drop=True)
    ds_avg = ds.mean('sweep')
    ds_avg = ds_avg.assign({'d_current': ds.std('sweep')['current']})
    return ds_avg

def preprocess_average_downsample(ds, downsample_to=500):
    ds = ds.reset_index('time', drop=True)
    dsf = int(len(ds.time) / downsample_to)
    ds = ds.sel(time=slice(0, len(ds.time), dsf))
    ds_avg = ds.mean('sweep')
    ds_avg = ds_avg.assign({'d_current': ds.std('sweep')['current']})
    return ds_avg

In [16]:
analysed_metadata_oi = analysed_infos_df.loc[indices]

files_oi = analysed_metadata_oi['filename'].values
shot_numbers = analysed_metadata_oi['shot_number'].values
shot_numbers_da = xr.DataArray(shot_numbers, dims=['shot_number'], name='shot_number')
min_sweep_len = analysed_metadata_oi['sweep_len'].min()
print(min_sweep_len)

301


# Filter Arcs and Find Region of Interest of Each Shot

In [17]:
select_meta_ds = meta_data_ds.sel(shot_number=shot_numbers_da)

In [18]:
ds_list = [xr.open_dataset(fn) for fn in files_oi]

In [19]:
fig, ax = plt.subplots()
start_times = []
for i, ds in enumerate(ds_list):
    print(files_oi[i])
#     ds.mean('direction').max('time').set_coords('shot_time')['current'].plot.line(x='shot_time', hue='probe')
    start_times.append(ds.isel(probe=0, sweep=0, direction=0)['start_time'].values)
    
ax.plot(shot_numbers, start_times)
fig.show()

analysed_4/a398_396_6699383298543284224.nc
analysed_4/a400_398_6699384234551673856.nc
analysed_4/a411_409_6699401399545384960.nc


In [23]:
start_times = np.array(start_times)

In [24]:
problem_shot_indices = np.where(start_times >= 1.05)
problem_shots = shot_numbers[problem_shot_indices]
problem_shots_da = xr.DataArray(problem_shots, dims=['shot_number'], name='shot_number')
print(list(problem_shots))


[215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 395, 399, 400, 401, 402, 404, 405, 406, 407, 409, 410, 411, 412, 413, 414, 415, 416, 423, 424, 432, 433, 434, 435, 436, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451]


In [16]:
problem_shots = [215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 395, 399, 400, 401, 402, 404, 405, 406, 407, 409, 410, 411, 412, 413, 414, 415, 416, 423, 424, 432, 433, 434, 435, 436, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451]
len_ps = len(problem_shots)
for i, ps in enumerate(problem_shots):
    print(f'Shot {ps}: {((i/len_ps) * 100):.1f}%')

Shot 215: 0.0%
Shot 216: 1.2%
Shot 217: 2.5%
Shot 218: 3.8%
Shot 219: 5.0%
Shot 220: 6.2%
Shot 221: 7.5%
Shot 222: 8.8%
Shot 223: 10.0%
Shot 224: 11.2%
Shot 225: 12.5%
Shot 226: 13.8%
Shot 227: 15.0%
Shot 228: 16.2%
Shot 229: 17.5%
Shot 230: 18.8%
Shot 231: 20.0%
Shot 232: 21.2%
Shot 233: 22.5%
Shot 234: 23.8%
Shot 235: 25.0%
Shot 236: 26.2%
Shot 237: 27.5%
Shot 238: 28.7%
Shot 239: 30.0%
Shot 240: 31.2%
Shot 241: 32.5%
Shot 242: 33.8%
Shot 243: 35.0%
Shot 359: 36.2%
Shot 360: 37.5%
Shot 361: 38.8%
Shot 362: 40.0%
Shot 363: 41.2%
Shot 364: 42.5%
Shot 365: 43.8%
Shot 366: 45.0%
Shot 367: 46.2%
Shot 368: 47.5%
Shot 369: 48.8%
Shot 370: 50.0%
Shot 371: 51.2%
Shot 395: 52.5%
Shot 399: 53.8%
Shot 400: 55.0%
Shot 401: 56.2%
Shot 402: 57.5%
Shot 404: 58.8%
Shot 405: 60.0%
Shot 406: 61.3%
Shot 407: 62.5%
Shot 409: 63.7%
Shot 410: 65.0%
Shot 411: 66.2%
Shot 412: 67.5%
Shot 413: 68.8%
Shot 414: 70.0%
Shot 415: 71.2%
Shot 416: 72.5%
Shot 423: 73.8%
Shot 424: 75.0%
Shot 432: 76.2%
Shot 433: 77.5%


In [17]:
test_dir = 'test/'
# test_files = ['a215_225_6698640011711049728.nc', 'a157_162_6698304580043636736.nc']
# test_shots = [215, 157]

test_files = glob.glob(test_dir + '/*.nc')
test_files.sort()

test_infos = []

for i, anal_ds in enumerate(test_files):
    match = re.search("\/a{1}([0-9]{3})_([0-9]{3})_([0-9]{19})\.nc", anal_ds)
    shot_index = int(match.group(1))
    adc_index = int(match.group(2))
    shot_timestamp = int(match.group(3))
    shot_time = ut.human_datetime_str(int(match.group(3)))
    
    ds = xr.open_dataset(anal_ds)
    time_len = len(ds['time'])
    sweep_len = len(ds['sweep'])
    
    test_infos.append({
        'adc_index':adc_index, 
        'shot_number':shot_index, 
        'shot_timestamp':shot_timestamp, 
        'shot_time':shot_time, 
        'filename':anal_ds,
        'time_len': time_len,
        'sweep_len': sweep_len
    })

test_infos_df = pd.DataFrame(test_infos).set_index('adc_index')
test_shots = test_infos_df['shot_number'].values

In [18]:
test_ds_list = [xr.open_dataset(fn) for fn in test_files]

In [19]:
test_start_times = []
for i, ds in enumerate(ds_list):
    print(test_files[i])
#     ds.mean('direction').max('time').set_coords('shot_time')['current'].plot.line(x='shot_time', hue='probe')
    test_start_times.append(ds.isel(probe=0, sweep=0, direction=0)['start_time'].values)

test/a215_225_6698640011711049728.nc
test/a216_226_6698640578178048000.nc
test/a217_227_6698643189965025280.nc
test/a218_229_6698645166135541760.nc
test/a219_230_6698645754549968896.nc
test/a220_232_6698647714743185408.nc
test/a221_234_6698650666522157056.nc
test/a222_235_6698652327998412800.nc
test/a223_236_6698655832758107136.nc
test/a224_237_6698657200585526272.nc
test/a225_238_6698658102491749376.nc
test/a226_239_6698658708129149952.nc
test/a227_240_6698661025837923328.nc
test/a228_241_6698661541924644864.nc
test/a229_242_6698662502543517696.nc
test/a230_243_6698663701835258880.nc
test/a231_244_6698664778547857408.nc
test/a232_245_6698665325687429120.nc
test/a233_246_6698665938798135296.nc
test/a234_247_6698666541643821056.nc
test/a235_248_6698666892459114496.nc
test/a236_249_6698667158620034048.nc
test/a237_250_6698667836284542976.nc
test/a238_251_6698670134880661504.nc
test/a239_252_6698670651268073472.nc
test/a240_253_6698670995125477376.nc
test/a241_254_6698671468133408768.nc
t

IndexError: list index out of range

In [20]:
fig, ax = plt.subplots()
ax.plot(shot_numbers, start_times)
ax.plot(test_shots, test_start_times, 'x')
fig.show()

Re-running the problem datasets seems to have fixed the problem, as they now all start from the same time. It would still be good to run an average IV thing on them all though to check. 

In [22]:
combined_ds = xr.open_mfdataset(test_files, concat_dim=problem_shots_da, preprocess=preprocess_average_downsample, parallel=True, combine='nested')
# combined_ds = combined_ds.load()
combined_ds

<xarray.Dataset>
Dimensions:      (direction: 2, probe: 2, shot_number: 80, time: 500)
Coordinates:
  * probe        (probe) object 'S' 'L'
  * direction    (direction) object 'up' 'down'
  * shot_number  (shot_number) int64 215 216 217 218 219 ... 447 448 449 450 451
Dimensions without coordinates: time
Data variables:
    voltage      (shot_number, probe, direction, time) float64 dask.array<chunksize=(1, 2, 2, 500), meta=np.ndarray>
    current      (shot_number, probe, direction, time) float64 dask.array<chunksize=(1, 2, 2, 500), meta=np.ndarray>
    shot_time    (shot_number, probe, direction, time) float64 dask.array<chunksize=(1, 2, 2, 500), meta=np.ndarray>
    start_time   (shot_number, probe, direction) float64 dask.array<chunksize=(1, 2, 2), meta=np.ndarray>
    d_current    (shot_number, probe, direction, time) float64 dask.array<chunksize=(1, 2, 2, 500), meta=np.ndarray>

In [25]:
fig, ax = plt.subplots(2)
combined_ds.mean('direction')['start_time'].plot.line(ax=ax[0], marker='+', linestyle='none', hue='probe')
combined_ds.mean('direction')['start_time'].plot.line(ax=ax[1], marker='+', linestyle='none', hue='probe')
plt.show()

In [30]:
problem_ds = combined_ds.mean('direction')

for shot in problem_ds['shot_number'].values[0:10]:
    ds = problem_ds.sel(shot_number=shot).set_coords('voltage')
    
    fig, axes = plt.subplots(2, 2, sharex='col')
    ds.sel(probe='S')['current'].plot.line(ax=axes[0][0], x='voltage')   
    ds.sel(probe='L')['current'].plot.line(ax=axes[1][0], x='voltage')
    
    ds.sel(probe='S')['current'].plot.line(ax=axes[0][1])
    ds.sel(probe='S')['voltage'].plot.line(ax=axes[0][1])
    ds.sel(probe='L')['current'].plot.line(ax=axes[1][1])
    ds.sel(probe='L')['voltage'].plot.line(ax=axes[1][1])
    