# Parsing ADAS data with pandas for the argon line at $\lambda = 750\,\mathrm{nm}$

Link to the original notebook:
https://nbviewer.jupyter.org/gist/anonymous/40a8f1b3b5e58a63e6c67e703f7c50c7

In [1]:
import pandas as pd
import numpy as np
from scipy import interpolate
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
%matplotlib

Using matplotlib backend: Qt5Agg


In [7]:
file_path = '../data/adas/ar_750.dat'
# Single pass over the file: count the lines and keep the second one
# (index 1), which is the line where the densities are stored.
with open(file_path) as file:
    for line_number, line_text in enumerate(file):
        if line_number == 1:
            density_index = line_text
num_lines = line_number + 1

# The table is read twice and stitched together side by side:
#   df2 -> header taken from line index 3, data from the odd lines after it
#   df3 -> header taken from line index 2, data from the even lines after it
# `header` is counted after `skiprows` has been applied, so the skipped line
# numbers start just past the header line each frame keeps.
# sep=r'\s+' replaces the deprecated delim_whitespace=True (same parsing).
df2 = pd.read_csv(file_path, sep=r'\s+',
                  skiprows=list(range(4, num_lines, 2)),
                  header=3)
df3 = pd.read_csv(file_path, sep=r'\s+',
                  skiprows=list(range(3, num_lines, 2)),
                  header=2)

# concat the dataframes in order to have a single dataframe
df = pd.concat([df2, df3], axis=1)
# Label the rows with the densities. split() without an argument tolerates
# repeated blanks/tabs, whereas split(' ') would produce empty labels.
df.index = density_index.split()
# add the densities as a column
df['n_e'] = df.index.values
# reorder the dataframe for data analysis: we want three columns,
# n_e, T_e, X_e
df_melt = pd.melt(df, id_vars='n_e', var_name='T_e', value_name='X_e')
# convert all the values in the column to numbers instead of string
df_melt = df_melt.apply(pd.to_numeric)

In [10]:
# Interpolation set-up.
# n_e and the p.e.c. (X_e) both span a very wide range, so the
# interpolation is carried out on their base-10 logarithms. The two
# columns of df_melt are overwritten in place with the log values.
df_melt[['n_e', 'X_e']] = np.log10(df_melt[['n_e', 'X_e']])

# Target relation: T = T(n, pec).
# Evaluate scipy.interpolate.griddata on a regular 200 x 200 mesh in
# (log n, log pec) space, using the table entries as scattered samples.
random_points_x, random_points_y = np.mgrid[10:16:200j, -10:-74:200j]
known_points = df_melt[['n_e', 'X_e']]
grid = interpolate.griddata(
    known_points,
    df_melt.T_e,
    (random_points_x, random_points_y),
    method='linear',
)

In [11]:
grid

array([[         nan,          nan,  33.52238315, ...,   0.07458198,
          0.07174099,   0.0689    ],
       [         nan,          nan,  33.51510606, ...,   0.07561012,
          0.07276913,   0.06992814],
       [         nan,          nan,  33.50811297, ...,   0.07663827,
          0.07379727,   0.07095628],
       ..., 
       [         nan,          nan,          nan, ...,   0.07459236,
          0.07174742,   0.0689    ],
       [         nan,          nan,          nan, ...,   0.07455228,
          0.07174618,   0.0689    ],
       [         nan,          nan,          nan, ...,   0.07452524,
          0.07171262,   0.0689    ]])

In [8]:
# plot interpolation data
fig = plt.figure()
ax = fig.gca(projection='3d')
#n_e, pec = np.meshgrid(random_points_x, random_points_y)
ax.plot_surface(random_points_x, random_points_y, grid, alpha=0.2)
ax.scatter(df_melt.n_e, df_melt.X_e, df_melt.T_e, 'b')
ax.set_zlabel('T (eV)')
ax.set_xlabel('Log n ($m^{-3}$)')
ax.set_ylabel('Log X')

<matplotlib.text.Text at 0x7f7c54020d30>

In [None]:
# define the functions for functions.py
def read_adas_file(file_path, element='ar'):
    '''
    Read an ADAS .dat file and return it as a long-format pandas dataframe.

    Parameters
    ----------
    file_path : str or path-like
        Location of the ADAS file. The second line of the file must hold
        the whitespace-separated densities.
    element : str, optional
        Selects the file layout. Only 'ar' is implemented.

    Returns
    -------
    pandas.DataFrame
        One row per (density, temperature) pair with three numeric
        columns: n_e, T_e, X_e.

    Raises
    ------
    ValueError
        If `element` is not supported, or if the file has fewer than two
        lines (so there is no density line to read).
    '''
    # Fail early with a clear message; without this check an unknown
    # element reaches pd.concat with df2/df3 never assigned.
    if element != 'ar':
        raise ValueError(
            "unsupported element {!r}: only 'ar' is implemented".format(element)
        )

    # Single pass over the file: count the lines and keep the second one,
    # which is the line where the densities are stored.
    density_line = None
    num_lines = 0
    with open(file_path) as file:
        for num_lines, line_text in enumerate(file, start=1):
            if num_lines == 2:
                density_line = line_text
    if density_line is None:
        raise ValueError(
            'ADAS file {!r} has no density line (fewer than 2 lines)'.format(
                file_path)
        )

    # The table is read twice and stitched together side by side:
    #   df2 -> header from line index 3, data from the odd lines after it
    #   df3 -> header from line index 2, data from the even lines after it
    # `header` is counted after `skiprows` has been applied, so the skipped
    # line numbers start just past the header line each frame keeps.
    # sep=r'\s+' replaces the deprecated delim_whitespace=True.
    df2 = pd.read_csv(file_path, sep=r'\s+',
                      skiprows=list(range(4, num_lines, 2)),
                      header=3)
    df3 = pd.read_csv(file_path, sep=r'\s+',
                      skiprows=list(range(3, num_lines, 2)),
                      header=2)

    # concat the dataframes in order to have a single dataframe
    df = pd.concat([df2, df3], axis=1)
    # Label the rows with the densities. split() without an argument
    # tolerates repeated blanks/tabs; split(' ') would create empty labels.
    df.index = density_line.split()
    # add the densities as a column
    df['n_e'] = df.index.values
    # reorder the dataframe for data analysis: we want three columns,
    # n_e, T_e, X_e
    df_melt = pd.melt(df, id_vars='n_e', var_name='T_e', value_name='X_e')
    # convert all the values in the column to numbers instead of string
    df_melt = df_melt.apply(pd.to_numeric)

    return df_melt

def interpolate_dataframe(df, points):
    '''
    Interpolate the temperature T = T(n, pec) from tabulated ADAS data.

    Both n_e and the p.e.c. (X_e) span a very wide range, so the
    interpolation is done on their base-10 logarithms; T_e is used as is.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with the columns n_e, T_e and X_e (n_e and X_e in linear,
        strictly positive units). It is not modified.
    points : array-like of shape (n, 2), or tuple of arrays
        Query coordinates as (log10 n_e, log10 X_e). They must already be
        in log space because the known points are converted before being
        handed to scipy.interpolate.griddata.

    Returns
    -------
    numpy.ndarray
        Interpolated temperatures, shape (n,) for an (n, 2) `points`
        array. Queries outside the convex hull of the data give NaN.
    '''
    # Use the `df` argument (not a notebook global) and work on fresh
    # arrays so the caller's dataframe is never overwritten; repeated
    # calls therefore cannot apply log10 twice.
    log_points = np.log10(np.asarray(df[['n_e', 'X_e']], dtype=float))
    temperatures = np.asarray(df['T_e'], dtype=float)

    return interpolate.griddata(log_points, temperatures, points,
                                method='cubic')