In [237]:
import pandas as pd
import numpy as np
import re

In [238]:
# Load the mean-luminosity table; the first CSV column becomes the index.
df = pd.read_csv(r'mean_luminosity.csv', index_col=0)

# Pull the Mach tag out of each config name ("M", an optional "_" or "-", then
# digits) and keep only its digits, e.g. "M_095" -> "095". The list returned by
# findall is stringified first, so a name without a tag ends up as "".
# Raw strings keep "\d" / "\D" from being read as (invalid) string escapes.
df['mach'] = (
    df.config.str.findall(r'M[_-]?\d+')
    .astype('str')
    .replace(r'\D+', '', regex=True)
)

# Keep only the rows that carry a Mach tag. The explicit copy makes `df` an
# independent frame, so the assignments below cannot raise SettingWithCopyWarning.
df = df[df['mach'] != ''].copy()

# Hard-coded corrections: tag "093" is treated as "095" and "05" as "050".
# NOTE(review): presumably these fix mislabelled run names - confirm.
df['mach'] = df['mach'].replace({'093': '095', '05': '050'})

# Put a decimal point after the first "0" ("095" -> "0.95", "0" -> "0.") and
# parse the result as a float. regex=False pins the literal replacement
# regardless of the pandas version's default.
df['mach'] = df.mach.str.replace('0', '0.', n=1, regex=False).astype('float16')

# Configuration id: the first "62" followed by three digits in the config name.
df['configuration'] = df.config.str.extract(r'(62\d{3})', expand=False)
missing_configuration = df['configuration'].isna()
if missing_configuration.any():
    # Fail loudly with the offending names rather than an opaque IndexError.
    raise ValueError(
        f"no 62xxx configuration id found in: {df.loc[missing_configuration, 'config'].tolist()}"
    )

# Beta token: "B" or "Beta", an optional "-" or "_", then digits. Group 0 of the
# extraction is the whole token (first match in the name).
pattern = re.compile(r'(B(eta)?[-_]?\d+)')
beta_token = df.config.str.extract(pattern.pattern, expand=True)[0]
missing_beta = beta_token.isna()
if missing_beta.any():
    raise ValueError(
        f"no beta token found in: {df.loc[missing_beta, 'config'].tolist()}"
    )

# Strip the "B"/"Beta" prefix and an underscore separator, then parse as int.
# A hyphen is kept on purpose so "Beta-5" still parses to -5, as before.
# NOTE(review): confirm that "-" is meant as a sign and not as a separator.
df['beta'] = beta_token.str.replace(r'^B(?:eta)?_?', '', regex=True).astype('int')
# Columns whose names consist only of digits get a "PT" prefix ("1" -> "PT1").
# The raw string keeps "\d" from being read as an (invalid) string escape.
ptap_cols = df.columns[df.columns.str.match(r'^\d+$')]
new_ptap_cols = 'PT' + ptap_cols.values

# Old name -> new name mapping for the rename below.
ptap_dix = dict(zip(ptap_cols, new_ptap_cols))

# Apply the rename and drop the raw config string in one pass.
df = df.rename(columns=ptap_dix).drop(columns=['config'])

# All column names are lower-case from here on ("PT1" -> "pt1").
df.columns = df.columns.str.lower()


# Snap every parsed Mach number onto the nominal value it belongs to.
m_ls = np.array([0, 0.5, 0.6, 0.7, 0.8, 0.85, 0.9, 0.95])
mach_raw = df.mach.to_numpy(dtype='float64')

# Pick the closest nominal value per row. Unlike a flat boolean-mask lookup,
# this always yields exactly one value per row.
nearest_idx = np.abs(mach_raw.reshape(-1, 1) - m_ls.reshape(1, -1)).argmin(axis=1)
mach_nominal = m_ls[nearest_idx]

# Same tolerance as before: a row with no nominal value within 0.02 (or a NaN)
# is an error, now reported with the offending index labels.
off_grid = ~np.isclose(mach_raw, mach_nominal, atol=0.02)
if off_grid.any():
    raise ValueError(
        f"mach values not within 0.02 of any nominal value at index: {df.index[off_grid].tolist()}"
    )
df['mach'] = mach_nominal

df = df.drop(index=[15, 63])  # broken wind tunnel runs

# The key columns are compared as strings downstream (e.g. mach == '0.0').
key_cols = ['configuration', 'beta', 'mach']
df[key_cols] = df[key_cols].astype('str')
df = df.drop_duplicates()

# Rows whose Mach key is '0.0', taken as an independent copy.
df_windoff = df.query(r"mach=='0.0'").copy()

# Build a ratio data set: each reading divided by the wind-off reading that
# shares its configuration, beta and tap name.

# Reading columns start with "pt" or "k"; every other column is an identifier.
is_pt_col = df.columns.str.match("pt.+")
is_k_col = df.columns.str.match("k.+")
ptap_cols = df.columns[is_pt_col | is_k_col].values
other_cols = df.columns[~is_pt_col & ~is_k_col].values

# Long format: one row per (identifier columns, tap).
df_intensity = pd.melt(
    df,
    id_vars=other_cols,
    value_vars=ptap_cols,
    var_name='ptap',
    value_name='intensity',
)

# Same reshape for the wind-off rows; mach is dropped so it does not collide
# with the mach column of the full table in the merge.
df_intensity_windoff = pd.melt(
    df_windoff,
    id_vars=other_cols,
    value_vars=ptap_cols,
    var_name='ptap',
    value_name='windoff_intensity',
)
df_intensity_windoff = df_intensity_windoff.drop(columns='mach')

# Attach the wind-off reference to every reading, then form the ratio.
df_intensity = pd.merge(
    df_intensity,
    df_intensity_windoff,
    how='left',
    on=['configuration', 'beta', 'ptap'],
)
ratio = df_intensity.eval('intensity/windoff_intensity')
df_intensity['intensity_ratio'] = ratio



In [239]:
# Load the tap pressure table; 99999 is read as missing, first column is the index.
df_p = pd.read_csv('ptap_pressures.csv', na_values=99999, index_col=0)

# Discard every column whose name contains "abs".
has_abs = df_p.columns.str.contains('abs')
df_p = df_p.loc[:, ~has_abs]

# Remove the "<any char>ph" fragment from the column names. Names without that
# fragment map to themselves, so the rename below only alters the ".+ph" columns.
ptap_ratio_cols = df_p.columns[df_p.columns.str.match(".+ph")]  # not used again in the visible cells
ptap_ratio_cols_new = df_p.columns.str.replace('.ph', '', regex=True)
ptap_dix = dict(zip(df_p.columns.values, ptap_ratio_cols_new))
df_p = df_p.rename(columns=ptap_dix)

# Drop the columns that are not needed further and lower-case the remaining names.
df_p = df_p.drop(columns=['T0', 'H0', 'SERIAL', 'PS'])
df_p.columns = df_p.columns.str.lower()


# Snap every measured Mach number onto the nominal value it belongs to.
m_ls = np.array([0, 0.5, 0.6, 0.7, 0.8, 0.85, 0.9, 0.95])
mach_raw = df_p.mach.to_numpy(dtype='float64')

# Pick the closest nominal value per row. Unlike a flat boolean-mask lookup,
# this always yields exactly one value per row.
nearest_idx = np.abs(mach_raw.reshape(-1, 1) - m_ls.reshape(1, -1)).argmin(axis=1)
mach_nominal = m_ls[nearest_idx]

# Same tolerance as before: a row with no nominal value within 0.02 (or a NaN)
# is an error, now reported with the offending index labels.
off_grid = ~np.isclose(mach_raw, mach_nominal, atol=0.02)
if off_grid.any():
    raise ValueError(
        f"mach values not within 0.02 of any nominal value at index: {df_p.index[off_grid].tolist()}"
    )
df_p['mach'] = mach_nominal

# Use the same key column names and string dtype as the intensity table.
df_p = df_p.rename(columns={'config': 'configuration'})
key_cols = ['configuration', 'beta', 'mach']
df_p[key_cols] = df_p[key_cols].astype('str')
# The tap columns are looked up on the intensity frame `df`, and `other_cols`
# comes from the earlier cell, so that cell has to run first.
# NOTE(review): this assumes df_p has every "pt" column that df has - confirm.
is_pt_col = df.columns.str.match("pt.+")
ptap_cols = df.columns[is_pt_col].values

# Long format: one row per (identifier columns, tap) with its pressure ratio.
df_p = pd.melt(
    df_p,
    id_vars=other_cols,
    value_vars=ptap_cols,
    var_name='ptap',
    value_name='pressure_ratio',
)

display(df_p.head())


Unnamed: 0,mach,configuration,beta,ptap,pressure_ratio
0,0.0,62010,0,pt1,1.000035
1,0.95,62010,0,pt1,0.541238
2,0.95,62010,0,pt1,0.543248
3,0.95,62010,0,pt1,0.545254
4,0.9,62010,0,pt1,0.568241


In [240]:
# Attach the pressure ratio to every intensity row that shares the four keys,
# then keep only the rows that have an intensity ratio.
merge_keys = ['configuration', 'mach', 'beta', 'ptap']
df_merge = df_intensity.merge(df_p, how='left', on=merge_keys)
df_merge = df_merge.dropna(subset=['intensity_ratio'])

In [241]:
# Write the merged intensity/pressure table (index included) to the working directory.
df_merge.to_csv(path_or_buf='almost_clean_dataset_pressures_intensities.csv')

In [242]:
# Show the merged rows for configuration 62040 (last expression, so it is displayed).
df_merge.query(expr="configuration=='62040'")

Unnamed: 0,mach,configuration,beta,ptap,intensity,windoff_intensity,intensity_ratio,pressure_ratio
0,0.5,62040,0,pt1,461.543646,390.217668,1.182785,0.838157
1,0.0,62040,0,pt1,390.217668,390.217668,1.000000,1.000173
2,0.7,62040,0,pt1,506.904412,390.217668,1.299030,0.714724
4,0.8,62040,0,pt1,539.496050,390.217668,1.382552,0.654706
5,0.85,62040,0,pt1,537.269918,390.217668,1.376847,0.624146
...,...,...,...,...,...,...,...,...
1498,0.7,62040,0,k4,752.182635,563.231318,1.335477,
1500,0.8,62040,0,k4,809.620019,563.231318,1.437456,
1501,0.85,62040,0,k4,811.064739,563.231318,1.440021,
1502,0.9,62040,0,k4,825.172833,563.231318,1.465069,
