In [1]:
import os

import pandas as pd

from utils import load_raw_data

In [2]:
# Locate the row that is missing from each of the three incomplete raw files
# (DipoleVertical_Im_Ey_d5mm.txt, DipoleVertical_Im_Hy_d15mm.txt and
# DipoleHorizontal_Re_Hx_d15mm.txt), so that the corresponding row can be
# removed from all the other files of the same antenna/distance.

path_raw = os.path.join('data', 'raw')
antennas = ['DipoleVertical', 'DipoleVertical', 'DipoleHorizontal']
distances = [5, 15, 15]
components = ['Im_Ey', 'Im_Hy', 'Re_Hx']
critical_idxs = []


def _read_xyz(fname):
    """Read only the x, y, z coordinate columns of a raw field file.

    Lines starting with '%' are treated as comments and the columns are
    whitespace-separated.
    """
    return pd.read_csv(
        os.path.join(path_raw, fname),
        sep=r'\s+',  # raw string: '\s' is not a valid str escape sequence
        comment='%', header=None,
        usecols=[0, 1, 2], names=['x', 'y', 'z']
    )


for antenna, distance, component in zip(antennas, distances, components):
    # the Re_Ex file is taken as the (complete) reference ...
    df_1 = _read_xyz(f'{antenna}_Re_Ex_d{distance}mm.txt')
    # ... and compared against the problematic file
    df_2 = _read_xyz(f'{antenna}_{component}_d{distance}mm.txt')

    # Compare the overlapping rows only; as before, the last row of the
    # reference is left out of the comparison.
    xyz_ref = df_1.to_numpy()[:-1]
    xyz_bad = df_2.to_numpy()
    n_rows = min(len(xyz_ref), len(xyz_bad))

    # A row differs if ANY coordinate differs. Summing the differences
    # instead would miss rows whose deviations cancel out (e.g. +d and -d).
    mismatch = (xyz_ref[:n_rows] != xyz_bad[:n_rows]).any(axis=1)
    if not mismatch.any():
        # Fail loudly: silently appending nothing would shift every later
        # entry of critical_idxs, which is consumed by position.
        raise ValueError(
            f'no differing row found between {antenna}_Re_Ex_d{distance}mm.txt '
            f'and {antenna}_{component}_d{distance}mm.txt'
        )

    # argmax on a boolean array gives the position of the first True
    critical_idxs.append(int(mismatch.argmax()))

In [3]:
# index of the first differing row for each problematic file, in the same
# order as the antennas/distances/components lists of the previous cell
critical_idxs

[50661, 63766, 43076]

In [4]:
# create a clean dataset
path_clean = os.path.join('data', 'clean')
antennas = ['DipoleVertical', 'DipoleHorizontal',
            'ArrayVertical', 'ArrayHorizontal']
distances = [5, 10, 15]

# (antenna, distance) -> row index found for that case in the mismatch search.
# The order of critical_idxs follows the order in which the problematic files
# were scanned: DipoleVertical 5 mm, DipoleVertical 15 mm, DipoleHorizontal
# 15 mm. The DipoleHorizontal index was derived from the 15 mm files only, so
# it must not be applied to the 5 mm and 10 mm DipoleHorizontal files.
critical_idx_by_case = {
    ('DipoleVertical', 5): critical_idxs[0],
    ('DipoleVertical', 15): critical_idxs[1],
    ('DipoleHorizontal', 15): critical_idxs[2],
}

for antenna in antennas:
    for distance in distances:
        # None for every antenna/distance pair without a known missing row
        critical_idx = critical_idx_by_case.get((antenna, distance))
        # NOTE(review): load_raw_data is defined in utils; presumably it drops
        # the row at critical_idx from the complete files -- confirm there.
        xyz, (Ex, Ey, Ez), (Hx, Hy, Hz) = load_raw_data(antenna,
                                                        distance,
                                                        critical_idx)
        df = pd.concat(
            [
                pd.DataFrame(data=xyz),
                # NOTE(review): only Ex is cast explicitly, as in the original
                # cell; confirm whether the other components need it as well.
                Ex.rename('Ex').astype('complex128'),
                Ey.rename('Ey'),
                Ez.rename('Ez'),
                Hx.rename('Hx'),
                Hy.rename('Hy'),
                Hz.rename('Hz'),
            ],
            axis=1,
        )
        # NOTE(review): saving is disabled, so this cell currently only keeps
        # the frame of the last (antenna, distance) pair in `df`.
        # df.to_csv(os.path.join(path_clean, f'{antenna}_d{distance}mm.csv'))

In [5]:
# `df` is reassigned on every loop iteration of the previous cell, so this
# shows only the frame of the last pair: ArrayHorizontal at 15 mm
df

Unnamed: 0,x,y,z,Ex,Ey,Ez,Hx,Hy,Hz
0,0.001048,-0.009581,-0.003467,0.616368+0.013970j,-3.983336-9.544519j,-1.865476-3.331334j,0.011898+0.023334j,0.031941+0.024177j,-0.090955-0.075735j
1,0.001223,-0.009710,-0.003656,0.741050+1.099401j,-4.776628-8.759916j,-2.271406-3.160169j,0.011714+0.020331j,0.035065+0.026239j,-0.094066-0.070380j
2,0.001159,-0.009859,-0.003366,0.920192+0.484196j,-4.494408-9.648604j,-2.163794-3.565804j,0.010235+0.021193j,0.035062+0.026699j,-0.094654-0.075996j
3,0.001135,-0.009506,-0.003718,0.478751+0.691648j,-4.405063-8.708452j,-2.083377-3.010678j,0.013038+0.022061j,0.032637+0.024043j,-0.091806-0.069870j
4,0.001283,-0.009557,-0.003929,0.617809+1.508246j,-5.056469-7.883062j,-2.412661-2.814283j,0.013541+0.019553j,0.035056+0.025384j,-0.093473-0.064391j
...,...,...,...,...,...,...,...,...,...
62487,0.015500,0.009471,0.018453,-0.820008+0.127444j,0.284741+0.180612j,0.080640+0.164059j,-0.000077-0.000063j,0.001979+0.001615j,-0.004604-0.000828j
62488,0.015500,0.009950,0.018135,-0.856330-0.087885j,0.261990+0.249038j,0.051252+0.131448j,-0.000023-0.000077j,0.001417+0.001357j,-0.004629-0.001781j
62489,0.015500,0.009620,0.018145,-0.855708+0.110536j,0.288186+0.212363j,0.068720+0.146421j,-0.000040-0.000059j,0.001703+0.001435j,-0.004769-0.001169j
62490,0.015500,0.009805,0.018438,-0.833546-0.060914j,0.262751+0.218108j,0.062201+0.151797j,-0.000061-0.000080j,0.001678+0.001583j,-0.004511-0.001434j
