## Data Preprocessing

In [None]:
import pandas as pd
import os
import plotly.graph_objects as go
import plotly.express as px

In [None]:
# Input location: directory holding the CSV exports and the points file in it.
points_file_name = "points.csv"
data_dir = "/Users/ggito/repos/pinns/data/front_wing"
points_file_path = os.path.join(data_dir, points_file_name)

# Read the points table and echo it for a quick visual check.
p_df = pd.read_csv(filepath_or_buffer=points_file_path)
print(p_df)

In [None]:
# The norms file lives next to the points file in data_dir.
norms_file_name = "norms.csv"
norms_file_path = os.path.join(data_dir, norms_file_name)

# Read the norms table and echo it for a quick visual check.
n_df = pd.read_csv(filepath_or_buffer=norms_file_path)
print(n_df)

In [None]:
# Number of rows drawn from the point cloud by the sampling step.
num_of_samples = 20_000

In [None]:
# Draw a reproducible (random_state=1) subset of row labels from the points.
sampled_indices = p_df.sample(n=num_of_samples, random_state=1).index

# Select the same row labels from both tables.
# NOTE(review): this presumes row i of n_df belongs to row i of p_df -- confirm.
p_sampled_df, n_sampled_df = (
    frame.loc[sampled_indices] for frame in (p_df, n_df)
)

print(p_sampled_df)
print(n_sampled_df)

In [None]:
x_min, y_min, z_min = p_df.min()
x_max, y_max, z_max = p_df.max()

overall_min = min(x_min, y_min, z_min)
overall_max = max(x_max, y_max, z_max)

range = (overall_min, overall_max)

In [None]:
fig = px.scatter_3d(p_sampled_df, x='x', y='y', z='z')
fig.update_traces(marker_size = 1)
fig.update_traces(marker_color = 'slategrey')
fig.update_layout(
    scene = dict(
        xaxis = dict(range=range),
        yaxis = dict(range=range),
        zaxis = dict(range=range)),)

fig.show()

In [None]:
# Shift each coordinate column so that its minimum over the sample becomes 0.
p_tr_df = pd.DataFrame({
    axis: p_sampled_df[axis] - p_sampled_df[axis].min()
    for axis in ('x', 'y', 'z')
})

print(p_tr_df)

In [None]:
x_min, y_min, z_min = p_tr_df.min()
x_max, y_max, z_max = p_tr_df.max()

overall_min = min(x_min, y_min, z_min)
overall_max = max(x_max, y_max, z_max)

range = (overall_min, overall_max)

In [None]:
fig = px.scatter_3d(p_tr_df, x='x', y='y', z='z')
fig.update_traces(marker_size = 1)
fig.update_traces(marker_color = 'slategrey')
fig.update_layout(
    scene = dict(
        xaxis = dict(range=range),
        yaxis = dict(range=range),
        zaxis = dict(range=range)),)

fig.show()

In [None]:
# Divide every coordinate by the same scalar (the largest translated
# coordinate) so the proportions between axes are kept.
p_unif_scaled_df = p_tr_df.div(overall_max)

print(p_unif_scaled_df)

In [None]:
x_min, y_min, z_min = p_unif_scaled_df.min()
x_max, y_max, z_max = p_unif_scaled_df.max()

overall_min = min(x_min, y_min, z_min)
overall_max = max(x_max, y_max, z_max)

range = (overall_min, overall_max)

In [None]:
p_unif_scaled_df = p_unif_scaled_df / 2

In [None]:
fig = px.scatter_3d(p_unif_scaled_df, x='x', y='y', z='z')
fig.update_traces(marker_size = 1)
fig.update_traces(marker_color = 'slategrey')
fig.update_layout(
    scene = dict(
        xaxis = dict(range=range),
        yaxis = dict(range=range),
        zaxis = dict(range=range)),)

fig.show()

In [None]:
domain_size = overall_max - overall_min

p_unif_scaled_df.x = p_unif_scaled_df.x + 0.25 * domain_size
p_unif_scaled_df.y = p_unif_scaled_df.y + 0.5 * domain_size
p_unif_scaled_df.z = p_unif_scaled_df.z + 0.1 * domain_size

In [None]:
fig = px.scatter_3d(p_unif_scaled_df, x='x', y='y', z='z')
fig.update_traces(marker_size = 1)
fig.update_traces(marker_color = 'slategrey')

fig.update_layout(
    scene = dict(
        xaxis = dict(range=range),
        yaxis = dict(range=range),
        zaxis = dict(range=range)))

fig.show()

In [None]:
scatter = go.Scatter3d(
    x=p_unif_scaled_df['x'],
    y=p_unif_scaled_df['y'],
    z=p_unif_scaled_df['z'],
    mode='markers',
    marker=dict(size=1, color='slategrey')
)

cone = go.Cone(
    x=p_unif_scaled_df['x'],
    y=p_unif_scaled_df['y'],
    z=p_unif_scaled_df['z'],
    u=n_sampled_df['x'],
    v=n_sampled_df['y'],
    w=n_sampled_df['z'],
    sizeref=5,
    colorscale='haline',
    cmin=0,
    cmax=2
)

fig = go.Figure(data=[scatter, cone])

fig.update_layout(
    scene = dict(
        xaxis = dict(range=range),
        yaxis = dict(range=range),
        zaxis = dict(range=range)))

fig.show()

In [None]:
# Summary statistics of the final point coordinates (shown as cell output).
p_unif_scaled_df.describe()

In [None]:
# Summary statistics of the sampled norms table (shown as cell output).
n_sampled_df.describe()

In [None]:
# Output targets, written into the same directory the inputs were read from.
final_points_file_name = 'points_final.csv'
final_points_file_path = os.path.join(data_dir, final_points_file_name)

final_norms_file_name = 'norms_final.csv'
final_norms_file_path = os.path.join(data_dir, final_norms_file_name)

# Write both tables with a header row and without the index column.
p_unif_scaled_df.to_csv(
    final_points_file_path,
    header=True,
    index=False,
)
n_sampled_df.to_csv(
    final_norms_file_path,
    header=True,
    index=False,
)