In [1]:
import sys
# Put the parent directory (relative to the current working directory) on the
# module search path so the `src.utils_` import in a later cell can resolve.
# Presumably the notebook is run from a folder one level below the project
# root — TODO confirm.
sys.path.append("../")

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from src.utils_ import find_file
# Notebook-wide pandas display settings, one entry per option.
# The original single call listed "display.max_colwidth" twice; each option
# now appears exactly once and is applied in the same order as before.
DISPLAY_OPTIONS = {
    "display.max_columns": None,    # no limit on displayed columns
    "display.max_rows": None,       # no limit on displayed rows
    "display.max_colwidth": None,   # no limit on displayed cell width
    "display.precision": 4,
    "display.float_format": "{:,.4f}".format,  # thousands separator, 4 decimals
}
for option_name, option_value in DISPLAY_OPTIONS.items():
    pd.set_option(option_name, option_value)

In [None]:
# Column-count constants; they mirror the numbered column-name patterns below.
N_SURFACE_WEIGHTS = 6   # lower_weight_0..5 and upper_weight_0..5
N_CURVE_POINTS = 48     # CL_0..47 and alpha_0..47

# Locate the CSV through the project helper and load it into one frame.
df = pd.read_csv(find_file(file="14KP_48CLA.csv"))

# Input (feature) columns: per-surface weights plus two scalar parameters.
X_cols = (
    [f"lower_weight_{i}" for i in range(N_SURFACE_WEIGHTS)]
    + [f"upper_weight_{i}" for i in range(N_SURFACE_WEIGHTS)]
    + ["TE_thickness", "leading_edge_weight"]
)
# Output (label) columns: all CL_* columns followed by all alpha_* columns.
y_cols = (
    [f"CL_{i}" for i in range(N_CURVE_POINTS)]
    + [f"alpha_{i}" for i in range(N_CURVE_POINTS)]
)

# Fail here with the full list of absent columns instead of letting a later
# cell raise a less specific KeyError on the first df[...] selection.
missing_cols = [name for name in X_cols + y_cols if name not in df.columns]
if missing_cols:
    raise KeyError(f"14KP_48CLA.csv is missing expected columns: {missing_cols}")

In [None]:
# Print the heading and the frame's shape tuple on separate lines, then
# leave the first five rows as the cell's displayed value.
print("Dataset Overview:", df.shape, sep="\n")
df.head(n=5)

In [None]:
# Per-column count of missing entries; the resulting Series is the cell output.
print("Missing Values:")
df.isna().sum(axis=0)

In [None]:
# Summary statistics for the input features only. The labels get their own
# summary cell, so describing the whole frame here would duplicate them under
# a heading that says "Feature".
print("Feature Summary:")
df[X_cols].describe()

In [None]:
# Summary statistics restricted to the label columns listed in y_cols.
print("Label Summary (CL & Alpha):")
df.loc[:, y_cols].describe()

In [None]:
print("Feature distributions:")
# One figure per input feature: a 30-bin histogram with a KDE overlay.
for col in X_cols:
    fig, ax = plt.subplots(figsize=(6, 3))
    sns.histplot(data=df, x=col, bins=30, kde=True, ax=ax)
    ax.set_title(f"Distribution of {col}")
    ax.grid(True)
    fig.tight_layout()
    plt.show()

In [None]:
print("Correlation heatmap:")
# Pairwise correlation among the input features, drawn as one annotated heatmap.
corr = df.loc[:, X_cols].corr(numeric_only=True)
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(corr, ax=ax, cmap="coolwarm", annot=True, fmt=".2f")
ax.set_title("Feature Correlation Heatmap")
fig.tight_layout()
plt.show()

In [None]:
print("Plotting sample lift curves...")
# Built once: neither the column lists nor the presence of a name column
# changes from one sampled row to the next.
cl_cols = [f"CL_{i}" for i in range(48)]
alpha_cols = [f"alpha_{i}" for i in range(48)]
has_names = "aerofoil_name" in df.columns

# Fixed seed so the same three rows are drawn on every run.
sample_ids = df.sample(3, random_state=42).index

fig, ax = plt.subplots()
for idx in sample_ids:
    # Coerce to float explicitly: a row taken from a frame that also holds
    # non-numeric columns may come back as an object-dtype Series.
    cl = df.loc[idx, cl_cols].to_numpy(dtype=float)
    alpha = df.loc[idx, alpha_cols].to_numpy(dtype=float)
    # Use the aerofoil name as legend entry when available, else the row label.
    label = df.loc[idx, "aerofoil_name"] if has_names else f"Sample {idx}"

    ax.plot(alpha, cl, label=label)

ax.set_xlabel("Angle of Attack (α)")
ax.set_ylabel("Lift Coefficient (CL)")
ax.set_title("CL vs Alpha (Sample Aerofoils)")
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# For each target column, print the correlation of every input feature with
# that target, ordered from most positive to most negative.
for target_col in y_cols:
    print(f"Correlation to {target_col}:")
    # corrwith correlates each feature column with the target directly, so the
    # feature-vs-feature entries of a full corr() matrix are never computed.
    # rename() keeps the Series name equal to the target, as before.
    target_corr = df[X_cols].corrwith(df[target_col]).rename(target_col)
    print(target_corr.sort_values(ascending=False), "\n")


In [None]:
print("Plotting correlation heatmap against all targets:")
# One single-column heatmap per target: rows are the input features, the
# column is the target they are correlated with.
for target_col in y_cols:
    # Feature-vs-target correlations only; rename() keeps the target name so
    # to_frame() labels the heatmap column with it.
    target_corr = df[X_cols].corrwith(df[target_col]).rename(target_col)

    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(target_corr.to_frame(), annot=True, cmap="coolwarm", fmt=".2f", ax=ax)
    ax.set_title(f"Correlation Heatmap for {target_col}")
    fig.tight_layout()
    plt.show()
    # Release the figure explicitly: this loop opens one figure per target,
    # and not every backend closes figures on show().
    plt.close(fig)