# Application of pyKAN for 2D VLBI Data Interpolation

## Introduction

Introduction goes here.

## Imports

In [None]:
import torch
import numpy as np
import pandas as pd
from kan import *
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import warnings

# precoded functions from supplementary modules
from data_preparation import *
from data_visualization import *

## Globals and Settings

In [None]:
# silence every warning category for the rest of the session
warnings.filterwarnings("ignore")

In [None]:
# Pick the CUDA device when torch reports one as available; otherwise use the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using `{device}` device for torch.")

Using `cpu` device for torch.


In [None]:
# Relative path to the tab-separated dataset file.
# Forward slashes are used on purpose: the previous backslash form relied on
# the invalid escape sequences "\d" and "\C" being kept verbatim and only
# resolved on Windows, whereas this form works on every platform.
DATASET_FILE = './data/Cres_05.txt'

In [None]:
# Fraction of the samples held out for validation; the training share handed
# to the dataset split is 1 - TEST_SIZE.
TEST_SIZE = 0.3

In [None]:
# Global random seed; passed to the KAN constructor as `seed`.
RANDOM_STATE = 42

# Two-dimensional Case Approach for Crest Model Dataset

## Data Loading

In [None]:
# Read the tab-delimited text file named by DATASET_FILE into a DataFrame.
dataset = pd.read_csv(DATASET_FILE, delimiter="\t")

In [None]:
# Print the DataFrame summary (index range, column names, non-null counts,
# dtypes and memory usage) to confirm the file was parsed as expected.
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 259200 entries, 0 to 259199
Data columns (total 4 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   U       259200 non-null  float64
 1   V       259200 non-null  float64
 2   Re      259200 non-null  float64
 3   Im      259200 non-null  float64
dtypes: float64(4)
memory usage: 7.9 MB


## Data Preparation

In [None]:
# NOTE(review): the captured `dataset.info()` output lists only U, V, Re and Im;
# the 'Baseline' and 'Alpha' columns used below must be added to `dataset`
# before this cell runs — confirm where that happens.

# Order the samples by baseline and renumber the rows from zero.
dataset = dataset.sort_values('Baseline', ignore_index=True)

# Model inputs are (U, V, Baseline, Alpha); targets are (Re, Im).
input_columns = ['U', 'V', 'Baseline', 'Alpha']
target_columns = ['Re', 'Im']
X_data = dataset[input_columns].values
y_data = dataset[target_columns].values

# Standardise inputs and targets with separate scalers so that each one can be
# re-applied or inverted on its own later.
# NOTE(review): both scalers are fitted on the full dataset, i.e. before the
# train/validation split is made.
scaler_X = StandardScaler().fit(X_data)
X_scaled = scaler_X.transform(X_data)

scaler_y = StandardScaler().fit(y_data)
y_scaled = scaler_y.transform(y_data)

# Convert the scaled arrays to float32 tensors.
X_tensor = torch.from_numpy(X_scaled).to(torch.float32)
y_tensor = torch.from_numpy(y_scaled).to(torch.float32)

In [None]:
# Build the train/validation dictionary consumed by the model.
# The global NumPy and torch generators are seeded first so that the random
# split is repeatable between runs (assumes `create_dataset_from_data` draws
# from one of these global RNGs, as the pykan helper does — TODO confirm in
# case `data_preparation` overrides it).
np.random.seed(RANDOM_STATE)
torch.manual_seed(RANDOM_STATE)
dataset_model = create_dataset_from_data(X_tensor, y_tensor, train_ratio=1-TEST_SIZE, device=device)

In [None]:
# Report the tensor shape of every split as a quick sanity check.
for caption, key in (
    ("Training dataset input features dimensions:", 'train_input'),
    ("Training dataset target features dimensions:", 'train_label'),
    ("Validation dataset input features dimensions:", 'test_input'),
    ("Validation dataset target features dimensions:", 'test_label'),
):
    print(caption, dataset_model[key].shape)

## Model Initialization

In [None]:
# Build the KAN: 4 input nodes (one per input feature), two hidden layers of
# 12 nodes each and 2 output nodes (one per target), created on `device` and
# seeded with RANDOM_STATE.
model = KAN(
    width=[4, 12, 12, 2],
    grid=5,
    k=3,
    seed=RANDOM_STATE,
    device=device,
)


## Training

In [None]:
# Fit the model on the prepared split with the LBFGS optimiser for 1000 steps,
# with grid updates enabled; the value returned by `fit` is kept in `history`.
history = model.fit(
    dataset_model, opt="LBFGS", steps=1000, update_grid=True
)

## Testing

In [None]:
# Ask the project helper for the evaluation grid (presumably a regular U/V
# grid derived from `dataset` — see `generate_uniform_uv_grid`).
grid_df = generate_uniform_uv_grid(dataset)

# Same four input features as used for training, scaled with the already
# fitted input scaler and converted to a float32 tensor.
grid_feature_columns = ['U', 'V', 'Baseline', 'Alpha']
test_data_raw = grid_df[grid_feature_columns].values
test_data_scaled = scaler_X.transform(test_data_raw)
test_data = torch.from_numpy(test_data_scaled).to(torch.float32)

In [None]:
# Run inference on the grid features.
# `test_data` is built on the CPU while the model was created on `device`, so
# the input is moved to that device first; gradient tracking is switched off
# because the output is only post-processed and plotted.
with torch.no_grad():
    results = model(test_data.to(device))

In [None]:
# Bring the predictions back to host memory before converting to NumPy
# (`.numpy()` is only valid for CPU tensors), then undo the target scaling.
results_denormalized = scaler_y.inverse_transform(results.detach().cpu().numpy())

# Column 0 is the real part and column 1 the imaginary part (the target order
# used when fitting `scaler_y`); derive amplitude and phase from them.
results_amplitude = np.sqrt(results_denormalized[:, 0]**2 + results_denormalized[:, 1]**2)
results_phase = np.arctan2(results_denormalized[:, 1], results_denormalized[:, 0])

In [None]:
# Amplitude against baseline: source dataset (left) next to the model output
# evaluated on the grid (right).
fig, (ax_data, ax_pred) = plt.subplots(1, 2, figsize=(13, 4))

ax_data.scatter(dataset['Baseline'], dataset['Amplitude'], alpha=0.6, color='blue', s=3)
ax_data.set_xlabel('Baseline')
ax_data.set_ylabel('Amplitude')
ax_data.set_title('Dataset: Amplitude vs Baseline')
ax_data.grid(True, alpha=0.3)

ax_pred.scatter(grid_df['Baseline'], results_amplitude, alpha=0.6, color='red', s=3)
ax_pred.set_xlabel('Baseline (grid_df)')
ax_pred.set_ylabel('Results Amplitude')
ax_pred.set_title('Results: Amplitude vs Baseline')
ax_pred.grid(True, alpha=0.3)

fig.tight_layout()
plt.show()

In [None]:
# Phase against baseline: source dataset (left) next to the model output
# evaluated on the grid (right).
plt.figure(figsize=(13, 4))

plt.subplot(1, 2, 1)
plt.scatter(dataset['Baseline'], dataset['Phase'], alpha=0.2, color='blue', s=1)
plt.xlabel('Baseline')
plt.ylabel('Phase')
plt.title('Dataset: Phase vs Baseline')
plt.grid(True, alpha=0.3)

plt.subplot(1, 2, 2)
plt.scatter(grid_df['Baseline'], results_phase, alpha=0.2, color='red', s=1)
plt.xlabel('Baseline (grid_df)')
# This panel shows the predicted phase, so it is labelled as phase (the labels
# previously said "Amplitude", copied from the amplitude figure).
plt.ylabel('Results Phase')
plt.title('Results: Phase vs Baseline')
plt.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## Results and Discussions