In [None]:
import pandas as pd

# Build the held-out test split: data rows [TEST_START, TEST_START + TEST_ROWS)
# of the full training CSV, i.e. rows 50M..55M by position.
#
# Reading the whole source file into memory just to keep a 5M-row slice is
# wasteful, so the parser is told to skip the unwanted rows instead:
#   * file line 0 is the header and is kept,
#   * file lines 1..TEST_START hold data rows 0..TEST_START-1 and are skipped,
#   * nrows stops the read after TEST_ROWS data rows.
# The resulting frame has a fresh 0-based index instead of 50M..55M, which does
# not matter because the index is not written out (index=False).
# NOTE(review): dtypes are now inferred from the slice only, not the whole
# file — confirm the columns are plain numeric so the saved text is unchanged.
TEST_START = 50_000_000
TEST_ROWS = 5_000_000

test_df = pd.read_csv(
    r'datasets/dataset_compressible_flow_60M_training_nstep180.csv',
    skiprows=lambda line_no: 0 < line_no <= TEST_START,
    nrows=TEST_ROWS,
)

# Save as test dataset
test_df.to_csv('datasets/dataset_compressible_flow_5M_test.csv', index=False)

In [None]:
import csv

# Peek at the column names of the training CSV without loading it: only the
# first record of the file is parsed.
# Forward slashes are used so the path is valid on every OS (a backslash is a
# path separator on Windows only) and matches the spelling used when the same
# file is read with pandas.
csv_path = 'datasets/dataset_compressible_flow_60M_training_nstep180.csv'

with open(csv_path, newline='', encoding='utf-8') as csvfile:
    reader = csv.reader(csvfile)
    # Default of None turns "file has no rows" into an explicit error below
    # instead of a bare StopIteration.
    headers = next(reader, None)

if headers is None:
    raise ValueError(f"{csv_path} is empty; there is no header row to read")
print("Headings:", headers)

## Create prediction dataset

In [4]:
import sys
from pathlib import Path

# Put the parent of the current working directory at the front of the import
# search path (once), so that packages living next to the notebook's folder
# can be imported by the cells below.
project_root = Path.cwd().parent
if not any(entry == str(project_root) for entry in sys.path):
    sys.path.insert(0, str(project_root))

In [None]:
import pandas as pd
import numpy as np
import torch
from src.evaluation import load_model, write_results_to_file
from config import load_config

import time  # hoisted: the original re-ran this import on every chunk


def _as_2d(output, n_rows):
    """Convert a model output tensor to a ``(n_rows, n_outputs)`` NumPy array.

    Reshaping by the known row count (instead of ``squeeze()`` followed by a
    rank check) keeps one row per input sample even in the corner cases where
    squeezing is ambiguous: a chunk with a single row, or a model that returns
    a 1-D tensor with one value per row.

    Parameters
    ----------
    output : torch.Tensor
        Raw output of a model forward pass for one chunk.
    n_rows : int
        Number of input rows that produced ``output``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_rows, -1)`` holding the same values.
    """
    return output.detach().cpu().numpy().reshape(n_rows, -1)


# NOTE(review): machine-specific absolute location of the experiment outputs;
# change this single line when running elsewhere. The original code wrote
# `project_root / r"C:\..."`, but joining onto an absolute path discards the
# left operand on Windows, so project_root never contributed to the result.
outputs_dir = Path(r"C:\Users\cervinka\cervinka\GitHub\MathCAS\outputs")

# First-stage model: its last output column is used as the shear prediction.
shear_exp_dir = outputs_dir / "2025-08-26_12-47-54_shear_dan_750"
shear_config = load_config(shear_exp_dir / "config.yaml")
shear_model = load_model(shear_config, shear_exp_dir)
shear_model.eval()

# Second-stage model: evaluated against the true output columns below.
res_exp_dir = outputs_dir / "2025-09-16_01-10-21_res_predShear750_dan_750"
res_config = load_config(res_exp_dir / "config.yaml")
res_model = load_model(res_config, res_exp_dir)
res_model.eval()

chunk_size = 100_000  # Adjust as needed for your memory
# Forward slash: portable, and identical to the path the test split is saved to.
input_path = "datasets/dataset_compressible_flow_5M_test.csv"
output_path = r"temp_output.csv"
# Set to True to also dump every chunk (including the PredShear column) to
# output_path. Off by default, matching the previously commented-out save.
save_chunks = False

all_y_true = []
all_y_pred = []
total_inference_time = 0.0

first = True
for chunk in pd.read_csv(input_path, chunksize=chunk_size):
    n_rows = len(chunk)

    # Stage 1: predict shear and attach it to the chunk as a new column.
    shear_inputs = chunk[shear_config.data.in_cols].values
    shear_tensor = torch.tensor(shear_inputs, dtype=torch.float32)
    with torch.no_grad():
        pred_shear_all = _as_2d(shear_model(shear_tensor), n_rows)
    # Select only the Shear prediction (last column), one value per row.
    chunk["PredShear"] = pred_shear_all[:, -1]

    # Stage 2: run the second model on its configured input columns
    # (presumably including PredShear — verify against res_config).
    res_inputs = chunk[res_config.data.in_cols].values
    res_tensor = torch.tensor(res_inputs, dtype=torch.float32)
    # perf_counter is the monotonic, high-resolution clock meant for timing
    # intervals. Only the forward pass is timed; array conversion is excluded.
    start_time = time.perf_counter()
    with torch.no_grad():
        res_out = res_model(res_tensor)
    total_inference_time += time.perf_counter() - start_time

    output_cols = res_config.data.out_cols
    all_y_true.append(chunk[output_cols].values)
    all_y_pred.append(_as_2d(res_out, n_rows))

    if save_chunks:
        # First chunk creates the file with a header; later chunks append.
        chunk.to_csv(output_path, mode='w' if first else 'a', header=first, index=False)
        first = False

if not all_y_true:
    raise ValueError(f"No rows were read from {input_path}; nothing to evaluate")

y_true_full = np.concatenate(all_y_true, axis=0)
y_pred_full = np.concatenate(all_y_pred, axis=0)

# Write metrics and predictions to file
write_results_to_file(
    config=res_config,
    y_pred=y_pred_full,
    y_true=y_true_full,
    exp_dir=res_exp_dir,
    model=res_model,
    inference_time=total_inference_time
)

In [8]:
# Count the lines of the CSV by streaming it in binary mode: nothing is decoded
# and only one line is held in memory at a time. Every line is counted, so a
# header line, if present, is part of the total.
count = 0
with open(r"C:\Users\cervinka\cervinka\GitHub\MathCAS\datasets\dataset_compressible_flow_eda_50M_training_nstep180_with_dan_1500.csv", "rb") as f:
    count = sum(1 for _line in f)
print(count)

50000001
