<a href="https://colab.research.google.com/github/adi-devv/NuFu-Archive/blob/main/src/LaserProton.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Load
data_path = '/content/drive/MyDrive/Colab_Projects/laserProton/fuchs_v3-2_seed-5_points_25000_noise_0.csv'
df = pd.read_csv(data_path)

# Display sample
print("Raw Data Sample:")
print(df.head())

# 80/20 split
train_size = int(0.8 * len(df))  # 20,000 for training, 5,000 for testing
train_df = df.iloc[:train_size].copy()
test_df = df.iloc[train_size:].copy()

# Logarithmic Transformation
log_transform_cols = ["Intensity_(W_cm2)", "Max_Proton_Energy_(MeV)",
                     "Total_Proton_Energy_(MeV)", "Avg_Proton_Energy_(MeV)"]

for col in log_transform_cols:
    train_df[f"log_{col}"] = np.log(train_df[col])
    test_df[f"log_{col}"] = np.log(test_df[col])

# Z-Score Normalization
scaler = StandardScaler()
features_to_normalize = [f"log_{col}" for col in log_transform_cols] + ["Focal_Distance_(um)", "Target_Thickness (um)"]

scaler.fit(train_df[features_to_normalize])

train_df[features_to_normalize] = scaler.transform(train_df[features_to_normalize])
test_df[features_to_normalize] = scaler.transform(test_df[features_to_normalize])

# Retain original columns for reference or post-processing
original_cols = [col for col in df.columns if col not in features_to_normalize]
train_df = pd.concat([train_df, df.iloc[:train_size][original_cols]], axis=1)
test_df = pd.concat([test_df, df.iloc[train_size:][original_cols]], axis=1)

x_train = train_df[features_to_normalize].values
y_train = train_df[[f"log_{col}" for col in log_transform_cols]].values
x_test = test_df[features_to_normalize].values
y_test = test_df[[f"log_{col}" for col in log_transform_cols]].values

# Display processed data sample
# print("\nPreprocessed Training Data Sample:")
# print(train_df.head())

# Save to Google Drive
train_df.to_csv('/content/drive/MyDrive/Colab_Projects/laserProton/preprocessed_train_data_noise0.csv', index=False)
test_df.to_csv('/content/drive/MyDrive/Colab_Projects/laserProton/preprocessed_test_data_noise0.csv', index=False)
print("Preprocessed data saved to /content/drive/MyDrive/Colab_Projects/laserProton/")

Raw Data Sample:
   Avg_Proton_Energy_(MeV)  Avg_Proton_Energy_Exact_(MeV)  \
0                 0.007261                       0.007261   
1                 0.119684                       0.119684   
2                 0.016739                       0.016739   
3                 0.186123                       0.186123   
4                 0.048987                       0.048987   

   Focal_Distance_(um)  Intensity_(W_cm2)  Max_Proton_Energy_(MeV)  \
0             1.181257       1.667221e+18                 0.025138   
1            -7.518057       7.425613e+18                 0.597101   
2             3.919023       1.609604e+18                 0.065116   
3             4.824679       8.291076e+18                 0.984354   
4             1.102200       3.079011e+18                 0.204535   

   Max_Proton_Energy_Exact_(MeV)  Pulse_Duration_(fs)  Spot_Size_(FWHM um)  \
0                       0.025138                 40.0                  1.5   
1                       0.597101       

In [None]:
!pip install cuml-cu11 --extra-index-url=https://pypi.nvidia.com

Looking in indexes: https://pypi.org/simple, https://pypi.nvidia.com
Collecting cuml-cu11
  Downloading https://pypi.nvidia.com/cuml-cu11/cuml_cu11-25.6.0-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl (8.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.2/8.2 MB[0m [31m69.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting cuda-python<12.0a0,>=11.8.5 (from cuml-cu11)
  Downloading cuda_python-11.8.7-py3-none-any.whl.metadata (14 kB)
Collecting cudf-cu11==25.6.* (from cuml-cu11)
  Downloading https://pypi.nvidia.com/cudf-cu11/cudf_cu11-25.6.0-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m272.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting cupy-cuda11x>=12.0.0 (from cuml-cu11)
  Downloading cupy_cuda11x-13.6.0-cp312-cp312-manylinux2014_x86_64.whl.metadata (2.4 kB)
Collecting cuvs-cu11==25.6.* (from cuml-cu11)
  Downloading https://pypi.nvidia.com/

In [10]:
# Import necessary libraries (cuml for SVR)
from cuml.svm import SVR
import numpy as np
import pandas as pd

# Quick check to ensure variables are defined from previous cell
print(f"x_train shape: {x_train.shape}, y_train shape: {y_train.shape}")
assert x_train is not None and y_train is not None, "Variables not defined from previous cell!"

# Investigate unexpected y_train shape
print("y_train columns:", train_df.columns[train_df.columns.str.startswith('log_')])
target_cols = ["log_Max_Proton_Energy_(MeV)", "log_Total_Proton_Energy_(MeV)", "log_Avg_Proton_Energy_(MeV)"]
if y_train.shape[1] > len(target_cols):
    print(f"Warning: y_train has {y_train.shape[1]} columns, using first {len(target_cols)} for training.")
    y_train = y_train[:, :len(target_cols)]

try:
    # Train separate SVR models for each target
    models = {}
    for i, target in enumerate(target_cols):
        model = SVR(kernel='rbf', C=2.5, epsilon=0.01, tol=0.001)  # Per Section 3.1
        model.fit(x_train, y_train[:, i])
        models[target] = model
        print(f"Trained model for {target}")

    # Make predictions
    predictions = {}
    for target, model in models.items():
        predictions[target] = model.predict(x_train)  # Using x_train for consistency with fit

    # Detransform and apply bias correction
    training_output_means = {
        "Max_Proton_Energy_(MeV)": train_df["Max_Proton_Energy_(MeV)"].mean(),
        "Total_Proton_Energy_(MeV)": train_df["Total_Proton_Energy_(MeV)"].mean(),
        "Avg_Proton_Energy_(MeV)": train_df["Avg_Proton_Energy_(MeV)"].mean()
    }

    def apply_bias_correction(predictions, training_means, feature_name):
        detransformed_preds = np.exp(predictions)
        mean_detransformed = detransformed_preds.mean()
        correction_factor = training_means[feature_name] / mean_detransformed
        return detransformed_preds * correction_factor

    corrected_predictions = {}
    for target, pred in predictions.items():
        base_target = target.replace("log_", "")
        corrected_predictions[base_target] = apply_bias_correction(pred, training_output_means, base_target)

    # Evaluate MAPE
    exact_values = {
        "Max_Proton_Energy_(MeV)": test_df["Max_Proton_Energy_Exact_(MeV)"].values,
        "Total_Proton_Energy_(MeV)": test_df["Total_Proton_Energy_Exact_(MeV)"].values,
        "Avg_Proton_Energy_(MeV)": test_df["Avg_Proton_Energy_Exact_(MeV)"].values
    }

    for target in corrected_predictions:
        mape = np.mean(np.abs((corrected_predictions[target] - exact_values[target]) / exact_values[target])) * 100
        print(f"MAPE for {target}: {mape:.2f}%")

    # Save predictions to Drive
    results_df = pd.DataFrame(corrected_predictions)
    results_df.to_csv('/content/drive/MyDrive/Colab_Projects/laserProton/predictions_noise0.csv', index=False)
    print("Predictions saved to /content/drive/MyDrive/Colab_Projects/laserProton/predictions_noise0.csv")

except Exception as e:
    print(f"Error occurred: {e}")
    print("Ensure cuml is installed: !pip install cuml-cu11 --extra-index-url=https://pypi.nvidia.com")

x_train shape: (20000, 6), y_train shape: (20000, 3)
y_train columns: Index(['log_Intensity_(W_cm2)', 'log_Max_Proton_Energy_(MeV)',
       'log_Total_Proton_Energy_(MeV)', 'log_Avg_Proton_Energy_(MeV)'],
      dtype='object')
Trained model for log_Max_Proton_Energy_(MeV)
Trained model for log_Total_Proton_Energy_(MeV)
Trained model for log_Avg_Proton_Energy_(MeV)
Error occurred: operands could not be broadcast together with shapes (5000,) (2,) 
Ensure cuml is installed: !pip install cuml-cu11 --extra-index-url=https://pypi.nvidia.com
