## Random Forest - Three Parameters 

In [1]:
#Install Cudf and cuml from Rapids website before using this.
#Changed Random forset from CPU to GPU
#Set kernel before running

import cudf
import cuml
from cuml.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import time

start_numpoints = 5000
end_numpoints = 50000
interval = 1000

results = []

for numpoints in range(start_numpoints, end_numpoints + 1, interval):
    start_time = time.time()
    print(f"Number of Points: {numpoints}")

    data = pd.read_csv(f'datasets/Energy/fuchs_v3_points_{numpoints}_noise_10.csv')

    features = data[['Intensity_(W_cm2)', 'Target_Thickness (um)', 'Focal_Distance_(um)']]
    target = data[['Max_Proton_Energy_(MeV)', 'Avg_Proton_Energy_(MeV)', 'Total_Proton_Energy_(MeV)']]

    # Split the dataset into train and test sets
    features_train, features_test, target_train, target_test = train_test_split(features, target, test_size=0.2, random_state=42)

    # Convert pandas DataFrame to cuDF DataFrame
    features_train = cudf.from_pandas(features_train)
    features_test = cudf.from_pandas(features_test)

    # Train three separate random forest models for each target variable
    models = []
    target_test_preds = []
    mse_errors = []
    rmse_errors = []
    are_errors = []
    for i, column in enumerate(target.columns):
        target_train_curr = target_train[column]
        target_test_curr = target_test[column]

        # Convert target variables to cuDF Series
        target_train_curr = cudf.Series(target_train_curr.values)
        target_test_curr = cudf.Series(target_test_curr.values)

        # Initialize the Random Forest Regressor
        rf = cuml.ensemble.RandomForestRegressor(random_state=42)

        # Fit the model
        rf.fit(features_train, target_train_curr)

        # Predict on the test set
        target_test_pred = rf.predict(features_test)

        # Store the model and predictions
        models.append(rf)
        target_test_preds.append(target_test_pred)

        # Calculate the MSE, RMSE, and ARE for the current target variable
        mse_error = mean_squared_error(target_test_curr, target_test_pred)
        rmse_error = np.sqrt(mse_error)
        are_error = (mean_absolute_error(target_test_curr, target_test_pred) / np.mean(target_test_curr)) * 100

        # Print the MSE, RMSE, and ARE for the current target variable
        print(f'MSE for {column}: {mse_error}')
        print(f'RMSE for {column}: {rmse_error}')
        print(f'ARE for {column}: {are_error}%')

        mse_errors.append(mse_error)
        rmse_errors.append(rmse_error)
        are_errors.append(are_error)

    elapsed_time = time.time() - start_time  # calculate elapsed time

    # Store the results in a dictionary
    result = {
        'Number of Points': numpoints,
        'MSE Max_Proton_Energy': mse_errors[0],
        'RMSE Max_Proton_Energy': rmse_errors[0],
        'ARE Max_Proton_Energy': are_errors[0],
        'MSE Avg_Proton_Energy': mse_errors[1],
        'RMSE Avg_Proton_Energy': rmse_errors[1],
        'ARE Avg_Proton_Energy': are_errors[1],
        'MSE Total_Proton_Energy': mse_errors[2],
        'RMSE Total_Proton_Energy': rmse_errors[2],
        'ARE Total_Proton_Energy': are_errors[2],
        'Elapsed Time (seconds)': elapsed_time
    }

    results.append(result)

# Convert the results to a DataFrame
results_df = pd.DataFrame(results)

# Save the results to a CSV file
results_df.to_csv('results_Pitzer_3-parameter-gpu.csv', index=False)



UnsupportedCUDAError: Detected CUDA Runtime version is 9.2. Please update your CUDA Runtime to 11.0 or above.

## Random Forest - One Parameter

In [4]:
import cudf
import cuml
from cuml.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import time

start_numpoints = 5000
end_numpoints = 50000
interval = 1000

results = []

for numpoints in range(start_numpoints, end_numpoints + 1, interval):
    start_time = time.time()
    print(f"Number of Points: {numpoints}")

    data = cudf.read_csv(f'datasets/Energy/fuchs_v3_points_{numpoints}_noise_10.csv')

    features = data[['Intensity_(W_cm2)', 'Target_Thickness (um)', 'Focal_Distance_(um)']]
    target = data[['Max_Proton_Energy_(MeV)']]

    # Convert cuDF DataFrame to pandas DataFrame
    features = features.to_pandas()
    target = target.to_pandas()

    features_train, features_test, target_train, target_test = train_test_split(features, target, test_size=0.2, random_state=42)

    # Convert pandas DataFrame back to cuDF DataFrame
    features_train = cudf.from_pandas(features_train)
    features_test = cudf.from_pandas(features_test)
    target_train = cudf.from_pandas(target_train)
    target_test = cudf.from_pandas(target_test)

    # Random Forest Regressor with optimized parameters
    rf = cuml.ensemble.RandomForestRegressor(n_estimators=400, max_depth=20, min_samples_split=2, min_samples_leaf=1, max_features='auto', random_state=42)

    # Fit the model on the training data
    rf.fit(features_train, target_train)

    # Predict on the test set
    target_test_pred = rf.predict(features_test)

    # Calculate the MSE
    mse_error = mean_squared_error(target_test.to_pandas().values, target_test_pred.to_pandas().values)

    # Calculate the RMSE
    rmse_error = np.sqrt(mse_error)

    # Calculate the ARE in percentage
    are_error = (mean_absolute_error(target_test.to_pandas().values, target_test_pred.to_pandas().values) / np.mean(target_test.to_pandas().values)) * 100

    elapsed_time = time.time() - start_time  # calculate elapsed time

    # Store the results in a dictionary
    result = {
        'numpoints': numpoints,
        'MSE': mse_error,
        'RMSE': rmse_error,
        'ARE': are_error,
        'Elapsed Time (seconds)': elapsed_time
    }

    results.append(result)

    print(f'MSE for Max_Proton_Energy_(MeV): {mse_error}')
    print(f'RMSE for Max_Proton_Energy_(MeV): {rmse_error}')
    print(f'ARE for Max_Proton_Energy_(MeV): {are_error}%')
    print("Elapsed time: {} seconds".format(elapsed_time))
    print()

# Save the results to a CSV file
results_df = pd.DataFrame(results)
results_df.to_csv('results_max_proton_energy_Pitzer_gpu.csv', index=False)

Number of Points: 5000


  return func(**kwargs)
  ret = func(*args, **kwargs)


MSE for Max_Proton_Energy_(MeV): 0.005562409062924429
RMSE for Max_Proton_Energy_(MeV): 0.07458155980485008
ARE for Max_Proton_Energy_(MeV): 11.675117368485646%
Elapsed time: 1.3542869091033936 seconds

Number of Points: 6000
MSE for Max_Proton_Energy_(MeV): 0.004611560560041382
RMSE for Max_Proton_Energy_(MeV): 0.06790847193127955
ARE for Max_Proton_Energy_(MeV): 10.67032714823164%
Elapsed time: 1.4001789093017578 seconds

Number of Points: 7000
MSE for Max_Proton_Energy_(MeV): 0.0036996320875679967
RMSE for Max_Proton_Energy_(MeV): 0.06082460100623757
ARE for Max_Proton_Energy_(MeV): 10.381282267220978%
Elapsed time: 1.3263635635375977 seconds

Number of Points: 8000
MSE for Max_Proton_Energy_(MeV): 0.004033040747502505
RMSE for Max_Proton_Energy_(MeV): 0.06350622605306117
ARE for Max_Proton_Energy_(MeV): 9.81527116017859%
Elapsed time: 1.3993871212005615 seconds

Number of Points: 9000
MSE for Max_Proton_Energy_(MeV): 0.003382806623301987
RMSE for Max_Proton_Energy_(MeV): 0.05816190

MSE for Max_Proton_Energy_(MeV): 0.003013746668407983
RMSE for Max_Proton_Energy_(MeV): 0.05489760166353338
ARE for Max_Proton_Energy_(MeV): 9.067128398048887%
Elapsed time: 2.3277323246002197 seconds

Number of Points: 43000
MSE for Max_Proton_Energy_(MeV): 0.002992807546389429
RMSE for Max_Proton_Energy_(MeV): 0.05470655853176499
ARE for Max_Proton_Energy_(MeV): 8.789499234286845%
Elapsed time: 2.4296329021453857 seconds

Number of Points: 44000
MSE for Max_Proton_Energy_(MeV): 0.0030781310642214593
RMSE for Max_Proton_Energy_(MeV): 0.055480907204383915
ARE for Max_Proton_Energy_(MeV): 8.965016326024308%
Elapsed time: 2.6791417598724365 seconds

Number of Points: 45000
MSE for Max_Proton_Energy_(MeV): 0.0029647816663511757
RMSE for Max_Proton_Energy_(MeV): 0.05444980868975736
ARE for Max_Proton_Energy_(MeV): 8.914741459292463%
Elapsed time: 2.2780261039733887 seconds

Number of Points: 46000
MSE for Max_Proton_Energy_(MeV): 0.003005485465355772
RMSE for Max_Proton_Energy_(MeV): 0.054