In [1]:
import pandas as pd

# Location of the HDF5 dataset, relative to the directory the notebook runs from
data_path = "../../nuclear_data/cmsr_dataset_material_movement.h5"

# Load the stored table; no key is passed, so the file is presumably expected to
# hold a single pandas object (TODO confirm). Later cells all derive from df_base.
df_base = pd.read_hdf(path_or_buf=data_path)

In [4]:
# Per-isotope fuel columns: everything prefixed 'fuel_' except the three
# 'fuel_TOT_*' columns, which are excluded from the ranking.
excluded_totals = ['fuel_TOT_GS', 'fuel_TOT_DH', 'fuel_TOT_A']
fuel_columns = [
    name
    for name in df_base.columns
    if name.startswith('fuel_') and name not in excluded_totals
]

# One row per isotope column, one column per summary statistic.
stat_names = ['mean', 'median', 'std', 'max', 'min']
fuel_values = df_base[fuel_columns]
stats_df = pd.DataFrame({stat: getattr(fuel_values, stat)() for stat in stat_names})

# Order by median (largest first), expose the column name as 'isotope',
# and prepend a 1-based rank.
sorted_stats = (
    stats_df
    .sort_values('median', ascending=False)
    .reset_index()
    .rename(columns={'index': 'isotope'})
)
sorted_stats.insert(0, 'rank', range(1, len(sorted_stats) + 1))

# Render every statistic as scientific notation with 4 decimal places.
# From here on these columns hold strings, not floats.
for stat in stat_names:
    sorted_stats[stat] = sorted_stats[stat].map('{:.4e}'.format)

# Write the full ranking to disk, then preview the first ten rows.
output_path = 'isotopes_ranking.csv'
sorted_stats.to_csv(output_path, index=False)

print(f"Rankings saved to {output_path}")
print("\nTop 10 isotopes by median density:")
print(sorted_stats.head(10))

Rankings saved to isotopes_ranking.csv

Top 10 isotopes by median density:
   rank     isotope        mean      median         std         max  \
0     1   fuel_U238  1.9239e+00  1.9233e+00  6.7318e-02  2.0492e+00   
1     2    fuel_F19  1.1957e+00  1.1958e+00  2.5140e-04  1.1962e+00   
2     3   fuel_Na23  3.9712e-01  3.9712e-01  8.9246e-05  3.9727e-01   
3     4   fuel_U235  3.0937e-01  3.0949e-01  7.1019e-02  4.5039e-01   
4     5    fuel_K39  2.6710e-01  2.6714e-01  1.4672e-04  2.6733e-01   
5     6    fuel_K41  2.0266e-02  2.0267e-02  9.9569e-06  2.0282e-02   
6     7  fuel_Pu239  7.5423e-03  6.8421e-03  6.8644e-03  2.0921e-02   
7     8   fuel_U236  6.7960e-03  5.7703e-03  6.3745e-03  2.0120e-02   
8     9   fuel_U234  2.9925e-03  2.9893e-03  6.8391e-04  4.3200e-03   
9    10  fuel_Xe134  1.0694e-03  8.7044e-04  1.0201e-03  3.0390e-03   

          min  
0  1.7935e+00  
1  1.1953e+00  
2  3.9691e-01  
3  1.5320e-01  
4  2.6665e-01  
5  2.0237e-02  
6  0.0000e+00  
7  0.0000e+00  

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Box plot of the fuel columns on a logarithmic y-axis, drawn on an explicit
# figure/axes pair instead of the implicit pyplot "current axes".
fig, ax = plt.subplots(figsize=(15, 6))
ax.set_yscale('log')  # applied before drawing, same order as plotting on a pre-scaled axes
df_base[fuel_columns].boxplot(ax=ax)

# Slant the column labels so neighbouring isotope names do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
ax.set_title('Distribution of Fuel Isotopes (Log Scale)')
ax.set_ylabel('Density')
fig.tight_layout()
plt.show()

In [5]:
# List of isotopes we want to keep
sf_isotopes = ['U238', 'U235', 'Pu239', 'U236', 'U234']

# Column names for the selected isotopes.
# Deliberately NOT stored in `fuel_columns`: that name already holds the full
# list of per-isotope fuel columns used by the statistics and box-plot cells.
# Rebinding it here would make those cells silently operate on only these five
# isotopes whenever they are run after this one.
sf_fuel_columns = ['fuel_' + isotope for isotope in sf_isotopes]

# Non-isotope columns carried along with the selection
additional_columns = ['AN', 'SF', 'fuel_TOT_GS', 'fuel_TOT_DH', 'fuel_TOT_A']
columns_to_keep = sf_fuel_columns + additional_columns

# Create new dataframe with only selected columns (list indexing returns a new frame)
df_filtered = df_base[columns_to_keep]

In [6]:
# Shuffle all rows (frac=1 samples the whole frame) with a fixed seed so the
# order is reproducible, then renumber the index from 0.
df_shuffled = (
    df_filtered
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

In [7]:
# Persist the shuffled selection without the positional index column
df_shuffled.to_csv(path_or_buf='top10_nuclear.csv', index=False)

In [6]:
import openmc.data

# Columns to convert, built from the bare nuclide names plus the 'fuel_' prefix.
# NOTE(review): df_shuffled as constructed earlier in this notebook keeps only
# fuel_U238, fuel_U235, fuel_Pu239, fuel_U236 and fuel_U234, so most of these
# names are not present in it - confirm which frame this list is meant for.
isotope_columns = [
    'fuel_' + nuclide
    for nuclide in (
        'U238',
        'Pu236', 'Pu238', 'Pu239', 'Pu240', 'Pu241', 'Pu242', 'Pu244',
        'Am241',
        'Cm242', 'Cm244', 'Cm246', 'Cm248', 'Cm250',
        'Bk249',
    )
]

# Function to get isotope name in OpenMC format
def format_isotope_name(col_name):
    """Convert a dataset column name such as 'fuel_Pu239' to a nuclide name.

    Parameters
    ----------
    col_name : str
        Column name; any 'fuel_' substring is removed before parsing.

    Returns
    -------
    str
        Element symbol followed by the mass number, e.g. 'Pu239'. A metastable
        marker ('Am242m', 'Am242m1' or 'Am242_m1') is emitted in the
        underscore form 'Am242_m1'; a bare 'm' is taken to mean the first
        metastable state. Names that do not look like a nuclide fall back to
        "all letters followed by all digits".
    """
    import re  # local import so the cell does not depend on a separate import cell

    isotope = col_name.replace('fuel_', '')

    # <symbol><mass>[<optional metastable marker>]
    parsed = re.fullmatch(r'([A-Za-z]{1,2})(\d+)(?:_?[mM](\d*))?', isotope)
    if parsed is None:
        # Not a recognisable nuclide name: keep the historical behaviour.
        element = ''.join(filter(str.isalpha, isotope))
        mass_number = ''.join(filter(str.isdigit, isotope))
        return f"{element}{mass_number}"

    element, mass_number, state = parsed.groups()
    if state is None:
        return f"{element}{mass_number}"
    # Filtering letters and digits separately would fold the marker into the
    # symbol and the mass number ('Am242_m1' -> 'Amm2421'); keep it separate.
    return f"{element}{mass_number}_m{state or '1'}"

# Convert each column from density to mass by scaling it with the isotope's
# atomic mass as returned by openmc.data.atomic_mass.
#
# df_shuffled only carries the columns selected earlier in the notebook, so some
# names in isotope_columns may be absent. Indexing them directly would raise
# KeyError part-way through the loop and leave the frame half-converted, so the
# present columns and all atomic masses are resolved before anything is modified.
#
# NOTE: the scaling is applied to df_shuffled in place, so running this cell a
# second time multiplies again; rebuild df_shuffled before re-running.
present_columns = [column for column in isotope_columns if column in df_shuffled.columns]
missing_columns = [column for column in isotope_columns if column not in df_shuffled.columns]
if missing_columns:
    print(f"Skipping columns not present in df_shuffled: {missing_columns}")

atomic_masses = {
    column: openmc.data.atomic_mass(format_isotope_name(column))
    for column in present_columns
}

for column, atomic_mass in atomic_masses.items():
    df_shuffled[column] = df_shuffled[column] * atomic_mass

ModuleNotFoundError: No module named 'openmc'

In [None]:
# Preview the first rows rather than rendering the whole frame
df_shuffled.head()

In [5]:
# Write the current state of df_shuffled to CSV without the positional index column
df_shuffled.to_csv(path_or_buf='uppsala_neuralnet.csv', index=False)