In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt



In [None]:
# Read the three CSV parts of the extracted values and stack them into one
# frame with a fresh 0..n-1 index.
df_iso1, df_iso2, df_iso3 = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/data/yll6162/mof_cnn/PSED_data/extracted_cm3_per_cm3_values_part1.csv',
        '/data/yll6162/mof_cnn/PSED_data/extracted_cm3_per_cm3_values_part2.csv',
        '/data/yll6162/mof_cnn/PSED_data/extracted_cm3_per_cm3_values_part3.csv',
    )
)
df_iso = pd.concat([df_iso1, df_iso2, df_iso3], ignore_index=True)
df_iso

In [None]:
def _database_of(project_name):
    """Return 'qmof' for project names starting with 'qmof', else 'ToBaCCo'."""
    if project_name.startswith('qmof'):
        return 'qmof'
    return 'ToBaCCo'


# Label every row with its source database, derived from the project name prefix.
df_iso['database'] = df_iso['project'].map(_database_of)

In [None]:
# Summary statistics (count, mean, std, quartiles, ...) of the Xe value column.
xe_column = df_iso['Xe_cm3_per_cm3_value']
xe_column.describe()

In [None]:
# Overlay one histogram per database on a shared set of axes.
xe_values = df_iso['Xe_cm3_per_cm3_value']
min_value = xe_values.min()
max_value = xe_values.max()

# Fixed bins: 10 equal-width bins spanning 0..200.
bin_edges = np.linspace(0, 200, 11)
bar_width = bin_edges[1] - bin_edges[0]

# Values outside the fixed bin range are not counted by the histogram, so say
# so explicitly instead of silently dropping them from the figure.
n_outside = int(((xe_values < bin_edges[0]) | (xe_values > bin_edges[-1])).sum())
if n_outside:
    print(
        f"Note: {n_outside} value(s) fall outside the plotted range "
        f"[{bin_edges[0]:g}, {bin_edges[-1]:g}] "
        f"(data range: {min_value:g} to {max_value:g}) and are not shown."
    )

# Draw on an explicit Axes so the figure does not depend on whichever axes
# happen to be "current" in the kernel.
fig, ax = plt.subplots()
df_iso.groupby('database')['Xe_cm3_per_cm3_value'].plot(
    kind='hist',
    bins=bin_edges,
    alpha=0.8,
    legend=True,
    width=bar_width,
    edgecolor='grey',
    ax=ax,
)
ax.grid(True)
ax.set_xlabel('Xe_cm3_per_cm3_value')
ax.set_ylabel('Frequency')
ax.set_title('Isotherms histogram grouped by database')
ax.set_xticks(bin_edges)
plt.show()

In [None]:
import os

# Define the top-level directory
base_dir = "/data/yll6162/mof_cnn/PSED_data/New_Parsed" ## REPLACE WITH YOUR DIRECTORY

# Define the relative subdirectory and file names to check
subdir = "ASCI_Grids"
file_names = ["energy_grid.txt"]

# A complete grid file has one line per point of a 41 x 41 x 41 grid.
expected_line_count = 41 ** 3  # 68921

# ASCI_Grids directories whose grid file has the wrong number of lines;
# directories with a completely empty file are additionally listed in empty_files.
abnormal_files = []
empty_files = []

# Visit the folders in sorted order so the log and the result lists are
# reproducible between runs (os.listdir order is arbitrary).
for folder in sorted(os.listdir(base_dir)):
    folder_path = os.path.join(base_dir, folder)

    # Skip plain files sitting next to the per-MOF folders.
    if not os.path.isdir(folder_path):
        continue

    asci_grids_path = os.path.join(folder_path, subdir)
    if not os.path.exists(asci_grids_path):
        print(f"ASCI_Grids subdirectory does not exist in folder: {folder}")
        continue

    print(f"\nProcessing ASCI_Grids in folder: {folder}")

    for file_name in file_names:
        file_path = os.path.join(asci_grids_path, file_name)

        if not os.path.exists(file_path):
            print(f"{file_name} does not exist in folder: {folder}")
            continue

        print(f"\nReading file: {file_name} in folder {folder}")
        try:
            # Count lines lazily instead of loading the whole file into memory.
            with open(file_path, 'r') as file:
                line_count = sum(1 for _ in file)
        except Exception as e:
            print(f"Error reading {file_name} in folder {folder}: {e}")
            continue

        if line_count == expected_line_count:
            continue

        if line_count == 0:
            empty_files.append(asci_grids_path)

        print(f"Error: {file_name} in folder {folder} has {line_count} lines")
        abnormal_files.append(asci_grids_path)
        # One bad file is enough to flag this folder; skip its remaining files.
        break

In [None]:
# (number of folders with a wrong line count, number of folders with an empty file)
tuple(len(paths) for paths in (abnormal_files, empty_files))

In [None]:
import numpy as np
mof_name = 'f1_uko_26_1x1x1'
with open(f"/data/yll6162/mof_cnn/PSED_data/New_Parsed/{mof_name}/ASCI_Grids//energy_grid.txt", "r") as file:
    lines = file.readlines()  # Each line is stored as an element in the list

# Step 1: Strip whitespace/newlines so that '?' is recognised regardless of the
# line ending (a last line without '\n', or '\r\n'), and drop blank lines.
data_array = np.array([line.strip() for line in lines if line.strip()], dtype=object)

# Step 2: Parse only the real numbers; '?' entries stay NaN for now so that no
# placeholder value can influence the maximum computed below.
missing_mask = data_array == '?'
if missing_mask.all():
    raise ValueError(f"energy grid for {mof_name} contains no numeric values")
numeric_array = np.full(data_array.shape, np.nan, dtype=float)
numeric_array[~missing_mask] = data_array[~missing_mask].astype(float)

# Step 3: Maximum over the real values only (NaN placeholders are ignored)
original_max = np.nanmax(numeric_array)

# Step 4: Replace '?' with the original maximum value
cleaned_data = np.where(missing_mask, original_max, numeric_array)

# Step 5: Reshape the flat list of values into a cubic grid
size = 41
if cleaned_data.size != size ** 3:
    raise ValueError(
        f"energy grid for {mof_name} has {cleaned_data.size} values, "
        f"expected {size ** 3} ({size}^3)"
    )
reshaped_array = cleaned_data.reshape((size, size, size))
assert np.isnan(reshaped_array).sum()==0

In [None]:
# Memory-map the array file read-only (mmap_mode='r') rather than loading it
# fully into memory.
X = np.load('/data/yll6162/data/train/clean.npy', mmap_mode='r')
# A bare `X.shape` in the middle of a cell is discarded; print it so it is shown.
print(X.shape)
# Pick one entry to inspect.
sample = X[50]


In [None]:
# Calculate histogram, max, and min
# Bind the array explicitly so this cell works on a fresh kernel; previously
# `array_3d` was only defined by a later cell.
# NOTE(review): use `array_3d = sample` instead to inspect the loaded sample - confirm which was intended.
array_3d = reshaped_array
hist, bin_edges = np.histogram(array_3d, bins=20)  # Histogram with 20 bins
array_max = np.max(array_3d)  # Maximum value
array_min = np.min(array_3d)  # Minimum value

# Print the results
print("Maximum Value:", array_max)
print("Minimum Value:", array_min)
print("Histogram Bin Counts:", hist)
print("Histogram Bin Edges:", bin_edges)

# Display histogram, reusing the bin edges computed above so the figure and
# the printed counts are guaranteed to describe the same binning.
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(array_3d.flatten(), bins=bin_edges, color='blue', alpha=0.7)
ax.set_title('Histogram of 3D Array Values')
ax.set_xlabel('Value')
ax.set_ylabel('Frequency')
ax.grid(True)
plt.show()

In [None]:
import plotly.graph_objects as go
import numpy as np

# Volume to render: the parsed energy grid, with the top 2% of values capped
# at the 98th percentile.
# (To render the loaded sample instead, start from `array_3d = sample`.)
array_3d = reshaped_array
capped_max = np.percentile(array_3d, 98)
array_3d = np.clip(array_3d, None, capped_max)

# Coordinates for the 3D volume.
# The grids get their own names so they do not overwrite `X`, the array
# loaded in an earlier cell.
x = np.linspace(0, 40, array_3d.shape[0])
y = np.linspace(0, 40, array_3d.shape[1])
z = np.linspace(0, 40, array_3d.shape[2])
grid_x, grid_y, grid_z = np.meshgrid(x, y, z, indexing="ij")

data_min = float(np.min(array_3d))
data_max = float(np.max(array_3d))

# Diverging colorscale with white pinned at the value 0.0. Colorscale stops
# must lie in [0, 1], so the white stop is only placed in the interior when the
# data actually straddles zero; otherwise fall back to a two-colour scale
# (this also avoids dividing by zero when data_min == data_max).
if data_min < 0 < data_max:
    zero_position = (0 - data_min) / (data_max - data_min)
    custom_colorscale = [
        [0, "blue"],
        [zero_position, "white"],  # Map 0.0 to white
        [1, "red"],
    ]
elif data_min >= 0:
    # No negative values: white (lowest) to red.
    custom_colorscale = [[0, "white"], [1, "red"]]
else:
    # No positive values: blue to white (highest).
    custom_colorscale = [[0, "blue"], [1, "white"]]


# Create a 3D volume rendering
fig = go.Figure(data=go.Volume(
    x=grid_x.flatten(),
    y=grid_y.flatten(),
    z=grid_z.flatten(),
    value=array_3d.flatten(),
    isomin=data_min,
    isomax=data_max,
    opacity=0.2,  # Adjust opacity
    colorscale=custom_colorscale,
    surface_count=15  # Number of surfaces
))

# Update layout for larger display
fig.update_layout(
    scene=dict(
        xaxis=dict(range=[0, 40]),
        yaxis=dict(range=[0, 40]),
        zaxis=dict(range=[0, 40]),
        aspectratio=dict(x=1, y=1, z=1)  # Keep aspect ratio consistent
    ),
    width=1000,  # Set figure width
    height=1000,  # Set figure height
    title_text=f'MOF-{mof_name}'  # Follows the MOF selected when the grid was loaded
)

fig.show()