Packages

In [1]:
import pandas as pd
import pickle
import glob
import os
import numpy as np
import matplotlib.pyplot as plt
import jax.numpy as jnp
import shutil
import re

Sort the Vext files into one folder per temperature

In [2]:
# Move every Vext pickle lying directly in base_dir into a sub-folder named
# after the digit run in its file name (created on demand).
base_dir = "/Users/danielbock/MASTERTHESIS/MASTA/DataArchiv/Vext_allTEMP_64grid_swing/"

# Accepted file names: Vext_<three uppercase letters>_<digits>.pkl
pattern = re.compile(r"^Vext_([A-Z]{3})_(\d+)\.pkl$")

# os.listdir returns a snapshot list, so folders created inside the loop
# do not affect the iteration.
for entry in os.listdir(base_dir):
    hit = pattern.match(entry) if entry.endswith(".pkl") else None
    if hit is None:
        continue

    # group(2) is the digit run; the target folder reuses it, e.g. "Vext_400"
    target_dir = os.path.join(base_dir, f"Vext_{hit.group(2)}")
    os.makedirs(target_dir, exist_ok=True)

    shutil.move(os.path.join(base_dir, entry), os.path.join(target_dir, entry))

Vext --> Histogram (one fixed-range histogram row per file, written to CSV)

In [None]:
# Build one fixed-range histogram per Vext pickle found in the "Vext_*"
# sub-folders of base_dir and write all of them to a single CSV
# (one row per file, one column per bin).

# Histogram configuration: N_BINS equal-width bins spanning [VEXT_MIN, VEXT_MAX].
# np.histogram ignores values that fall outside this range.
VEXT_MAX = 10.1
VEXT_MIN = -15
N_BINS   = 95

# Number of finite values a file is expected to contain.
# NOTE(review): presumably one value per cell of a 64x64x64 grid
# (cf. "64grid" in the directory name) - confirm.
EXPECTED_SIZE = 64**3

base_dir = "/Users/danielbock/MASTERTHESIS/MASTA/DataArchiv/Vext_allTEMP_64grid_swing/"
pattern  = re.compile(r"Vext_([A-Z]{3})_(\d+)\.pkl")

all_dfs = []
# Counts files whose number of *finite* values differs from EXPECTED_SIZE,
# i.e. files with a different element count or with NaN/inf entries.
counter_badshape = 0

# os.listdir returns entries in arbitrary order; sorting makes the row order
# of the resulting CSV reproducible across runs and machines.
for temp_folder in sorted(os.listdir(base_dir)):
    temp_path = os.path.join(base_dir, temp_folder)
    # only directories whose name starts with "Vext_" (e.g. Vext_300)
    if not (os.path.isdir(temp_path) and temp_folder.startswith("Vext_")):
        continue

    print(f"\nBearbeite Temperatur-Ordner: {temp_folder}")
    rows = []

    for filename in sorted(os.listdir(temp_path)):
        if not filename.endswith(".pkl"):
            continue

        # fullmatch: the whole file name has to fit the pattern, not just a
        # prefix of it (plain match() would accept "Vext_ABC_300.pklX.pkl").
        m = pattern.fullmatch(filename)
        if not m:
            print(f"Übersprungen (kein gültiger Name): {filename}")
            continue

        struct_name, temp = m.groups()
        temp_k = float(temp)
        file_path = os.path.join(temp_path, filename)

        # NOTE: pickle.load can execute arbitrary code - only run this on
        # files from a trusted source.
        with open(file_path, "rb") as f:
            data = pickle.load(f)

        # Boolean masking flattens the array and keeps only finite values.
        arr = np.asarray(data, dtype=np.float64).squeeze()
        arr = arr[np.isfinite(arr)]

        if arr.size != EXPECTED_SIZE:
            counter_badshape += 1

        hist, edges = np.histogram(arr, bins=N_BINS, range=(VEXT_MIN, VEXT_MAX))

        # One CSV row: identifiers, the bin counts, then the histogram range.
        entry = {
            "structure_name": struct_name,
            "temperature_kelvin": temp_k,
            **{f"bin_{i}": int(v) for i, v in enumerate(hist)},
            "x_min": float(edges[0]),
            "x_max": float(edges[-1]),
        }
        rows.append(entry)

    if rows:
        df_temp = pd.DataFrame(rows)
        all_dfs.append(df_temp)
        print(f"{len(rows)} Strukturen verarbeitet")

# Fall back to an empty frame when no folder yielded any rows.
df_all = pd.concat(all_dfs, ignore_index=True) if all_dfs else pd.DataFrame()
print(f"\nGesamt-DataFrame mit {len(df_all)} Zeilen erstellt")
print(f"Anzahl Dateien mit unerwarteter Größe: {counter_badshape}")

out_name = "Vext_XYZ.csv"
out_path = os.path.join('/Users/danielbock/MASTERTHESIS/MASTA/DataArchiv/', out_name)
df_all.to_csv(out_path, index=False)
print(f"Datei gespeichert unter:\n{out_path}")