In [1]:
# Packages

import pandas as pd
import pickle
import glob
import os
import numpy as np
import matplotlib.pyplot as plt
import jax.numpy as jnp
import shutil
import re

In [None]:
# --- Binning parameters -------------------------------------------------------
# Clip window applied to the potential values before they are histogrammed.
VEXT_MIN = -15   # lower clip bound (original note quotes a global value of "-14.744" -- presumably the observed minimum; confirm)
VEXT_MAX = 10.1  # upper clip bound
N_BINS = 100     # number of histogram bins stored per structure

# --- Input location and file naming -------------------------------------------
# Root directory that is scanned for "Vext_*" sub-folders.
base_dir = "/Users/danielbock/MASTERTHESIS/MASTA/DataArchiv/Vext_allTEMP"
# File names must look like Vext_<3 uppercase letters>_<digits>.pkl;
# group 1 is used as the structure name, group 2 as the temperature.
pattern = re.compile(r"Vext_([A-Z]{3})_(\d+)\.pkl")

# --- Accumulators filled by the processing loop -------------------------------
all_dfs = []  # one DataFrame per processed folder
counter = 0   # arrays whose number of finite entries differs from 64**3
# Walk every "Vext_*" temperature folder and turn each pickle file into one
# histogram row; one DataFrame per folder is appended to `all_dfs`.

# Linear bins over exp(-V) in [0, e^10].
# NOTE(review): values are clipped to [VEXT_MIN, VEXT_MAX] first, so exp(-V) can
# reach exp(-VEXT_MIN). With VEXT_MIN = -15 every sample with V < -10 lies above
# this range and np.histogram ignores it. The range is kept unchanged so that
# previously written CSVs stay comparable; the loop below reports how many
# samples were dropped. Confirm whether the upper edge should be exp(-VEXT_MIN).
EXP_HIST_RANGE = (0, np.exp(10))


def _boltzmann_histogram(values):
    """Histogram exp(-v) of the finite entries of ``values``.

    Non-finite entries (NaN/Inf) are removed, the rest is clipped to
    [VEXT_MIN, VEXT_MAX], transformed with exp(-v) and counted in N_BINS
    equal-width bins spanning EXP_HIST_RANGE.

    An empty input yields an all-zero histogram with the same bin edges as any
    other input, so the x_min/x_max columns are consistent across all rows.

    Returns:
        (hist, edges, n_finite): bin counts, the N_BINS + 1 bin edges and the
        number of finite input values.
    """
    finite = values[np.isfinite(values)]
    clipped = np.clip(finite, VEXT_MIN, VEXT_MAX)
    hist, edges = np.histogram(np.exp(-clipped), bins=N_BINS, range=EXP_HIST_RANGE)
    return hist, edges, finite.size


# sorted(): os.listdir returns entries in arbitrary order; sorting makes the
# row order of the resulting CSV reproducible.
for temp_folder in sorted(os.listdir(base_dir)):
    temp_path = os.path.join(base_dir, temp_folder)
    if not (os.path.isdir(temp_path) and temp_folder.startswith("Vext_")):
        continue

    print(f"\nBearbeite Temperatur-Ordner: {temp_folder}")
    rows = []
    dropped = 0  # samples of this folder that fell outside EXP_HIST_RANGE

    # Process every pickle file in the folder.
    for filename in sorted(os.listdir(temp_path)):
        if not filename.endswith(".pkl"):
            continue

        # fullmatch: the whole file name must follow the naming scheme.
        m = pattern.fullmatch(filename)
        if not m:
            print(f"Übersprungen (kein gültiger Name): {filename}")
            continue

        struct_name, temp = m.groups()
        file_path = os.path.join(temp_path, filename)

        # SECURITY: pickle.load can execute arbitrary code -- only use it on
        # files from a trusted source.
        with open(file_path, "rb") as f:
            data = pickle.load(f)

        arr = np.asarray(data, dtype=np.float64).squeeze()
        hist, edges, n_finite = _boltzmann_histogram(arr)

        # Count arrays whose number of finite entries differs from 64**3;
        # `counter` is kept at module level for inspection after the run.
        if n_finite != 64**3:
            counter += 1

        dropped += n_finite - int(hist.sum())

        # One row per structure: identifiers, bin counts, then the bin range.
        entry = {"structure_name": struct_name, "temperature_kelvin": int(temp)}
        entry.update({f"bin_{i}": int(v) for i, v in enumerate(hist)})
        entry["x_max"] = float(edges[-1])
        entry["x_min"] = float(edges[0])
        rows.append(entry)

    if dropped:
        print(f"Warnung: {dropped} Werte außerhalb des Histogrammbereichs verworfen")

    # Folders without any valid file contribute nothing.
    if rows:
        df_temp = pd.DataFrame(rows)
        all_dfs.append(df_temp)
        print(f"{len(rows)} Strukturen verarbeitet")

# Combine the per-folder frames into a single table; if nothing was collected,
# fall back to an empty DataFrame so the export below still runs.
if all_dfs:
    df_all = pd.concat(all_dfs, ignore_index=True)
else:
    df_all = pd.DataFrame()
print(f"\n Gesamt-DataFrame mit {len(df_all)} Zeilen erstellt")

# Export as CSV into the scanned base directory.
# An earlier naming scheme encoded the parameters in the file name:
#   f"Vext_allTEMP_exp_rangelogBin_{VEXT_MAX}_{VEXT_MIN}_{N_BINS}Bins.csv"
out_name = "Vext_allTEMP_64grid_100b_exp.csv"
out_path = os.path.join(base_dir, out_name)
df_all.to_csv(out_path, index=False)

print(f"Datei gespeichert unter:\n{out_path}")