In [None]:
# Packages

import pandas as pd
import pickle
import glob
import os
import numpy as np
import matplotlib.pyplot as plt
import jax.numpy as jnp
import shutil
import re

Vext_<STR>_<TEMP>.pkl --> Ordner Vext_<TEMP>

In [None]:
# Root folder that contains all Vext_*.pkl files
base_dir = "/Users/danielbock/MASTERTHESIS/MASTA/DataArchiv/Vext_allTEMP_64grid_swing/"

# Expected file name: Vext_<STR>_<TEMP>.pkl (three capital letters, then digits)
pattern = re.compile(r"^Vext_([A-Z]{3})_(\d+)\.pkl$")

# Only pickle files that sit directly in the root folder are considered
pickle_names = [name for name in os.listdir(base_dir) if name.endswith(".pkl")]

for filename in pickle_names:
    match = pattern.match(filename)
    if match is None:
        # Name does not follow the scheme: report it and leave the file in place
        print(f"‚ö†Ô∏è  √úbersprungen (kein passender Name): {filename}")
        continue

    structure = match.group(1)
    temp = match.group(2)

    # The target sub-folder is named after the temperature, e.g. "Vext_400"
    temp_folder = "Vext_" + temp
    temp_path = os.path.join(base_dir, temp_folder)
    os.makedirs(temp_path, exist_ok=True)  # no error if it already exists

    # Move the file from the root folder into its temperature sub-folder
    src, dst = os.path.join(base_dir, filename), os.path.join(temp_path, filename)
    shutil.move(src, dst)


print("Alle Dateien wurden nach Temperatur-Unterordnern sortiert.")


MIN/MAX GLOBAL PICKLE

In [None]:
import os
import pickle
import numpy as np

# Root folder holding the sub-folders to scan (e.g. Vext_400/, Vext_500/, ...)
base_dir = "/Users/danielbock/MASTERTHESIS/MASTA/DataArchiv/Vext_allTEMP_32grid"

# Running extrema over all files, plus the number of files that contributed
global_min, global_max = np.inf, -np.inf
count_files = 0

for folder in os.listdir(base_dir):
    temp_path = os.path.join(base_dir, folder)

    # Skip everything that is not a directory named Vext_*
    if not os.path.isdir(temp_path) or not folder.startswith("Vext_"):
        continue

    for filename in os.listdir(temp_path):
        if filename.endswith(".pkl"):
            file_path = os.path.join(temp_path, filename)

            # Unpickle the stored object
            with open(file_path, "rb") as f:
                data = pickle.load(f)

            # Convert to a float64 NumPy array and keep only finite entries
            arr = np.asarray(data, dtype=np.float64).squeeze()
            arr = arr[np.isfinite(arr)]

            # Files without any finite value do not contribute and are not counted
            if arr.size > 0:
                local_min = arr.min()
                local_max = arr.max()

                # Fold the per-file extrema into the global ones
                if local_min < global_min:
                    global_min = local_min
                if local_max > global_max:
                    global_max = local_max
                count_files += 1

# Final report
if count_files == 0:
    print("‚ö†Ô∏è Keine .pkl-Dateien gefunden.")
else:
    print(f"\n‚úÖ {count_files} Dateien verarbeitet")
    print(f"üå°Ô∏è  Globales Minimum: {global_min:.3f}")
    print(f"üå°Ô∏è  Globales Maximum: {global_max:.3f}")


VISUALISIERUNG VEXT RAW

In [None]:
# Load one pickled example (presumably structure MWW at 400 K, judging by the file name)
with open("/Users/danielbock/MASTERTHESIS/MASTA/DataArchiv/Vext_allTEMP/Vext_400/Vext_MWW_400.pkl", "rb") as f:
    data = pickle.load(f)

# Keep only entries strictly below the cutoff.
# The boolean mask is built on the loaded object before it is converted to a JAX array.
cutoff = 10.1
mask = data < cutoff
data = jnp.array(data)
data_mask = data[mask]

# Shapes before and after masking
print(data.shape)
print(data_mask.shape)

# Histogram of the values that passed the cutoff
fig, ax = plt.subplots(figsize=(6, 4))
ax.hist(data_mask, bins=100, color="green", edgecolor="black")
ax.set_xlabel(r"$\beta V^{\mathrm{ext}}$")
ax.set_ylabel("Count")
ax.set_title("Histogram of external potential")
fig.tight_layout()
# Optional export:
# fig.savefig("beta_Vext_histogram_example_MWW2.png", dpi=300, bbox_inches="tight")
plt.show()

VEXT HIST

In [None]:
import os
import re
import pickle
import numpy as np
import pandas as pd

# Histogram settings
VEXT_MAX = 10.1       # upper cutoff of the histogram range
VEXT_MIN = -15     # lower cutoff of the histogram range (original note: global "-14.744")
N_BINS   = 100
EXPECTED_SIZE = 32**3  # expected number of finite values per file

base_dir = "/Users/danielbock/MASTERTHESIS/MASTA/DataArchiv/Vext_allTEMP"
pattern  = re.compile(r"Vext_([A-Z]{3})_(\d+)\.pkl")

all_dfs = []
counter = 0  # files whose number of finite values differs from EXPECTED_SIZE

# Iterate over the temperature folders; sorted so the CSV row order is reproducible
for temp_folder in sorted(os.listdir(base_dir)):
    temp_path = os.path.join(base_dir, temp_folder)
    if not (os.path.isdir(temp_path) and temp_folder.startswith("Vext_")):
        continue

    print(f"\nBearbeite Temperatur-Ordner: {temp_folder}")
    rows = []

    # All pickle files of this temperature folder
    for filename in sorted(os.listdir(temp_path)):
        if not filename.endswith(".pkl"):
            continue

        m = pattern.match(filename)
        if not m:
            print(f"√úbersprungen (kein g√ºltiger Name): {filename}")
            continue

        struct_name, temp = m.groups()
        file_path = os.path.join(temp_path, filename)

        # NOTE: pickle.load can execute arbitrary code; only load trusted files
        with open(file_path, "rb") as f:
            data = pickle.load(f)

        arr = np.asarray(data, dtype=np.float64).squeeze()
        arr = arr[np.isfinite(arr)]  # drop NaN/Inf

        if arr.size != EXPECTED_SIZE:
            counter += 1

        # Equidistant bins over the fixed range; values outside the range are
        # not counted. np.histogram also accepts an empty array and then
        # returns all-zero counts with the same edges, so every row of the
        # CSV shares one x-range.
        hist, edges = np.histogram(arr, bins=N_BINS, range=(VEXT_MIN, VEXT_MAX))

        # One row per structure/temperature: metadata, bin counts, x-range
        entry = {"structure": struct_name, "temperature": int(temp)}
        entry.update({f"bin_{i}": int(v) for i, v in enumerate(hist)})
        # NOTE(review): the key names are inverted relative to the values
        # ("x_min" holds the last edge, "x_max" the first edge). Left
        # unchanged because CSVs that were already written presumably use
        # this layout; confirm with all readers before renaming.
        entry["x_min"] = float(edges[-1])
        entry["x_max"] = float(edges[0])
        rows.append(entry)

    if rows:
        df_temp = pd.DataFrame(rows)
        all_dfs.append(df_temp)
        print(f"{len(rows)} Strukturen verarbeitet")

# Single summary instead of one counter line per file
print(f"\n{counter} Dateien mit unerwarteter Anzahl endlicher Werte (!= {EXPECTED_SIZE})")

# Merge all per-temperature frames into one
df_all = pd.concat(all_dfs, ignore_index=True) if all_dfs else pd.DataFrame()
print(f"\n Gesamt-DataFrame mit {len(df_all)} Zeilen erstellt")

# Write the CSV next to the data; the file name encodes the histogram settings
out_name = f"Vext_allTEMP_noexp_rangelinBin_cut{VEXT_MAX}_{VEXT_MIN}_{N_BINS}Bins.csv"
out_path = os.path.join(base_dir, out_name)
df_all.to_csv(out_path, index=False)

print(f"Datei gespeichert unter:\n{out_path}")

ANALYSE HIST

In [None]:
# CSV with one histogram per row (bin_* columns plus x_min / x_max)
df = pd.read_csv("/Users/danielbock/MASTERTHESIS/MASTA/DataArchiv/Vext_allTEMP_32grid/Vext_allTEMP_noexp_rangelinBin_cut10.1_-15_100Bins_32grid.csv")

# Row to plot
structure = "BCT"
temperature = 300

# The metadata columns may be called "structure_name"/"temperature_kelvin" or
# "structure"/"temperature"; resolve the names once so that the filter and the
# plot title read the same columns.
struct_col = "structure_name" if "structure_name" in df.columns else "structure"
temp_col = "temperature_kelvin" if "temperature_kelvin" in df.columns else "temperature"

df_sel = df[(df[struct_col] == structure) & (df[temp_col] == temperature)]

# Fail early when the selection is empty
if df_sel.empty:
    raise ValueError(f"Keine Daten f√ºr Struktur {structure} gefunden!")

# First matching row
row = df_sel.iloc[0]

# Bin counts in column order; the row has mixed dtypes, so cast explicitly
bin_cols = [c for c in df.columns if c.startswith("bin_")]
y = row[bin_cols].to_numpy(dtype=float)

# Rebuild the equidistant bin edges. The two bounds are sorted so the axis is
# ascending regardless of which of x_min / x_max holds the lower edge.
x_min, x_max = sorted((float(row["x_min"]), float(row["x_max"])))
x_edges = np.linspace(x_min, x_max, len(bin_cols) + 1)
x_centers = 0.5 * (x_edges[:-1] + x_edges[1:])  # bin centres

# Plot
plt.figure(figsize=(8, 5))
plt.plot(x_centers, y, marker="o", lw=1.5)
plt.title(f"Histogramm f√ºr Struktur {structure} (Temp {int(row[temp_col])} K)")
plt.xlabel("Vext (x)")
plt.ylabel("H√§ufigkeit / Bin")
plt.grid(True)
plt.tight_layout()
plt.show()


DFT - CLEAN

In [None]:
# Remove feature columns whose header consists of digits only

dft_data = pd.read_csv("/Users/danielbock/MASTERTHESIS/MASTA/DataArchiv/2025_10_5_DB_100Bins_DOTO.csv")

# Boolean mask over the column labels: True where the label is purely numeric
digit_named = dft_data.columns.str.isdigit()
dft_data = dft_data.loc[:, ~digit_named]

# Store the reduced table without the index column
output_path = "/Users/danielbock/MASTERTHESIS/MASTA/DataArchiv/DFT_Data_clean_06_10.csv"
dft_data.to_csv(output_path, index=False)

In [None]:
# Strip the surrounding square brackets from "density_bulk" and make it numeric

df = pd.read_csv("/Users/danielbock/MASTERTHESIS/MASTA/DataArchiv/DFT_Data_clean_06_10.csv")

# Work on the string representation of every entry
density_text = df["density_bulk"].astype(str)
density_text = density_text.str.replace(r"^\s*\[\s*", "", regex=True)  # leading '['
density_text = density_text.str.replace(r"\s*\]\s*$", "", regex=True)  # trailing ']'
density_text = density_text.str.strip()

# Entries that still cannot be parsed as a number become NaN
df["density_bulk"] = pd.to_numeric(density_text, errors="coerce")

# The dtype is expected to be float64 after the conversion
print(df["density_bulk"].head(), df["density_bulk"].dtype)

# The cleaned table overwrites the file it was read from
df.to_csv("/Users/danielbock/MASTERTHESIS/MASTA/DataArchiv/DFT_Data_clean_06_10.csv", index=False)