<a href="https://colab.research.google.com/github/birrulwldain/CNNLIBSpython/blob/main/Torrent_To_Google_Drive_Downloader.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Langkah 1: Instal pustaka yang diperlukan
!pip install h5py scipy numpy

# Langkah 2: Impor pustaka
import numpy as np
import h5py
import os
from scipy.signal import find_peaks
from google.colab import drive, files

# Step 3: mount Google Drive; the augmented output is written there.
drive.mount("/content/gdrive")

# Step 4: ask the user to upload pure_spectra.h5.
print("Silakan unggah file pure_spectra.h5")
uploaded = files.upload()

# The loader below reads the upload from this fixed path, so fail early
# with a clear message when nothing exists there.
h5_path = "/content/pure_spectra.h5"
if os.path.exists(h5_path) is False:
    raise FileNotFoundError(
        "File pure_spectra.h5 tidak ditemukan! Pastikan Anda mengunggah file tersebut."
    )

# Step 5: create the Google Drive directory that receives the augmented splits.
output_dir = "/content/gdrive/My Drive/libs_lstm/data/processed/augmented"
os.makedirs(output_dir, exist_ok=True)

# Function to load the spectra
def load_spectra(h5_path):
    """
    Load the pure Ca I spectra from an HDF5 file.

    Every top-level group whose name splits on "_" into exactly three parts
    (element, ion, temperature such as "6000K") is inspected; only groups with
    element "Ca" and ion 1 are kept. For a kept group, column 0 of
    ``spectrum/block0_values`` is used as the wavelength axis and column 1 as
    the intensity, which is min-max normalised to [0, 1]. The labels come from
    ``labels/block1_values``, flattened to 1-D.

    Groups that cannot be parsed or read are reported with a message and
    skipped, so a single bad group does not abort the whole load.

    Parameters:
    - h5_path: Path of the HDF5 file to read.

    Returns: List of (spectra, labels, wavelengths, element, ion, temp) tuples.

    Raises:
    - ValueError: If no Ca I group could be loaded.
    """
    print(f"Opening HDF5 file: {h5_path}")
    data = []
    with h5py.File(h5_path, "r") as f:
        expected_groups = 5  # Only 5 spectra are expected for Ca I
        for group in f.keys():
            try:
                parts = group.split("_")
                if len(parts) != 3:
                    print(f"Skipping {group}: Invalid format")
                    continue
                element, ion, temp = parts
                ion = int(ion)
                temp = int(float(temp.replace("K", "")))

                # Keep Ca I only
                if element != "Ca" or ion != 1:
                    continue

                spectrum_group = f[group]["spectrum"]
                if "block0_values" not in spectrum_group:
                    print(f"Skipping {group}: No block0_values in spectrum")
                    continue
                spectrum_data = spectrum_group["block0_values"][:]
                wavelengths = spectrum_data[:, 0]
                intensity = spectrum_data[:, 1]

                # A flat (or NaN/inf) spectrum has no usable range: dividing by
                # it would only emit a NumPy warning and store NaN/inf values
                # as if they were valid data, so skip the group explicitly.
                intensity_min = intensity.min()
                intensity_range = intensity.max() - intensity_min
                if not np.isfinite(intensity_range) or intensity_range <= 0:
                    print(f"Skipping {group}: Flat or non-finite intensity, cannot normalize")
                    continue
                intensity = (intensity - intensity_min) / intensity_range

                labels_group = f[group]["labels"]
                if "block1_values" not in labels_group:
                    print(f"Skipping {group}: No block1_values in labels")
                    continue
                labels = labels_group["block1_values"][:].flatten()

                data.append((intensity, labels, wavelengths, element, ion, temp))
                print(f"Loaded group {group}")
            except Exception as e:
                # Deliberate best-effort: report the group and move on.
                print(f"Error in {group}: {e}")
                continue
        print(f"Total groups loaded: {len(data)}/{expected_groups}")
        if len(data) != expected_groups:
            print(f"Warning: Expected {expected_groups} groups, got {len(data)}")
        if not data:
            raise ValueError("No valid data loaded from HDF5 file")
    return data

# Function to apply Doppler and Stark broadening
def apply_doppler_stark_broadening(spectrum, wavelengths, temp, ne_scale_range=(1e15, 1e18), temp_variation=0.05, add_baseline=True, add_noise=True, add_intensity_scale=True):
    """
    Rebuild a spectrum from its detected peaks with randomised line broadening.

    Each detected peak is replaced by a Lorentzian whose full width is the
    sum of a Doppler width (from a randomly perturbed temperature) and a Stark
    width (from a randomly drawn electron density). Optional baseline,
    intensity scaling and Gaussian noise are then applied and the result is
    min-max normalised to [0, 1].

    Randomness comes from the global ``np.random`` state; seed it beforehand
    for reproducible output.

    Parameters:
    - spectrum: Original spectrum intensity (NumPy array).
    - wavelengths: Wavelength array (in nm), same length as ``spectrum``.
    - temp: Plasma temperature (in K).
    - ne_scale_range: Tuple (min, max) for electron density (in cm^-3).
    - temp_variation: Fractional variation in temperature (± fraction).
    - add_baseline: If True, add a small quadratic baseline.
    - add_noise: If True, add Gaussian noise.
    - add_intensity_scale: If True, scale the intensity by a random factor.

    Returns: The synthetic spectrum, normalised and clipped to [0, 1]. When no
    peak is detected a warning is printed and ``spectrum`` itself is returned
    unchanged.
    """
    # 1. Randomise the temperature and the electron density for this sample
    temp_var = np.random.uniform(1 - temp_variation, 1 + temp_variation)
    temp_adjusted = temp * temp_var
    ne = np.random.uniform(ne_scale_range[0], ne_scale_range[1])  # Electron density (cm^-3)

    # 2. Detect emission peaks; low thresholds so that weak lines are kept
    peaks, _ = find_peaks(spectrum, height=0.02, distance=5, prominence=0.01)
    if len(peaks) == 0:
        print("Warning: No peaks detected in spectrum!")
        return spectrum

    # 3. Doppler (Gaussian) FWHM per peak
    speed_of_light = 3e8  # m/s
    boltzmann = 1.38e-23  # J/K
    atom_mass = 6.64e-26  # Mass of a Ca atom (kg)
    lambda_0 = wavelengths[peaks]  # Peak wavelengths (nm)
    # nm -> m for the formula, then back to nm
    delta_lambda_d = (lambda_0 * 1e-9 / speed_of_light) * np.sqrt(8 * boltzmann * temp_adjusted * np.log(2) / atom_mass) * 1e9

    # 4. Stark (Lorentzian) width, scaled linearly with ne relative to 1e16 cm^-3
    w = 0.005  # Stark parameter used for Ca I
    delta_lambda_s = w * (ne / 1e16)  # Stark width (nm)

    # 5. Effective width: a plain sum of both widths, used as a single
    #    Lorentzian width (approximation, not a Voigt profile)
    delta_lambda = delta_lambda_d + delta_lambda_s

    # 6. Build the synthetic spectrum as a sum of one Lorentzian per peak
    synthetic_spectrum = np.zeros_like(spectrum)
    for peak_idx, full_width in zip(peaks, delta_lambda):
        half_width = full_width / 2  # FWHM -> half width at half maximum
        wl_center = wavelengths[peak_idx]
        intensity = spectrum[peak_idx]
        lorentzian = intensity * (half_width**2 / ((wavelengths - wl_center)**2 + half_width**2))
        synthetic_spectrum += lorentzian

    # 7. Optional quadratic baseline with coefficients of at most 1e-4
    if add_baseline:
        x = np.linspace(-1, 1, len(spectrum))
        baseline_scale = 0.0001
        quad_coef = np.random.uniform(-baseline_scale, baseline_scale)
        lin_coef = np.random.uniform(-baseline_scale/2, baseline_scale/2)
        offset = np.random.uniform(0, baseline_scale/2)
        baseline = quad_coef * x**2 + lin_coef * x + offset
        synthetic_spectrum = synthetic_spectrum + baseline

    # 8. Optional intensity scaling by a factor in [0.8, 1.0]. It is applied
    #    before the noise, so after the final renormalisation its net effect
    #    is a slightly different signal-to-noise ratio.
    if add_intensity_scale:
        intensity_scale = np.random.uniform(0.8, 1.0)
        synthetic_spectrum = synthetic_spectrum * intensity_scale

    # 9. Optional Gaussian noise with standard deviation 1e-4
    if add_noise:
        noise_scale = 0.0001
        noise = np.random.normal(0, noise_scale, spectrum.shape)
        synthetic_spectrum = synthetic_spectrum + noise

    # 10. Renormalise to [0, 1]
    synthetic_spectrum = (synthetic_spectrum - synthetic_spectrum.min()) / (synthetic_spectrum.max() - synthetic_spectrum.min())
    synthetic_spectrum = np.clip(synthetic_spectrum, 0, 1)
    return synthetic_spectrum

# Function to generate labels
def generate_labels(spectrum, temp):
    """
    Mark the points of a spectrum that exceed a temperature-scaled threshold.

    The threshold is (median + 2 * standard deviation) of the spectrum,
    multiplied by temp / 6000. Returns an int32 array holding 1 where the
    spectrum is strictly above the threshold and 0 elsewhere.
    """
    # 6000 is the reference temperature the scaling is normalised against.
    scale = temp / 6000
    centre = np.median(spectrum)
    spread = np.std(spectrum)
    cutoff = (centre + 2 * spread) * scale
    above_cutoff = np.greater(spectrum, cutoff)
    return above_cutoff.astype(np.int32)

# Function to augment the data
def augment_with_doppler_stark(data, output_dir, element, ion, split, global_counters, temp_step=500):
    """
    Generate broadened synthetic spectra for one dataset split and append
    them to ``<output_dir>/<split>.h5``.

    Target temperatures run from 6000 K to 30000 K in steps of ``temp_step``.
    For every target temperature the loaded spectrum with the closest
    temperature is used as the template. The split size is spread evenly over
    all target temperatures, so every split covers the whole temperature
    range instead of only its lowest part.

    Each sample is stored as a group named
    ``<element>_<ion>_<temp>K_<counter>`` holding the datasets "spectrum",
    "labels" and "wavelengths" and the attributes "element", "ion" and "temp".

    Parameters:
    - data: List of (intensity, labels, wavelengths, element, ion, temp)
      tuples, as returned by load_spectra.
    - output_dir: Directory that holds the per-split HDF5 files.
    - element, ion: Species being augmented (used in the progress messages).
    - split: One of "train", "validation" or "test".
    - global_counters: Dict mapping split name to the next free sample index;
      it is incremented in place for every group written.
    - temp_step: Spacing of the target temperatures (in K).

    Raises:
    - ValueError: If ``data`` is empty.
    - KeyError: If ``split`` is not one of the three known splits.
    """
    print(f"Starting augmentation for {element} {ion} ({split} split)...")
    print(f"Number of spectra loaded: {len(data)}")
    if len(data) == 0:
        raise ValueError(f"No data to augment for {element} {ion}!")

    # Target temperature grid
    min_temp = 6000
    max_temp = 30000
    temp_range = np.arange(min_temp, max_temp + temp_step, temp_step)
    num_temps = len(temp_range)
    total_spectra = 2000  # Desired total over all three splits
    spectra_per_temp = total_spectra // num_temps
    print(f"Generating spectra for {num_temps} temperatures ({min_temp}K to {max_temp}K) with {spectra_per_temp} spectra per temperature")

    output_h5 = os.path.join(output_dir, f"{split}.h5")
    split_fractions = {"train": 0.7, "validation": 0.15, "test": 0.15}
    split_size = int(spectra_per_temp * num_temps * split_fractions[split])
    print(f"Target size for {split} split: {split_size}")

    # Spread the split size evenly over the temperatures; the first
    # `remainder` temperatures take one extra sample so the total is exact.
    # Filling at the full per-temperature rate and stopping at split_size
    # would leave the highest temperatures out of every split.
    base_quota, remainder = divmod(split_size, num_temps)

    count = 0
    # Append mode: the caller is responsible for removing stale files.
    with h5py.File(output_h5, "a") as f_out:
        for temp_idx, target_temp in enumerate(temp_range):
            quota = base_quota + (1 if temp_idx < remainder else 0)
            if quota == 0:
                continue

            # Use the loaded spectrum closest in temperature as the template
            closest_data = min(data, key=lambda x: abs(x[5] - target_temp))
            intensity, _, wavelengths, tmpl_element, tmpl_ion, tmpl_temp = closest_data
            print(f"Generating spectra for {tmpl_element}_{tmpl_ion}_{target_temp}K using template from {tmpl_temp}K")

            for _ in range(quota):
                # Broaden the template at the target temperature
                synthetic_spectrum = apply_doppler_stark_broadening(
                    intensity,
                    wavelengths,
                    target_temp,
                    ne_scale_range=(1e15, 1e18),
                    temp_variation=0.05,
                    add_baseline=True,
                    add_noise=True,
                    add_intensity_scale=True
                )

                synthetic_labels = generate_labels(synthetic_spectrum, target_temp)

                # The per-split counter keeps group names unique across calls
                global_count = global_counters[split]
                group_name = f"{tmpl_element}_{tmpl_ion}_{target_temp}K_{global_count}"
                g = f_out.create_group(group_name)
                g.create_dataset("spectrum", data=synthetic_spectrum)
                g.create_dataset("labels", data=synthetic_labels)
                g.create_dataset("wavelengths", data=wavelengths)
                g.attrs["element"] = tmpl_element
                g.attrs["ion"] = tmpl_ion
                g.attrs["temp"] = target_temp

                print(f"Created group {group_name} in {split}.h5")
                global_counters[split] += 1
                count += 1

    print(f"Added {count} spectra for {element} {ion} to {split}.h5")

# Main execution
print("Loading spectra...")
data = load_spectra(h5_path)

# Only Ca I is augmented.
element_ion_pairs = [("Ca", 1)]

# One running sample index per split, shared by all augmentation calls.
global_counters = dict.fromkeys(("train", "validation", "test"), 0)

# The augmentation opens its output files in append mode, so remove any
# files left over from an earlier run first.
for split in ("train", "validation", "test"):
    output_h5 = os.path.join(output_dir, f"{split}.h5")
    if not os.path.exists(output_h5):
        continue
    os.remove(output_h5)
    print(f"Removed existing {split}.h5")

# Augment Ca I only, on a 500 K temperature grid.
for element, ion in element_ion_pairs:
    print(f"\n=== Augmenting for {element} {ion} ===")
    data_subset = [d for d in data if (d[3], d[4]) == (element, ion)]
    print(f"Number of spectra for {element} {ion}: {len(data_subset)}")
    for split in ("train", "validation", "test"):
        augment_with_doppler_stark(
            data_subset,
            output_dir,
            element,
            ion,
            split,
            global_counters,
            temp_step=500,
        )

# Step 6: check that every split file was written.
print("\nVerifikasi file yang dihasilkan:")
for split in ("train", "validation", "test"):
    output_h5 = os.path.join(output_dir, f"{split}.h5")
    if not os.path.exists(output_h5):
        print(f"WARNING: {split}.h5 tidak ditemukan!")
    else:
        print(f"{split}.h5 berhasil disimpan di {output_h5}")

In [None]:
# Step 1: install the required libraries.
# The PyPI distribution is "scikit-learn"; "sklearn" is only the import name.
!pip install tensorflow tensorflow-addons h5py numpy matplotlib scikit-learn

# Step 2: import the libraries
import os

import h5py
import matplotlib.pyplot as plt
import numpy as np
import tensorflow_addons as tfa
from google.colab import drive, files
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras import backend as K
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.metrics import BinaryAccuracy, Precision, Recall
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

# Step 3: mount Google Drive
drive.mount("/content/gdrive")

# Step 4: locations of the three dataset splits
base_path = "/content/gdrive/My Drive/libs_lstm/data/processed/augmented"
train_h5_path, val_h5_path, test_h5_path = (
    f"{base_path}/{file_name}"
    for file_name in ("train.h5", "validation.h5", "test.h5")
)

# Stop at the first split file that is missing.
for path in (train_h5_path, val_h5_path, test_h5_path):
    if os.path.exists(path):
        continue
    raise FileNotFoundError(
        f"File {path} tidak ditemukan! Pastikan file ada di Google Drive atau unggah secara manual."
    )
# Step 5: function to load a dataset split
def load_dataset(h5_path):
    """
    Read every group of an HDF5 split file into two NumPy arrays.

    Each top-level group must contain a "spectrum" and a "labels" dataset.
    Returns (spectra, labels), with one entry per group in the order the
    file lists its groups.
    """
    with h5py.File(h5_path, "r") as h5_file:
        sample_groups = [h5_file[name] for name in h5_file.keys()]
        # Datasets are copied into memory here, before the file is closed.
        spectra = [sample["spectrum"][:] for sample in sample_groups]
        labels = [sample["labels"][:] for sample in sample_groups]
    return np.array(spectra), np.array(labels)

# Step 6: load the datasets
print("Memuat dataset...")
X_train, y_train = load_dataset(train_h5_path)
X_val, y_val = load_dataset(val_h5_path)
X_test, y_test = load_dataset(test_h5_path)

# Step 7: reshape to (samples, timesteps, 1 feature) for the LSTM
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# Step 8: class weights to counter the imbalance between 0 and 1 labels
y_train_flat = y_train.flatten()
class_weights = compute_class_weight("balanced", classes=np.array([0, 1]), y=y_train_flat)
class_weight_dict = {0: class_weights[0], 1: class_weights[1]}
print("Class weights:", class_weight_dict)

# The targets are multi-label: one 0/1 value per spectral point. Passing
# `class_weight` to `fit` does not weight such targets per label (Keras reads
# 2-D targets as one-hot rows), so the weights are applied inside the loss.
neg_weight, pos_weight = (float(weight) for weight in class_weights)


def weighted_binary_crossentropy(y_true, y_pred):
    """Binary cross-entropy with each label weighted by its class weight."""
    y_true = K.cast(y_true, y_pred.dtype)
    per_label_loss = K.binary_crossentropy(y_true, y_pred)
    label_weights = y_true * pos_weight + (1.0 - y_true) * neg_weight
    return K.mean(label_weights * per_label_loss, axis=-1)


# Step 9: build the LSTM model (one sigmoid output per spectral point)
num_labels = y_train.shape[1]
model = Sequential([
    LSTM(128, input_shape=(X_train.shape[1], 1), return_sequences=True),
    Dropout(0.2),
    LSTM(64, return_sequences=False),
    Dropout(0.2),
    Dense(64, activation="relu"),
    Dense(num_labels, activation="sigmoid")
])
model.compile(
    optimizer=Adam(learning_rate=0.0001),
    # NOTE: loading the saved model later needs this function supplied via
    # `custom_objects` (or `compile=False`).
    loss=weighted_binary_crossentropy,
    metrics=[
        # The plain "accuracy" string would resolve to categorical accuracy
        # for a multi-unit output; name the binary metric "accuracy" so the
        # history keys stay "accuracy" / "val_accuracy".
        BinaryAccuracy(name="accuracy"),
        Precision(),
        Recall(),
        # num_classes must equal the label width. threshold=0.5 scores every
        # label independently, and micro-averaging pools all labels so that
        # labels which are never positive do not pull the score to zero.
        tfa.metrics.F1Score(num_classes=num_labels, average="micro", threshold=0.5),
    ]
)

# Step 10: train the model
print("Melatih model...")
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size=32,
    verbose=1
)

# Step 11: evaluate the model on the test split
test_metrics = model.evaluate(X_test, y_test, return_dict=True)
print("Test Metrics:", test_metrics)

# Step 12: plot the training history, one figure per metric
history_plots = [
    (
        "accuracy",
        "Accuracy",
        "/content/gdrive/My Drive/libs_lstm/plots/lstm_training_history_accuracy.png",
    ),
    (
        "f1_score",
        "F1-Score",
        "/content/gdrive/My Drive/libs_lstm/plots/lstm_training_history_f1.png",
    ),
]
for metric_key, metric_label, plot_path in history_plots:
    plt.figure(figsize=(10, 5))
    plt.plot(history.history[metric_key], label=f"Train {metric_label}")
    plt.plot(history.history[f"val_{metric_key}"], label=f"Validation {metric_label}")
    plt.title(f"Training and Validation {metric_label}")
    plt.xlabel("Epoch")
    plt.ylabel(metric_label)
    plt.legend()
    # Both figures go to the same directory; create it if it is missing.
    os.makedirs(os.path.dirname(plot_path), exist_ok=True)
    plt.savefig(plot_path)
    plt.show()

# Step 13: save the trained model
model_path = "/content/gdrive/My Drive/libs_lstm/models/lstm_model_advanced.h5"
model_dir = os.path.dirname(model_path)
os.makedirs(model_dir, exist_ok=True)
model.save(model_path)
print(f"Model disimpan di {model_path}")