# Import libraries

In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from PIL import Image, ImageChops
from tqdm import tqdm

from deeplcms_functions import convert_lcms_files, inspect_database, utils

# Inspect some images

In [None]:
# One raw run (an .mzML file) from the opium study folder, used as the worked
# example in the following cell.
example_file = (
    utils.Configuration.RAW_DATA_PATH / "ST001618_Opium_study_LC_MS/U_1.mzML"
)

In [None]:
# Render the example run with the project helper. nx/ny are presumably the
# grid resolution of the rendered image, and save=False presumably keeps the
# result from being written to disk -- confirm against convert_lcms_files.
convert_lcms_files.plot_2D_spectra_overview(
    example_file,
    save=False,
    nx=1000,
    ny=1000,
)

![Experimental plan](experimental_plan.jpg){fig-alt="Experimental plan." fig-align="center" width=50%}


# Step 1: Create directories

In [None]:
%%script echo skipping

convert_lcms_files.create_train_val_test_directories(
    study_name="ST001618_Opium_study_LC_MS_500",
    path=utils.Configuration.RAW_DATA_PATH,
    group_1="Non-User",
    group_2="User",
)

# Step 2: Split data into train/test/val 

In [None]:
# Sample list stored next to the raw data (gzip-compressed parquet, going by
# the file name). The raw table can be inspected with pd.read_parquet(file_path).
file_path = utils.Configuration.RAW_DATA_PATH / "sample_list.parquet.gzip"

# Let the project helper assign each sample to a split; test_portion and
# val_portion are presumably fractions of the full sample list.
train_test_val_split_df = convert_lcms_files.get_train_val_test_split(
    path=file_path,
    test_portion=0.1,
    val_portion=0.2,
)
print(train_test_val_split_df)

# Phenotype counts within each split, shown as the cell output.
train_test_val_split_df.groupby("split")["phenotype"].value_counts()

# Checking a few examples

In [None]:
# Draw a grid of randomly chosen samples, each rendered by the project helper
# and titled with its phenotype label, as a quick visual check of the data.
data_folder = utils.Configuration.RAW_DATA_PATH.joinpath("ST001618_Opium_study_LC_MS")

rows, cols = 3, 3
fig = plt.figure(figsize=(6, 6))

# Derive the number of examples from the grid so the two cannot drift apart,
# and cap it at the table size: Series.sample raises when asked for more rows
# than exist (without replacement).
n_examples = min(rows * cols, len(train_test_val_split_df))
sampled_names = train_test_val_split_df.sample_name.sample(n_examples)

for idx, sample_name in enumerate(sampled_names, 1):
    # Work on the Axes object explicitly instead of pyplot's implicit
    # "current axes", which could change if the rendering helper itself
    # touches pyplot before imshow runs.
    ax = fig.add_subplot(rows, cols, idx)

    mzml_file = data_folder / f"{sample_name}.mzML"
    ax.imshow(
        convert_lcms_files.plot_2D_spectra_overview(
            mzml_file, save=False, nx=1000, ny=1000
        )
    )

    # Title the panel with the phenotype of the first row matching this sample.
    is_sample = train_test_val_split_df.sample_name == sample_name
    ax.set_title(train_test_val_split_df.loc[is_sample, "phenotype"].iloc[0])
    ax.axis("off")

fig.tight_layout()
plt.show()

# Step 3: Convert LCMS files and move images to target directories

In [None]:
%%script echo skipping

convert_lcms_files.convert_LCMS_files_and_move_images(
    source_folder=utils.Configuration.RAW_DATA_PATH.joinpath(
        "ST001618_Opium_study_LC_MS"
    ),
    df=train_test_val_split_df,
    destination_folder=utils.Configuration.RAW_DATA_PATH.joinpath(
        "ST001618_Opium_study_LC_MS_1000"
    ),
    target_col="phenotype",
    nx=1000,
    ny=1000,
)

# Manual data augmentation

In [None]:
# Pick one .jpeg at random from the "image_examples" folder and pass it to the
# project's augmentation helper (save=False presumably means nothing is
# written to disk -- confirm against convert_lcms_files).
example_images = list(
    (utils.Configuration.RAW_DATA_PATH / "image_examples").glob("*.jpeg")
)
location = np.random.choice(example_images)

convert_lcms_files.augment_images(location, save=False)

In [None]:
# List every .jpeg exactly one directory level below the augmented "train"
# folder (presumably one sub-folder per class). Note that `location` is
# rebound here from a single path to a list of paths.
augmented_train_dir = (
    utils.Configuration.RAW_DATA_PATH
    / "ST001618_Opium_study_LC_MS_500_augmented/train"
)
location = list(augmented_train_dir.glob("*/*.jpeg"))
location