# Pressure Sensor Dataset
Plot all the measurements and sort out the ones that are not useful.

## Set up the environment

In [None]:
import pybis
import os
import pandas as pd
import glob
import matplotlib.pyplot as plt
from pathlib import Path
from matplotlib.figure import Figure

In [None]:
def get_token(path: str = "/secrets/User-PAT_openBIS.txt") -> str:
    """
    Return the openBIS personal access token stored in a secrets file.

    Renkulab provides the token through its "Secrets" mechanism as a file;
    by default this is `/secrets/User-PAT_openBIS.txt`.

    Args:
        path: Location of the file holding the token. Defaults to the
            Renkulab secret path, so calls without arguments behave as before.

    Returns:
        The file content with leading/trailing whitespace (including the
        trailing newline) removed.

    Raises:
        OSError: If the file cannot be opened (e.g. the secret is not mounted).
    """
    with open(path, encoding="utf-8") as token_provider:
        return token_provider.read().strip()

# Check that the secret can be read WITHOUT echoing it: a bare `get_token()`
# as the last expression of a cell would display the confidential token in
# the notebook output.
print("openBIS token loaded:", bool(get_token()))

In [None]:
# Domain name of the openBIS instance; change this to your own instance
server = "openbis-empa-test.ethz.ch"
ob_connection = pybis.Openbis(f"https://{server}")
# Authenticate with a Personal Access Token (PAT), found in
# Admin UI > Tools > Access > Personal Access Token.
# It needs to be created if not done yet; its owner will be the Registrator in openBIS.

# The PAT is confidential, so it is read from the secrets file via get_token()
ob_connection.set_token(get_token())

print(ob_connection)

In [None]:
# Show the current working directory; relative paths are resolved against it
os.getcwd()

## Download datasets
The following cell only needs to be run if the datasets have not been downloaded yet.

In [None]:
# Get the collection (default experiment) where the data is located
coll = ob_connection.get_collection("/GROUPA_STEFFI/RENKU_USE_CASE/RENKU_USE_CASE_EXP_4")
# Get the objects within the collection
collection_objects = coll.get_objects()

datasets = ob_connection.get_datasets(collection=coll, type="RAW_DATA")

# Create the datasets folder
ds_folder = "./datasets"
Path(ds_folder).mkdir(exist_ok=True)
# Download all datasets
for dataset in datasets:
    dataset.download(
        destination=ds_folder,         # download files into ./datasets
        create_default_folders=False,  # ignore the /original/DEFAULT folders made by openBIS
        # Block until each download has finished: the following cells glob
        # ds_folder right away and would otherwise see missing/partial files.
        wait_until_finished=True,
        workers=10,                    # 10 downloads parallel (default)
    )

## Explore dataset
After we are done downloading datasets from OpenBIS, we can have a look at them locally.

Using `Path.glob` from `pathlib`, we can search for all files ending with ".csv" or ".xlsx" in `ds_folder`, which in our case is `./datasets`, the folder where we just downloaded all the data.

In [None]:

# glob() yields entries in filesystem-dependent order; sort each group so that
# positional access further down (file_list[1], file_list[1:]) is reproducible.
file_list = sorted(Path(ds_folder).glob("*.csv")) + sorted(Path(ds_folder).glob("*.xlsx"))
print(f"We found {len(file_list)} files matching csv and xlsx")


Now we can have a look at the file list:

In [None]:
# Display the paths collected above
file_list


And finally, we can load one example file using Pandas:

Because we know the header is 8 lines long, we set `skiprows=8`.

In [None]:
# Example of one file.
# The first 8 rows are metadata from the measurement, so they are skipped for
# the plots (adjust skiprows if needed).
df = pd.read_csv(file_list[1], sep=',', skiprows=8)
# Display the loaded table
df

Now we can prepare a plot of "Reading" against "Relative Time".
To make it cleaner, we will prepare a plotting function:

In [None]:
def prepare_plot(df: pd.DataFrame, experiment_name: str) -> Figure:
    """
    Draw the "Reading" column against "Relative Time" for one measurement.

    Args:
        df: Measurement table with "Relative Time" and "Reading" columns.
        experiment_name: Text used as the plot title.

    Returns:
        The newly created figure holding the single line plot.
    """
    figure, axes = plt.subplots()
    axes.plot(df["Relative Time"], df["Reading"])
    # Set title and both axis labels in one call
    axes.set(
        title=experiment_name,
        xlabel="Relative Time",
        ylabel="Reading",
    )
    return figure


plots = {}  # file stem -> figure
dfs = []    # loaded tables; was never initialised, so dfs.append raised NameError
# NOTE(review): file_list[1:] skips the first file found — confirm this is intended.
for filepath in file_list[1:]:
    # Read CSV or XLSX; any other file type is skipped
    if filepath.suffix == ".csv":
        df = pd.read_csv(filepath, sep=',', skiprows=8)  # adjust skiprows if needed
    elif filepath.suffix == ".xlsx":
        df = pd.read_excel(filepath)
    else:
        continue
    dfs.append(df)
    print(filepath.name)
    plots[filepath.stem] = prepare_plot(df, str(filepath.stem))
    

## Cleaning the plots
We take the whole list of file names found above (`file_list`).
We then went through the list and split the plots into successful and unsuccessful experiments. The two lists are saved as separate text files (`successful.txt` and `unsuccessful.txt`).

Successful experiments:
- 230621_FS9-3.csv
- 230314_FS_1-3.csv
- 230206_FS_5-3_0207_151545.csv
- pentanolfs_0922_160111.csv
- 230605_FS2-3.csv
- 230330_FS2-3.csv
- 230207fs1-3_0208_153427.csv
- FS_EtOH_220920_132622.csv
- 230605_FS2-3.csv
- 230307_FS_2-3.csv
  
Unsuccessful experiments:
- DMSO_FS_0923_133114.csv
- FS-Aceton_220921_124032.csv
- 230315_FS_5-3.csv
- 230223_FS_5-3.csv
- 230209_FS_2-3.csv
- acetonfs_0924_150458.csv
- 230530FS1-3.csv

In [None]:
# Display the dict mapping each file stem to its figure
plots

In [None]:
# Load the curated file names (one per line), stripping whitespace and newlines
with open("successful.txt") as good_file:
    successful_filenames = [entry.strip() for entry in good_file]
with open("unsuccessful.txt") as bad_file:
    unsuccessful_filenames = [entry.strip() for entry in bad_file]

# Keep only the found files whose base name appears in the respective list
successful_list = [
    path for path in file_list
    if os.path.basename(path) in successful_filenames
]

unsuccessful_list = [
    path for path in file_list
    if os.path.basename(path) in unsuccessful_filenames
]




In [None]:
# Reusable helper that overlays all measurements of one group in a single figure

def plot_group(file_list, title):
    """
    Overlay the "Reading" vs. "Relative Time" curves of several files.

    Args:
        file_list: Paths of the measurement files (objects exposing `.suffix`).
            Files with suffix ".csv" are read with the first 8 rows skipped;
            everything else is read as Excel.
        title: Title of the combined plot.

    Returns:
        The figure containing all curves. It is also shown via `plt.show()`.
    """
    fig, ax = plt.subplots(figsize=(8, 4))  # one figure for the whole group

    for path in file_list:
        # Pick the reader based on the file extension
        if path.suffix == ".csv":
            data = pd.read_csv(path, sep=',', skiprows=8)  # adjust skiprows if needed
        else:
            data = pd.read_excel(path)

        # One line per file, labelled with the file name
        ax.plot(
            data["Relative Time"],
            data["Reading"],
            label=os.path.basename(path),
            alpha=0.8,
        )

    ax.set_title(title)
    ax.set_xlabel("Relative Time")
    ax.set_ylabel("Reading")
    ax.legend(loc='lower center')
    ax.grid(True)
    fig.tight_layout()
    plt.show()
    return fig

In [None]:
# Combined plot for successful files
successful_fig = plot_group(successful_list, "successful experiments")

# Combined plot for unsuccessful files
unsuccessful_fig = plot_group(unsuccessful_list, "unsuccessful experiments")

Now we want to save the results `successful_fig` and `unsuccessful_fig` in openBIS as a result.
To do so, we first save them as PNG images.

In [None]:
# Write both combined figures as PNG files into the working directory.
# NOTE(review): "sucessfull.png" is misspelled, but the upload cell refers to
# the same name — rename both places together if this is corrected.
successful_fig.savefig("sucessfull.png")
unsuccessful_fig.savefig("unsuccessful.png")




In [None]:

# Fetch the openBIS object the result dataset will be attached to
dest_object = ob_connection.get_object("/GROUPA_STEFFI/RENKU_USE_CASE/FOLDER4")

# Create a new dataset of type ELN_PREVIEW on that object from the two PNG files saved above
ds_new = ob_connection.new_dataset(sample=dest_object, files=["sucessfull.png", "unsuccessful.png"], type="ELN_PREVIEW")
# Save the new dataset through the openBIS connection
ds_new.save()
