In [1]:
from dash import Dash, html, dash_table, dcc, callback, Output, Input
import dash
import pandas as pd
import polars as pl
import datetime as dt
import plotly.express as px
from google.cloud import storage
from configparser import ConfigParser
from google.oauth2 import service_account
from googleapiclient.discovery import build
from utils.demo_io import (
    get_initial_slide_df,
    get_fovs_df,
    get_top_level_dirs,
    populate_slide_rows,
    get_histogram_df,
    get_image,
)
import polars as pl
from gcsfs import GCSFileSystem
from PIL import Image
from io import BytesIO

# Read the service-account key path and the bucket URL from config.ini.
# ConfigParser.read() skips files it cannot open and returns the list of files
# it actually parsed, so an empty result means config.ini was not loaded. Fail
# here with a clear message instead of an opaque KeyError on the "GCS" section.
cfp = ConfigParser()
if not cfp.read("config.ini"):
    raise FileNotFoundError(
        "config.ini could not be read; it must provide a [GCS] section with "
        "'gcs_storage_key' and 'bucket_url'"
    )

service_account_key_json = cfp["GCS"]["gcs_storage_key"]
gs_url = cfp["GCS"]["bucket_url"]

# Bucket name is the URL without its "gs://" scheme; a trailing "/" (as in
# "gs://bucket/") is dropped because it is not part of the bucket name.
bucket_name = gs_url.replace("gs://", "").rstrip("/")

# Define GCS file system so files can be read
gcs = GCSFileSystem(token=service_account_key_json)

# Authenticate using the service account key file
credentials = service_account.Credentials.from_service_account_file(
    service_account_key_json, scopes=["https://www.googleapis.com/auth/cloud-platform"]
)

# Create a storage client
storage_service = build("storage", "v1", credentials=credentials)


In [2]:
# Slide (one of the bucket's top-level directories, written without the trailing "/")
# that the following cells operate on.
page_name = "595-AS13_2022-04-13_12-49-37.776201"

In [11]:
# Build the dataframe for the selected slide; get_fovs_df is handed a list of
# slide names, here a one-element list.
# NOTE(review): a later cell indexes fovs_df["image_uri"], so that column is
# expected to exist — confirm against utils.demo_io.
fovs_df = get_fovs_df(storage_service, bucket_name, [page_name])

In [22]:
def get_image(
    storage_service,
    bucket_name,
    slide_name,
    uri,
    resize_factor=1.0,
):
    """
    :brief: downloads an image from a slide's spot_detection_result/ folder and
        returns it as a PIL image, rescaled by resize_factor
    :param storage_service: storage service object used to download the object
    :param bucket_name: name of bucket
    :param slide_name: name of slide; a trailing "/" is added if missing
    :param uri: uri of the image relative to <slide_name>/spot_detection_result/,
        omitting bucket name
    :param resize_factor: positive factor applied to both width and height
    :return image: the downloaded image, resized
    :raises ValueError: if resize_factor is not positive
    """
    # NOTE: this definition shadows the get_image imported from utils.demo_io
    # in the notebook's import cell.

    # Validate up front so a bad factor does not cost a download.
    if resize_factor <= 0:
        raise ValueError(f"resize_factor must be positive, got {resize_factor}")

    prefix = slide_name
    if not prefix.endswith("/"):
        prefix += "/"
    prefix += "spot_detection_result/"

    image_bytes = (
        storage_service.objects()
        .get_media(bucket=bucket_name, object=(prefix + uri))
        .execute()
    )
    image = Image.open(BytesIO(image_bytes))

    # int() truncates, so a small factor could produce a 0-pixel dimension;
    # clamp each side to at least 1 pixel to keep the target size valid.
    new_size = tuple(max(1, int(dim * resize_factor)) for dim in image.size)
    return image.resize(new_size)

In [23]:
# Show a 10%-scale version of the first image listed in fovs_df for the
# selected slide; only the last path component of the stored uri is passed on.
get_image(
    storage_service=storage_service,
    bucket_name=bucket_name,
    slide_name=page_name,
    uri=fovs_df["image_uri"][0].rsplit("/", 1)[-1],
    resize_factor=0.1,
).show()

In [2]:
# List the bucket's top-level directories; in the output shown below each entry
# ends with "/" and appears to be one slide run.
get_top_level_dirs(storage_service, bucket_name)

['3D7_2022-04-12_12-31-59.262248/',
 '595-AS13_2022-04-13_12-49-37.776201/',
 'BUS-114-1_2023-01-21_19-25-3.663354/',
 'BUS-114-4_2023-01-22_14-16-12.198770/',
 'BUS-115-3_2022-04-14_13-16-21.944479/',
 'BUS-115-3_2023-01-22_14-38-0.250020/',
 'BUS-115-4_2023-01-22_14-59-56.741396/',
 'BUS-127-1_2023-06-03_20-49-47.385834/',
 'BUS-127-2_2023-06-03_20-24-44.574780/',
 'BUS-128-1_2023-06-03_18-07-36.860664/',
 'BUS-129-1_2023-09-01_15-04-41.559592/',
 'BUS-129-2_2023-09-02_16-50-8.886882/',
 'BUS_2023-09-01_14-24-3.896428/',
 'DPSP-0877-AB-1_2023-02-22_19-51-21.616158/',
 'DPSP-0948-ND-1_2023-02-23_17-49-22.377072/',
 'DPSP-0952-AR-1_2023-02-22_20-07-0.416412/',
 'DPSP-1022-AZ-1_2023-02-22_20-28-39.763125/',
 'DPSP-1070-AS-1_2023-02-23_17-37-57.653301/',
 'Fingerprick_HL_2023-01-27_B01_2023-02-17_13-04-48.321619/',
 'Fingerprick_HL_2023-01-27_B02_2023-02-17_15-31-53.777898/',
 'Fingerprick_HL_2023-01-27_B03_2023-02-17_15-50-57.776833/',
 'Fingerprick_HL_2023-01-27_C02_2023-02-17_16-13-57

In [9]:
def get_spots_csv(storage_service, bucket_name, gcs, slide_name):
    """
    :brief: returns a dataframe corresponding to the spots data for a given slide
    :param storage_service: storage service object (not used by this function;
        kept so the signature stays the same)
    :param bucket_name: name of bucket; leading/trailing "/" are ignored
    :param gcs: file system object whose open() is used to read the csv
    :param slide_name: name of slide, not including bucket name; a trailing "/"
        is ignored
    :return spots_csv: polars dataframe corresponding to spots data for given
        slide, or None if the csv is missing or could not be read
    """
    # Directory-style slide names end with "/", which would otherwise produce
    # ".../<slide>/_ann_w_pred.csv" instead of ".../<slide>_ann_w_pred.csv".
    spot_data_file_path = (
        bucket_name.strip("/")
        + "/patient_slides_analysis/"
        + slide_name.rstrip("/")
        + "_ann_w_pred.csv"
    )
    try:
        with gcs.open(spot_data_file_path, "rb") as f:
            return pl.read_csv(f)
    except FileNotFoundError:
        print("No _ann_w_pred.csv found for " + str(slide_name))
        return None
    except Exception as exc:
        # Stay best-effort (return None) but report the real cause instead of
        # claiming the file is missing; KeyboardInterrupt/SystemExit propagate.
        print("Could not read " + spot_data_file_path + ": " + repr(exc))
        return None

In [10]:
# Load the spots table for one slide; the slide name is written without a
# trailing "/".
get_spots_csv(storage_service, bucket_name, gcs, "BUS-114-1_2023-01-21_19-25-3.663354")

octopi-malaria-uganda-2022-data-processing/patient_slides_analysis/BUS-114-1_2023-01-21_19-25-3.663354_ann_w_pred.csv


index,annotation,non-parasite output,parasite output,unsure output
i64,i64,f64,f64,f64
328685,-1,7.0117e-9,0.999977,0.000023
331891,-1,2.0538e-9,0.9999739,0.000026
344495,-1,2.8992e-8,0.9999696,0.00003
314451,-1,3.8813e-9,0.999956,0.000044
244651,-1,9.4928e-8,0.9999552,0.000045
329138,-1,1.6429e-8,0.9999547,0.000045
119469,-1,4.3904e-8,0.9999535,0.000046
263990,-1,5.9838e-9,0.9999521,0.000048
166323,-1,3.2403e-8,0.9999374,0.000063
208679,-1,6.4593e-8,0.999936,0.000064
