In [53]:
import io
import os
import tempfile
import uuid

import h5py
import pandas as pd
import numpy as np
import requests


In [20]:
# Root URL that make_request() prefixes to every request path (localhost, port 8000).
BASE_URL = "http://127.0.0.1:8000"


In [21]:
# Small fixture matrix: rows are indexed by three "ACH-..." ids, columns are the
# integers 1 and 2, and each column contains one missing value (None).
EXAMPLE_DF = pd.DataFrame(
    {1: [1, 2, None], 2: [3, None, 4]}, index=["ACH-000014", "ACH-000052", "ACH-000279"]
)
# One row per EXAMPLE_DF column, pairing the column id with a text label.
feature_DF = pd.DataFrame({"id": EXAMPLE_DF.columns, "label": ["one", "two"]})
# One row per EXAMPLE_DF index entry.
SAMPLE_DF = pd.DataFrame({"id": EXAMPLE_DF.index})


In [104]:
def make_request(
    method,
    url,
    data=None,
    json=None,
    files=None,
    headers={"X-Forwarded-Email": "breadbox-admin@broadinstitute.org"},
    stream=False,
):
    """Send an HTTP request to the server at ``BASE_URL``.

    Args:
        method: HTTP verb handed to ``requests.request`` (e.g. ``"GET"``).
        url: Path appended verbatim to ``BASE_URL``; include the leading slash.
        data: Forwarded as ``requests.request(data=...)``.
        json: Forwarded as ``requests.request(json=...)``.
        files: Forwarded as ``requests.request(files=...)``.
        headers: Request headers. Defaults to an ``X-Forwarded-Email`` header
            for ``breadbox-admin@broadinstitute.org``; pass ``headers=None``
            to send the request without that header.
        stream: Forwarded as ``requests.request(stream=...)``.

    Returns:
        The ``requests.Response`` for the call. The status code is not
        checked here; callers inspect it themselves.
    """
    target = f"{BASE_URL}{url}"
    # NOTE: the default headers dict is shared between calls, so it must never
    # be mutated in here.
    options = dict(headers=headers, data=data, json=json, files=files, stream=stream)
    return requests.request(method, target, **options)


def get_groups(**kwargs):
    """GET ``/groups``; keyword arguments are forwarded to ``make_request``."""
    response = make_request("GET", "/groups", **kwargs)
    return response


def get_datasets(**kwargs):
    """GET ``/datasets``; keyword arguments are forwarded to ``make_request``."""
    response = make_request("GET", "/datasets", **kwargs)
    return response


def get_dataset(dataset_id: str, **kwargs):
    """GET ``/datasets/{dataset_id}``.

    ``dataset_id`` is interpolated into the path with f-string formatting, so
    any value with a suitable string form works. Remaining keyword arguments
    are forwarded to ``make_request``.
    """
    path = f"/datasets/{dataset_id}"
    return make_request("GET", path, **kwargs)


def get_dataset_data(dataset_id: str, **kwargs):
    """GET ``/datasets/{dataset_id}/data``, streaming the response by default.

    Args:
        dataset_id: Identifier interpolated into the request path.
        **kwargs: Forwarded to ``make_request``. ``stream`` defaults to
            ``True`` but may be overridden by the caller.

    Returns:
        The ``requests.Response`` returned by ``make_request``.
    """
    # Default to streaming without hard-coding the keyword: passing both a
    # literal stream=True and a caller-supplied stream= would raise
    # "TypeError: got multiple values for keyword argument 'stream'".
    kwargs.setdefault("stream", True)
    return make_request("GET", f"/datasets/{dataset_id}/data", **kwargs)


def get_dataset_entities(dataset_id: str, **kwargs):
    """GET ``/datasets/{dataset_id}/entities``.

    Remaining keyword arguments are forwarded to ``make_request``.
    """
    path = f"/datasets/{dataset_id}/entities"
    return make_request("GET", path, **kwargs)


def get_dataset_samples(dataset_id: str, **kwargs):
    """GET ``/datasets/{dataset_id}/samples``.

    Remaining keyword arguments are forwarded to ``make_request``.
    """
    path = f"/datasets/{dataset_id}/samples"
    return make_request("GET", path, **kwargs)


def post_example_datasets(group_id: str, **kwargs):
    """Upload the example dataset fixtures with a multipart POST to ``/datasets``.

    ``EXAMPLE_DF``, ``feature_DF`` and ``SAMPLE_DF`` are written as CSV files
    into a temporary directory and sent as the ``data_file``, ``feature_file``
    and ``sample_file`` parts, alongside form fields describing the dataset.

    Args:
        group_id: Value sent as the ``group_id`` form field.
        **kwargs: Forwarded to ``make_request`` (e.g. ``headers``).

    Returns:
        The ``requests.Response`` of the POST. The status code is not checked.
    """
    with tempfile.TemporaryDirectory() as d:
        data_path = os.path.join(d, "data.csv")
        feature_path = os.path.join(d, "entities.csv")
        sample_path = os.path.join(d, "samples.csv")

        # The data matrix keeps its index (the sample ids); the two metadata
        # tables are written without one.
        EXAMPLE_DF.to_csv(data_path)
        feature_DF.to_csv(feature_path, index=False)
        SAMPLE_DF.to_csv(sample_path, index=False)

        dataset = dict(
            name="d1",
            units="unit",
            feature_type="gene",
            sample_type="cell_line",
            group_id=group_id,
        )

        # Open the uploads in a `with` so the handles are closed before the
        # temporary directory is removed; previously they were never closed,
        # which leaks descriptors and can make the directory cleanup fail on
        # platforms that lock open files.
        with open(data_path, "rb") as data_fh, \
                open(feature_path, "rb") as feature_fh, \
                open(sample_path, "rb") as sample_fh:
            files = {
                "data_file": ("data.csv", data_fh, "text/csv"),
                "feature_file": ("feature.csv", feature_fh, "text/csv"),
                "sample_file": ("sample.csv", sample_fh, "text/csv"),
            }
            r = make_request("POST", "/datasets", data=dataset, files=files, **kwargs)
    return r


In [32]:
def create_public_group():
    """Create a group named ``PUBLIC`` and add a read-access entry to it.

    Two requests are made with the default headers: a POST to ``/groups`` that
    creates the group, then a POST to ``/groups/{id}`` with an entry whose
    ``access_type`` is ``"read"`` and whose ``email`` is the pattern ``".*"``
    with ``exact_match`` disabled (presumably granting read access to every
    email address; confirm against the server's semantics).

    Returns:
        The ``id`` field of the JSON returned by the group-creation request.
        Neither response's status code is checked.
    """
    created = make_request("POST", "/groups", json={"name": "PUBLIC"}).json()
    public_group_id = created["id"]
    read_entry = {"access_type": "read", "email": ".*", "exact_match": False}
    make_request("POST", f"/groups/{public_group_id}", json=read_entry)
    return public_group_id


In [33]:
# Creates a new group on the server each time this cell runs; later cells use its id.
group_id = create_public_group()


In [35]:
# Display the id of the group that was just created.
group_id


'100c3c10-a98e-42b0-b7de-f9dc8e33de4e'

In [36]:
# Upload the example dataset into the group and show the raw response body.
r = post_example_datasets(group_id)
r.content


b'{"id":"77f1452e-5776-46c2-b683-149781bc8569","name":"d1","units":"unit","feature_type":"gene","sample_type":"cell_line","group_id":"100c3c10-a98e-42b0-b7de-f9dc8e33de4e"}'

In [37]:
# Keep the decoded POST response; later cells read dataset_submitted["id"].
dataset_submitted = r.json()
dataset_submitted


{'id': '77f1452e-5776-46c2-b683-149781bc8569',
 'name': 'd1',
 'units': 'unit',
 'feature_type': 'gene',
 'sample_type': 'cell_line',
 'group_id': '100c3c10-a98e-42b0-b7de-f9dc8e33de4e'}

In [39]:
# List datasets using make_request's default headers.
get_datasets().json()


[{'id': '77f1452e-5776-46c2-b683-149781bc8569',
  'name': 'd1',
  'units': 'unit',
  'feature_type': 'gene',
  'sample_type': 'cell_line',
  'group_id': '100c3c10-a98e-42b0-b7de-f9dc8e33de4e'}]

In [44]:
# Fetching by id must return exactly what the POST returned ...
assert get_dataset(dataset_submitted["id"]).json() == dataset_submitted
# ... and a freshly generated random UUID must not resolve to any dataset.
assert get_dataset(uuid.uuid4()).status_code == 404


In [45]:
# Fetch the samples of the uploaded dataset and show the decoded JSON.
r = get_dataset_samples(dataset_submitted["id"])
r.json()


{'id': ['ACH-000014', 'ACH-000052', 'ACH-000279']}

In [46]:
# Fetch the entities of the uploaded dataset and show the decoded JSON.
r = get_dataset_entities(dataset_submitted["id"])
r.json()


{'id': [1, 2], 'label': ['one', 'two']}

In [101]:
# get_dataset_data requests with stream=True; only the Response object is
# displayed here, the body itself is not read in this cell.
r = get_dataset_data(dataset_submitted["id"])
r


<Response [200]>

In [106]:
# headers=None replaces make_request's default X-Forwarded-Email header, so
# this listing is requested without that header.
r = get_datasets(headers=None)
r.json()

[{'id': '77f1452e-5776-46c2-b683-149781bc8569',
  'name': 'd1',
  'units': 'unit',
  'feature_type': 'gene',
  'sample_type': 'cell_line',
  'group_id': '100c3c10-a98e-42b0-b7de-f9dc8e33de4e'}]

In [130]:
# Request the data of a single entity, addressed as "one" with
# by_property=label (presumably a lookup by label rather than by id; confirm
# against the API).
r = make_request("GET", f"/datasets/{dataset_submitted['id']}/entities/one/data?by_property=label")
r

<Response [200]>

In [132]:
# Decode the JSON body of the most recent response and display it.
d = r.json()
d

{'name': 1,
 'index': ['ACH-000014', 'ACH-000052', 'ACH-000279'],
 'data': [1.0, 2.0, None]}

In [138]:
# Reshape the entity response `d` (keys "name", "index", "data") into the
# columns/index/data layout accepted by the DataFrame constructor. The values
# are derived from `d` instead of being copied in by hand, so the frame always
# reflects what the server actually returned.
d2 = {
    'columns': [d['name']],
    'index': d['index'],
    # One single-element row per sample, giving a one-column frame.
    'data': [[value] for value in d['data']],
}
pd.DataFrame(**d2)

1    1.0
dtype: float64