# Introduction

Using the models we have, try to fill in the metadata spreadsheet and submit test records to the IGVF test portal.

The previous attempt attached all the files from each platform type to one measurement set, but after discussing it with Jennifer we decided this is probably a poor idea.

In [1]:
import bz2
from collections import Counter, namedtuple
import datetime
import json
import numpy
import os
import pandas
from pathlib import Path
import re
from subprocess import run, PIPE
from tqdm import tqdm
import sys
import zoneinfo

# Point Django at the mousedemo settings unless the environment already chose a module.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'mousedemo.settings')
# Django refuses synchronous ORM calls from a thread with a running event loop
# unless this variable is set; presumably needed because the notebook kernel
# runs one -- confirm before reusing this code outside Jupyter.
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"

import django
from django.contrib.auth import get_user_model
from django.db import DEFAULT_DB_ALIAS

# Make the "mousedemo" directory (resolved against the current working
# directory) importable so the settings module and its apps can be found.
MOUSEDEMO = str(Path("mousedemo").absolute())
if MOUSEDEMO not in sys.path:
    sys.path.append(MOUSEDEMO)

# Must run before the model imports below.
django.setup()

from mousedemo import settings
from igvf_mice import models

In [2]:
# Use a local checkout of encoded_client from the user's home directory.
# NOTE(review): this path is machine-specific; the import below fails if the
# checkout is missing and the package is not otherwise installed.
EC = str(Path("~/proj/encoded_client").expanduser())
if EC not in sys.path:
    sys.path.append(EC)

from encoded_client import encoded

In [3]:
# Target selection: swap the commented block with the active one to switch
# between the sandbox and production portals.
#server_name = "api.sandbox.igvf.org"
# accession_prefix = "igvftst"
# upload_target = "sandbox"
# NOTE(review): the introduction talks about the test portal, but the active
# settings below point at production -- confirm this is intended.
server_name = "api.data.igvf.org"      # host passed to encoded.ENCODED and embedded in output filenames
accession_prefix = "igvf"              # namespace prefix used when filtering stored accessions
upload_target = "production"           # used in the combined workbook filename

#server_name = "igvfd-test-igvf-532-sandbox-db.demo.igvf.org"
# server_name is interpolated into spreadsheet filenames later on, so reject
# values that could change the output directory.
assert "/" not in server_name
assert ".." not in server_name

# Client for the chosen portal.
server = encoded.ENCODED(server_name)

# Validator passed to every server.post_sheet() call below.
validator = encoded.DCCValidator(server)

In [4]:
# Constant field values shared by every sheet built below.
award = "/awards/HG012077/"
# Candidate lab paths; the last entry is the one used as the submitting lab.
labs = ["/labs/lior-pachter/", "/labs/grant-macgregor/", "/labs/barbara-wold/", "/labs/ali-mortazavi/"]
lab = labs[-1]  # "/labs/ali-mortazavi/"
jax = "/sources/jackson-labs/"  # value for the "sources:array" column of the donor and tissue sheets
species = 'Mus musculus'  # value for the "taxa" column

In [5]:
# Name of the Split-seq plate whose records are submitted by this notebook;
# later cells also use it to filter sequencing runs down to this plate.
test_run_name =  "IGVF_B01"
# Fetch the matching plate record (Django's .get() raises unless exactly one row matches).
plate = models.SplitSeqPlate.objects.get(name=test_run_name)


In [6]:
# Index every tissue on the plate, and the mouse it came from, by name.
# Walking wells -> biosamples -> tissues can visit the same tissue several
# times; keying by name keeps one entry each.
tissues = {}
mice = {}
plate_tissues = (
    tissue
    for well in plate.splitseqwell_set.all()
    for biosample in well.biosample.all()
    for tissue in biosample.tissue.all()
)
for tissue in plate_tissues:
    tissues[tissue.name] = tissue
    mice[tissue.mouse.name] = tissue.mouse

# rodent_donor

In [7]:
def format_sex(value):
    """Translate a ``models.SexEnum`` value into the string used in the sheet.

    Returns "male" for ``SexEnum.MALE``, "female" for ``SexEnum.FEMALE``,
    and None for any other value.
    """
    if models.SexEnum.MALE == value:
        return "male"
    if models.SexEnum.FEMALE == value:
        return "female"
    return None

def get_accession_string_or_none(record, accession_prefix):
    """Return the accession name stored on ``record`` for one namespace.

    Parameters
    ----------
    record
        Object whose ``accession`` manager supports
        ``filter(namespace__accession_prefix=...)``.
    accession_prefix
        Namespace prefix used to select the accession.

    Returns
    -------
    The ``name`` of the single matching accession, or None when the record
    has no accession in that namespace.

    Raises
    ------
    ValueError
        If more than one accession matches the prefix.
    """
    accessions = record.accession.filter(namespace__accession_prefix=accession_prefix)
    count = len(accessions)
    if count == 0:
        return None
    if count > 1:
        raise ValueError("There should only be one accession per namespace {}".format(accessions))
    # Return the accession's name rather than the accession record itself, so
    # the spreadsheet cell holds the identifier, matching the other accession
    # helpers in this notebook.
    return accessions[0].name
    
# Assemble the rodent_donor sheet: one row per mouse, ordered by mouse name.
# Key order below defines the column order of the resulting DataFrame.
rodent_donor_rows = []
for mouse_name, mouse in sorted(mice.items()):
    rodent_donor_rows.append({
        # "#response" / "#response_time" columns are intentionally not emitted.
        "accession": get_accession_string_or_none(mouse, accession_prefix),
        "uuid": None,
        "aliases:array": f"ali-mortazavi:{mouse.name}",
        "award": award,
        "lab": lab,
        "taxa": species,
        "sex": format_sex(mouse.sex),
        "strain": mouse.strain.name,
        "references": None,
        "url": mouse.strain.see_also,
        "sources:array": jax,
        "lot_id": None,
        "product_id": mouse.strain.jax_catalog_number,
        "documents": None,
        "alternate_accessions": None,
        "submitter_comment": None,
        "description": None,
        "parents": None,
        "traits": None,
        "phenotypic_features": None,
        "external_resources": None,
        "strain_background": mouse.strain.igvf_id,
        "genotype": None,
        "individual_rodent:boolean": True,
        "rodent_identifier": mouse.name,
    })

rodent_donor = pandas.DataFrame(rodent_donor_rows)

# With dry_run=True nothing is written to disk by this cell; flip the flag to
# submit for real and keep a copy of the sheet.
dry_run = True
created = server.post_sheet(
    "rodent_donor",
    rodent_donor,
    dry_run=dry_run,
    verbose=True,
    validator=validator,
)
records_were_submitted = len(created) > 0 and not dry_run
if records_were_submitted:
    rodent_donor.to_excel("rodent_donor.{}.xlsx".format(server_name))
rodent_donor

Unnamed: 0,accession,uuid,aliases:array,award,lab,taxa,sex,strain,references,url,...,submitter_comment,description,parents,traits,phenotypic_features,external_resources,strain_background,genotype,individual_rodent:boolean,rodent_identifier
0,would create,,ali-mortazavi:105_CASTJ_10M,/awards/HG012077/,/labs/ali-mortazavi/,Mus musculus,male,CASTJ,,https://www.jax.org/strain/000928,...,,,,,,,CAST/EiJ (CAST),,True,105_CASTJ_10M
1,would create,,ali-mortazavi:106_CASTJ_10F,/awards/HG012077/,/labs/ali-mortazavi/,Mus musculus,female,CASTJ,,https://www.jax.org/strain/000928,...,,,,,,,CAST/EiJ (CAST),,True,106_CASTJ_10F
2,would create,,ali-mortazavi:107_CASTJ_10M,/awards/HG012077/,/labs/ali-mortazavi/,Mus musculus,male,CASTJ,,https://www.jax.org/strain/000928,...,,,,,,,CAST/EiJ (CAST),,True,107_CASTJ_10M
3,would create,,ali-mortazavi:108_CASTJ_10F,/awards/HG012077/,/labs/ali-mortazavi/,Mus musculus,female,CASTJ,,https://www.jax.org/strain/000928,...,,,,,,,CAST/EiJ (CAST),,True,108_CASTJ_10F
4,would create,,ali-mortazavi:117_B6J_10M,/awards/HG012077/,/labs/ali-mortazavi/,Mus musculus,male,B6J,,https://www.jax.org/strain/000664,...,,,,,,,C57BL/6J (B6),,True,117_B6J_10M
5,would create,,ali-mortazavi:118_B6J_10F,/awards/HG012077/,/labs/ali-mortazavi/,Mus musculus,female,B6J,,https://www.jax.org/strain/000664,...,,,,,,,C57BL/6J (B6),,True,118_B6J_10F
6,would create,,ali-mortazavi:119_B6J_10M,/awards/HG012077/,/labs/ali-mortazavi/,Mus musculus,male,B6J,,https://www.jax.org/strain/000664,...,,,,,,,C57BL/6J (B6),,True,119_B6J_10M
7,would create,,ali-mortazavi:120_B6J_10F,/awards/HG012077/,/labs/ali-mortazavi/,Mus musculus,female,B6J,,https://www.jax.org/strain/000664,...,,,,,,,C57BL/6J (B6),,True,120_B6J_10F


# tissue

In [8]:
def get_accession_list_or_none(record, accession_prefix, length):
    """Return the accession names of ``record`` for one namespace.

    The result always has ``length`` entries: all None when the record has no
    accession in the namespace, otherwise the ``name`` of each accession.

    Raises
    ------
    ValueError
        If the number of accessions is neither zero nor ``length``.
    """
    accessions = record.accession.filter(namespace__accession_prefix=accession_prefix)
    found = len(accessions)

    if found == 0:
        # Nothing stored yet: emit placeholders so callers can zip() safely.
        return [None for _ in range(length)]
    if found != length:
        raise ValueError("Unexpected number of accessions {} {}".format(accessions, length))

    names = []
    for stored in accessions:
        names.append(stored.name)
    return names

# Assemble the tissue sheet: one row per (tissue, ontology term) pair.
tissue_rows = []
for tissue_name, tissue in tissues.items():
    donor_alias = f"ali-mortazavi:{tissue.mouse.name}"
    ontology_terms = tissue.ontology_term.all()
    # One accession slot per ontology term (all None when nothing is stored).
    accessions = get_accession_list_or_none(tissue, accession_prefix, len(ontology_terms))
    for term, accession_id in zip(ontology_terms, accessions):
        # The portal path uses "_" where the CURIE has ":".
        curie = term.curie.replace(":", "_")
        # Normalise any null-like value to a plain None.
        if numpy.all(pandas.isnull(accession_id)):
            accession_id = None
        tissue_rows.append({
            "accession": accession_id,
            "uuid": None,
            "aliases:array": f"ali-mortazavi:{tissue.name}_{curie}",
            "award": award,
            "lab": lab,
            "sources:array": jax,
            "donors:array": donor_alias,
            "taxa": species,
            "sample_terms:array": "/sample-terms/{}/".format(curie),
            "term_names:skip": term.name,
        })

tissue_sheet = pandas.DataFrame(tissue_rows)

# With dry_run=True nothing is written to disk by this cell.
dry_run = True
created = server.post_sheet(
    "tissue",
    tissue_sheet,
    dry_run=dry_run,
    verbose=True,
    validator=validator,
)
records_were_submitted = len(created) > 0 and not dry_run
if records_were_submitted:
    tissue_sheet.to_excel("tissue_sheet.{}.xlsx".format(server_name))
tissue_sheet

Unnamed: 0,accession,uuid,aliases:array,award,lab,sources:array,donors:array,taxa,sample_terms:array,term_names:skip
0,would create,,ali-mortazavi:118_B6J_10F_21_NTR_0000646,/awards/HG012077/,/labs/ali-mortazavi/,/sources/jackson-labs/,ali-mortazavi:118_B6J_10F,Mus musculus,/sample-terms/NTR_0000646/,left cerebral cortex
1,would create,,ali-mortazavi:117_B6J_10M_21_NTR_0000646,/awards/HG012077/,/labs/ali-mortazavi/,/sources/jackson-labs/,ali-mortazavi:117_B6J_10M,Mus musculus,/sample-terms/NTR_0000646/,left cerebral cortex
2,would create,,ali-mortazavi:106_CASTJ_10F_21_NTR_0000646,/awards/HG012077/,/labs/ali-mortazavi/,/sources/jackson-labs/,ali-mortazavi:106_CASTJ_10F,Mus musculus,/sample-terms/NTR_0000646/,left cerebral cortex
3,would create,,ali-mortazavi:105_CASTJ_10M_21_NTR_0000646,/awards/HG012077/,/labs/ali-mortazavi/,/sources/jackson-labs/,ali-mortazavi:105_CASTJ_10M,Mus musculus,/sample-terms/NTR_0000646/,left cerebral cortex
4,would create,,ali-mortazavi:120_B6J_10F_21_NTR_0000646,/awards/HG012077/,/labs/ali-mortazavi/,/sources/jackson-labs/,ali-mortazavi:120_B6J_10F,Mus musculus,/sample-terms/NTR_0000646/,left cerebral cortex
5,would create,,ali-mortazavi:119_B6J_10M_21_NTR_0000646,/awards/HG012077/,/labs/ali-mortazavi/,/sources/jackson-labs/,ali-mortazavi:119_B6J_10M,Mus musculus,/sample-terms/NTR_0000646/,left cerebral cortex
6,would create,,ali-mortazavi:108_CASTJ_10F_21_NTR_0000646,/awards/HG012077/,/labs/ali-mortazavi/,/sources/jackson-labs/,ali-mortazavi:108_CASTJ_10F,Mus musculus,/sample-terms/NTR_0000646/,left cerebral cortex
7,would create,,ali-mortazavi:107_CASTJ_10M_21_NTR_0000646,/awards/HG012077/,/labs/ali-mortazavi/,/sources/jackson-labs/,ali-mortazavi:107_CASTJ_10M,Mus musculus,/sample-terms/NTR_0000646/,left cerebral cortex


# multiplexed_tissue

In [9]:
def get_samples_donors(plate):
    """Summarise the samples and donors loaded on a plate.

    Walks plate -> wells -> biosamples -> tissues and returns a dict with:

    - "samples": tissue/ontology-term aliases, in first-seen order, no duplicates
    - "donor": mouse aliases, in first-seen order, no duplicates
    - "source": set of ``source.igvf_id`` values of those mice
    """
    # Plain dicts double as insertion-ordered sets here.
    donors = {}
    sample_aliases = {}

    plate_tissues = (
        tissue
        for well in plate.splitseqwell_set.all()
        for biosample in well.biosample.all()
        for tissue in biosample.tissue.all()
    )
    for tissue in plate_tissues:
        donors[f"ali-mortazavi:{tissue.mouse.name}"] = tissue.mouse
        for term in tissue.ontology_term.all():
            curie = term.curie.replace(":", "_")
            sample_aliases[f"ali-mortazavi:{tissue.name}_{curie}"] = None

    return {
        "samples": list(sample_aliases),
        "donor": list(donors),
        "source": {mouse.source.igvf_id for mouse in donors.values()},
    }

In [10]:
# Sanity check: list each subpool on the plate with the number of sequencing
# runs it appears in. Subpools with a count of 0 are skipped when the
# multiplexed_sample sheet is built.
for subpool in plate.subpool_set.all():
    print(subpool.name, subpool.subpoolinrun_set.count())

B01_13A 2
B01_13B 2
B01_13C 2
B01_13D 2
B01_13E 2
B01_13F 2
B01_13G 2
B01_13H 2
B01_13I 0
B01_13J 0
B01_13K 0
B01_13L 0
B01_13M 0
B01_13N 0
B01_13O 0
B01_13P 0
B01_13Q 0
B01_13R 0


In [11]:
def get_subpool_accession(alias, subpool):
    """Find the accession for a subpool, locally or on the portal.

    When ``subpool`` is given, its stored accessions for the module-level
    ``accession_prefix`` are consulted: the single match's name is returned,
    None if there is no match, and RuntimeError is raised for several matches.

    When ``subpool`` is None, the portal is queried by ``alias`` instead; a 404
    yields None, any other HTTP error is printed and also yields None.
    """
    if subpool is not None:
        matches = subpool.accession.filter(namespace__accession_prefix=accession_prefix)
        n_matches = len(matches)
        if n_matches > 1:
            raise RuntimeError("Too many accessions")
        return matches[0].name if n_matches == 1 else None

    # No local record to consult: ask the portal what it has under this alias.
    try:
        return server.get_json(alias)["accession"]
    except encoded.HTTPError as e:
        # 404 just means "not submitted yet"; anything else is worth reporting.
        if e.response.status_code != 404:
            print("HTTPError", e)
    return None


# Not referenced by the visible cells; kept so the name stays defined.
multiplexed_tissue = []

# Every sequenced subpool of the plate multiplexes the same set of tissue samples.
plate_details = get_samples_donors(plate)
assert len(plate_details["source"]) == 1, "Not sure what do with mixed sources yet."
source = next(iter(plate_details["source"]))
pooled_sample_aliases = ",".join(plate_details["samples"])

multiplexed_sample_rows = []
for subpool in plate.subpool_set.all():
    # Skip subpools that never appeared in a sequencing run.
    if not subpool.subpoolinrun_set.count() > 0:
        continue
    alias = "ali-mortazavi:subpool_{}".format(subpool.name.lower())
    multiplexed_sample_rows.append({
        "accession": get_subpool_accession(alias, subpool),
        "uuid": None,
        "aliases:array": alias,
        "cellular_sub_pool": subpool.name,
        "award": award,
        "lab": lab,
        "sources:array": source,
        "multiplexed_samples:array": pooled_sample_aliases,
    })
multiplexed_sample = pandas.DataFrame(multiplexed_sample_rows)

# With dry_run=True nothing is written to disk by this cell.
dry_run = True
created = server.post_sheet(
    "multiplexed_sample",
    multiplexed_sample,
    dry_run=dry_run,
    verbose=True,
    validator=validator,
)
records_were_submitted = len(created) > 0 and not dry_run
if records_were_submitted:
    multiplexed_sample.to_excel("multiplexed_sample.{}.xlsx".format(server_name), index=False)
multiplexed_sample


Unnamed: 0,accession,uuid,aliases:array,cellular_sub_pool,award,lab,sources:array,multiplexed_samples:array
0,would create,,ali-mortazavi:subpool_b01_13a,B01_13A,/awards/HG012077/,/labs/ali-mortazavi/,/sources/jackson-labs/,"ali-mortazavi:118_B6J_10F_21_NTR_0000646,ali-m..."
1,would create,,ali-mortazavi:subpool_b01_13b,B01_13B,/awards/HG012077/,/labs/ali-mortazavi/,/sources/jackson-labs/,"ali-mortazavi:118_B6J_10F_21_NTR_0000646,ali-m..."
2,would create,,ali-mortazavi:subpool_b01_13c,B01_13C,/awards/HG012077/,/labs/ali-mortazavi/,/sources/jackson-labs/,"ali-mortazavi:118_B6J_10F_21_NTR_0000646,ali-m..."
3,would create,,ali-mortazavi:subpool_b01_13d,B01_13D,/awards/HG012077/,/labs/ali-mortazavi/,/sources/jackson-labs/,"ali-mortazavi:118_B6J_10F_21_NTR_0000646,ali-m..."
4,would create,,ali-mortazavi:subpool_b01_13e,B01_13E,/awards/HG012077/,/labs/ali-mortazavi/,/sources/jackson-labs/,"ali-mortazavi:118_B6J_10F_21_NTR_0000646,ali-m..."
5,would create,,ali-mortazavi:subpool_b01_13f,B01_13F,/awards/HG012077/,/labs/ali-mortazavi/,/sources/jackson-labs/,"ali-mortazavi:118_B6J_10F_21_NTR_0000646,ali-m..."
6,would create,,ali-mortazavi:subpool_b01_13g,B01_13G,/awards/HG012077/,/labs/ali-mortazavi/,/sources/jackson-labs/,"ali-mortazavi:118_B6J_10F_21_NTR_0000646,ali-m..."
7,would create,,ali-mortazavi:subpool_b01_13h,B01_13H,/awards/HG012077/,/labs/ali-mortazavi/,/sources/jackson-labs/,"ali-mortazavi:118_B6J_10F_21_NTR_0000646,ali-m..."


# measurement_set

In [12]:
# Overview of every sequencing run in the database: platform, run name, plate
# and the number of subpools sequenced in that run.
# NOTE(review): the loop variable `run` shadows `run` imported from subprocess
# at the top of the notebook, and the next cell reads its final value.
for run in models.SequencingRun.objects.all():
    print(run.platform.name, run.name, run.plate, run.subpoolinrun_set.count())

nextseq2000 igvf_003/nextseq IGVF_003 3
nanopore igvf_003/nanopore IGVF_003 1
novaseq6000 igvf_003/nova1 IGVF_003 15
novaseq6000 igvf_003/nova2 IGVF_003 15
nextseq2000 igvf_004/nextseq IGVF_004 3
novaseq6000 igvf_004/nova1 IGVF_004 15
novaseq6000 igvf_004/nova2 IGVF_004 15
nextseq2000 igvf_005/nextseq IGVF_005 3
novaseq6000 igvf_005/nova1 IGVF_005 15
novaseq6000 igvf_005/nova2 IGVF_005 15
novaseq6000 igvf_007/nova1 IGVF_007 15
novaseq6000 igvf_007/nova2 IGVF_007 15
nextseq2000 igvf_008/nextseq IGVF_008 3
novaseq6000 igvf_008/nova1 IGVF_008 9
nextseq2000 igvf_b01/next1 IGVF_B01 8
nextseq2000 igvf_b01/next2 IGVF_B01 8


In [13]:
# Relies on `run` left over from the loop in the previous cell, i.e. the last
# SequencingRun that loop iterated over.
run.subpoolinrun_set.count()

8

In [14]:
# Group subpool-in-run records into measurement sets: one set per
# (subpool name, platform family), keyed by the alias it will be submitted under.
measurement_sets = {}
for run in models.SequencingRun.objects.all():
    platform = run.platform.family
    for subpoolinrun in run.subpoolinrun_set.all():
        name = "{}_{}".format(subpoolinrun.subpool.name, platform)
        alias = f"ali-mortazavi:{name}"
        if alias not in measurement_sets:
            measurement_sets[alias] = set()
        measurement_sets[alias].add(subpoolinrun)


In [15]:
def get_measurement_set_accession(alias, measurement_set):
    """Find the accession for a measurement set, locally or on the portal.

    With a ``measurement_set`` record, look among its stored accessions for
    the module-level ``accession_prefix``: return the single match's name,
    None when there is none, and raise RuntimeError when there are several.

    Without one (``measurement_set is None``), fetch ``alias`` from the portal
    and return its "accession"; a 404 gives None, other HTTP errors are
    printed and also give None.
    """
    if measurement_set is None:
        accession = None
        try:
            accession = server.get_json(alias)["accession"]
        except encoded.HTTPError as e:
            # A 404 only means the object has not been submitted yet.
            if e.response.status_code != 404:
                print("HTTPError", e)
        return accession

    stored = measurement_set.accession.filter(namespace__accession_prefix=accession_prefix)
    stored_count = len(stored)
    if stored_count > 1:
        raise RuntimeError("Too many accessions")
    if stored_count == 1:
        return stored[0].name
    return None

# Build and (dry-run) post the measurement_set sheet: one row per measurement
# set whose plate is the test plate.
# NOTE(review): igvf_test_ns is not referenced anywhere else in the visible
# cells, yet this lookup raises if the namespace row is missing.
igvf_test_ns = models.AccessionNamespace.objects.get(name="IGVF test")
measurements = []
# Reset here; the sequence_data rows themselves are built in a later cell.
sequence_data = []
#plate_cache = {}

# Maps a subpool's selection_type code to the sequencing_library_type value
# written to the sheet; an unmapped code raises KeyError in the loop below.
sequencing_library_type_map = {
    "NO": 'mRNA enriched',
    "EX": "exome capture",
}

for alias in measurement_sets:
    # Any member of the set will do: only its subpool, plate and
    # measurement_set are read here.
    run = next(iter(measurement_sets[alias]))
    subpool = run.subpool
    # NOTE(review): this rebinds the notebook-level `plate` chosen earlier, so
    # re-running earlier cells afterwards sees whichever plate was visited last.
    plate = run.sequencing_run.plate
    if plate.name == test_run_name:   
        #plate_details = plate_cache.setdefault(plate, get_samples_donors(plate))        
        measurement_row = {
            # When run.measurement_set is None the helper queries the portal by alias.
            "accession": get_measurement_set_accession(alias, run.measurement_set),
            "uuid": None,
            "aliases:array": alias,
            "award": award,
            "lab": lab,
            "assay_term": "/assay-terms/OBI_0003109/", # single-nucleus RNA sequencing assay
            "sequencing_library_type:array": ",".join([sequencing_library_type_map[subpool.selection_type]]),
            "documents": None,
            "alternate_accessions": None,
            #"submitter_comment": None,
            "description": None,
            # Old approach: attach all the samples to the measurement_set
            #"samples:array": ",".join(plate_details["samples"]),
            # New approach: attach the multiplexed_sample to the measurement_set
            "samples:array": "ali-mortazavi:subpool_{}".format(subpool.name.lower()),
            "protocol": None,
        }
        measurements.append(measurement_row)
            
measurements = pandas.DataFrame(measurements)
# Always-on switch: the sheet is written before posting, regardless of dry_run.
if 1:
    measurements.to_excel("measurement_set.{}.xlsx".format(server_name), index=False)
dry_run=True
created = server.post_sheet("measurement_set", measurements, dry_run=dry_run, verbose=True, validator=validator)
# After a real submission, write the sheet again (same filename as above).
if len(created) > 0 and not dry_run:
    measurements.to_excel("measurement_set.{}.xlsx".format(server_name), index=False)
measurements



Unnamed: 0,accession,uuid,aliases:array,award,lab,assay_term,sequencing_library_type:array,documents,alternate_accessions,description,samples:array,protocol
0,would create,,ali-mortazavi:B01_13A_illumina,/awards/HG012077/,/labs/ali-mortazavi/,/assay-terms/OBI_0003109/,mRNA enriched,,,,ali-mortazavi:subpool_b01_13a,
1,would create,,ali-mortazavi:B01_13B_illumina,/awards/HG012077/,/labs/ali-mortazavi/,/assay-terms/OBI_0003109/,mRNA enriched,,,,ali-mortazavi:subpool_b01_13b,
2,would create,,ali-mortazavi:B01_13C_illumina,/awards/HG012077/,/labs/ali-mortazavi/,/assay-terms/OBI_0003109/,mRNA enriched,,,,ali-mortazavi:subpool_b01_13c,
3,would create,,ali-mortazavi:B01_13D_illumina,/awards/HG012077/,/labs/ali-mortazavi/,/assay-terms/OBI_0003109/,mRNA enriched,,,,ali-mortazavi:subpool_b01_13d,
4,would create,,ali-mortazavi:B01_13E_illumina,/awards/HG012077/,/labs/ali-mortazavi/,/assay-terms/OBI_0003109/,mRNA enriched,,,,ali-mortazavi:subpool_b01_13e,
5,would create,,ali-mortazavi:B01_13F_illumina,/awards/HG012077/,/labs/ali-mortazavi/,/assay-terms/OBI_0003109/,mRNA enriched,,,,ali-mortazavi:subpool_b01_13f,
6,would create,,ali-mortazavi:B01_13G_illumina,/awards/HG012077/,/labs/ali-mortazavi/,/assay-terms/OBI_0003109/,exome capture,,,,ali-mortazavi:subpool_b01_13g,
7,would create,,ali-mortazavi:B01_13H_illumina,/awards/HG012077/,/labs/ali-mortazavi/,/assay-terms/OBI_0003109/,mRNA enriched,,,,ali-mortazavi:subpool_b01_13h,


In [16]:
#sir = next(iter(measurement_sets[alias]))

In [17]:
#sir.subpool

In [18]:
def resolve_aliases(entities):
    """Replace aliases in ``entities`` with the portal ``@id`` they refer to.

    Any element containing ":" is treated as an alias and looked up on the
    server; every other element is passed through unchanged. Order is
    preserved and a new list is returned.
    """
    return [
        server.get_json(element)["@id"] if ":" in element else element
        for element in entities
    ]

#resolve_aliases(["ali-mortazavi:118_B6J_10F_21_NTR_0000646","ali-mortazavi:117_B6J_10M_21_NTR_0000646","ali-mortazavi:106_CASTJ_10F_21_NTR_0000646"])
                 

In [19]:
# Disabled maintenance block: enable it when the sample list (or the
# sequencing_library_type) of already-posted measurement sets needs fixing.
# As written it only prints the payload it would send; the patch call itself
# is commented out.
if 0:
    for i, row in measurements.iterrows():
        # Current state of the measurement set on the portal.
        measurement_set = server.get_json(row["accession"])
        # Samples the sheet says it should have, converted from aliases to @ids.
        desired_biosample = resolve_aliases(row["samples:array"].split(","))

        payload = {}
        # Any sample present on only one side means the list has to be replaced.
        different_biosample = set(desired_biosample).symmetric_difference(measurement_set["samples"])
        if len(different_biosample) > 0:
            payload.update({"samples": desired_biosample})
           

        sequencing_library_type = row["sequencing_library_type:array"].split(",")
        if measurement_set.get("sequencing_library_type") != sequencing_library_type:
            payload.update({"sequencing_library_type": sequencing_library_type})
            
        print(measurement_set["@id"], payload)
        #if True and len(payload) > 0:
        #    print(server.patch_json(measurement_set["@id"], payload))

In [20]:
# Save the current measurement_set sheet (same filename the measurement_set
# cell writes to), without the DataFrame index column.
measurements.to_excel("measurement_set.{}.xlsx".format(server_name), index=False)

In [21]:
# For measurement sets that already have a real accession (not null and not
# the "would create" placeholder), print the accession, the number of samples
# the portal reports and the number of samples listed in the sheet.
for i, row in measurements.iterrows():
    if not (pandas.isnull(row["accession"]) or row["accession"] == "would create"):
        dcc = server.get_json("/measurement_set/{}/".format(row["accession"]))
        print(row["accession"], len(dcc["samples"]), len(row["samples:array"].split(",")))

In [None]:
# Build the sequence_data sheet: one row per fastq file of every
# subpool-in-run that belongs to the test plate.
sequence_data_rows = []
plate_cache = {}

for alias in measurement_sets:
    # run_id numbers the (sequencing run, subpool-in-run, lane) groups inside
    # one measurement set: it starts at 1 and grows each time the group changes.
    run_break = None
    run_id = 1
    # NOTE(review): measurement_sets[alias] is a set, so the order in which
    # runs are visited (and therefore numbered) is whatever the set yields.
    for run in measurement_sets[alias]:
        plate = run.sequencing_run.plate
        if plate.name != test_run_name:
            continue

        # Compute the plate summary once per plate. dict.setdefault() would
        # evaluate get_samples_donors() on every iteration, even on a cache hit.
        if plate not in plate_cache:
            plate_cache[plate] = get_samples_donors(plate)
        # Not used by the rows below; kept so the name stays defined for inspection.
        plate_details = plate_cache[plate]

        for run_file in run.subpoolinrunfile_set.order_by("subpool_run", "lane", "read"):
            current_break = (
                run_file.subpool_run.sequencing_run.id,
                run_file.subpool_run.id,
                run_file.lane,
            )
            if run_break is None:
                run_break = current_break
            elif run_break != current_break:
                run_id += 1
                run_break = current_break

            # Look up the accession already stored for this file, if any.
            file_accessions = run_file.accession.filter(namespace__accession_prefix=accession_prefix)
            if len(file_accessions) > 1:
                # Report this file's own accessions (the original joined an
                # unrelated variable left over from the tissue cell).
                print("Several accessions? {} {}".format(
                    run_file.filename,
                    ",".join(str(found.name) for found in file_accessions),
                ))
                accession = file_accessions[0].name
            elif len(file_accessions) == 1:
                accession = file_accessions[0].name
            else:
                accession = None

            sequence_data_rows.append({
                "accession": accession,
                "uuid": None,
                #"aliases": None,
                "award": award,
                "lab": lab,
                "md5sum": run_file.md5sum,
                "file_format": "fastq",
                "file_set": alias,
                "content_type": "reads",
                "sequencing_run:integer": run_id,
                "documents:array": None,
                "submitter_comment": None,
                "description": None,
                "dbxrefs": None,
                "derived_from": None,
                "seqspec": None,
                "submitted_file_name": run_file.filename,
                "illumina_read_type": run_file.read,
            })

sequence_data = pandas.DataFrame(sequence_data_rows)
sequence_data.to_excel("sequence_data.{}.xlsx".format(server_name), index=False)
sequence_data

In [None]:
# Collect every sheet built above into one workbook named after the plate and
# the upload target, one worksheet per object type.
# NOTE(review): index=None is falsy, so presumably the index column is
# omitted as with index=False -- confirm against the pandas version in use.
with pandas.ExcelWriter("{}_{}.xlsx".format(test_run_name, upload_target)) as book:
    rodent_donor.to_excel(book, sheet_name="rodent_donor", index=None)
    tissue_sheet.to_excel(book, sheet_name="tissue", index=None)
    multiplexed_sample.to_excel(book, sheet_name="multiplexed_sample", index=None)
    measurements.to_excel(book, sheet_name="measurement_set", index=None)
    sequence_data.to_excel(book, sheet_name="sequence_data", index=None)
