Preparing to submit Wold lab stranded samples.


In [1]:
import os
import sys
import requests
import pandas
import paramiko
import re
import json
from IPython import display
from pathlib import Path
import configparser

In [2]:
from curation_common import *
from encoded_client.encoded import DCCValidator

In [3]:
from encoded_client.encoded import Document
from encoded_client.submission import run_aws_cp
from htsworkflow.util.api import (
    add_auth_options,
    make_auth_from_opts,
    HtswApi,
)

In [4]:
# Read the HTSWorkflow API credentials from the per-user or system-wide
# htsworkflow.ini and build the client used for library lookups below.
config = configparser.ConfigParser()
read_files = config.read([os.path.expanduser('~/.htsworkflow.ini'),
                          '/etc/htsworkflow.ini'
                          ])

SECTION = 'sequence_archive'
if not config.has_section(SECTION):
    # Fail here with a clear message; otherwise the missing section only shows
    # up as a NameError on apiid when the auth dictionary is built.
    raise RuntimeError(
        "No [{}] section found in htsworkflow configuration (files read: {})".format(
            SECTION, read_files or "none"))

apiid = config.get(SECTION, 'apiid')
apikey = config.get(SECTION, 'apikey')
apihost = config.get(SECTION, 'host')

auth = {'apiid': apiid, 'apikey': apikey }
htsw = HtswApi(apihost, auth)

In [5]:
# live server & control file
server = ENCODED('www.encodeproject.org')
spreadsheet_name = Path('~/woldlab/ENCODE/stranded-25201-25235-mouse-spleen.xlsx').expanduser()
# engine is handed to every pandas.read_excel call in this notebook; None lets
# pandas choose the reader, 'odf' is kept below for .ods control files.
engine=None
#engine='odf'

# test server & datafile
#server = ENCODED('test.encodedcc.org')
#spreadsheet_name = os.path.expanduser('~diane/woldlab/ENCODE/C1-encode3-limb-2017-testserver.ods')

# NOTE(review): load_netrc presumably pulls the DCC credentials from ~/.netrc -- confirm.
server.load_netrc()
validator = DCCValidator(server)

# Stop here if the control spreadsheet is missing, before any cell tries to read it.
assert spreadsheet_name.exists()

In [6]:
# NOTE(review): `award` is not referenced by any other cell visible in this notebook,
# and the biosample rows fetched from the server list /awards/UM1HG009444/ --
# confirm which award id is intended.
award = 'UM1HG009443'

# Look up biosample ontologies

Look up any biosample ontologies that are already present on the server.

In [7]:
# Fill in ontology and provenance columns of the Biosample sheet from the
# biosample records that already exist on the server.
biosample_sheet = pandas.read_excel(spreadsheet_name, sheet_name='Biosample', header=0, engine=engine)

# Prefix of the upstream lab's alias; whatever follows it is recorded as the tube id.
alias_prefix = "john-stamatoyannopoulos:"
# Embedded objects whose @id is copied into the sheet when the cell is still empty.
linked_terms = [("organism","@id"), ("source","@id"), ("donor","@id"), ("lab","@id"), ("award", "@id")]

server_description = []
tcs = []
for i, row in biosample_sheet.iterrows():
    accession = row.accession
    if not isinstance(accession, str) or not accession.startswith('E'):
        # Rows without a usable accession still need a placeholder; otherwise
        # the two lists end up shorter than the sheet and the column
        # assignments after the loop fail with a length mismatch.
        server_description.append(None)
        tcs.append(None)
        continue

    biosample = server.get_json(accession)
    server_description.append(biosample["summary"])

    # Pick the alias carrying the expected prefix instead of assuming it is
    # the first one and slicing blindly.
    tube_aliases = [alias for alias in biosample["aliases"] if alias.startswith(alias_prefix)]
    tcs.append(tube_aliases[0][len(alias_prefix):] if tube_aliases else None)

    biosample_ontology = biosample['biosample_ontology']
    if isinstance(biosample_ontology, dict):
        biosample_sheet.loc[i, 'biosample_ontology'] = biosample_ontology['@id']
        biosample_sheet.loc[i, 'biosample_term_name:skip'] = biosample_ontology['term_name']
        for term in linked_terms:
            # Only fill blanks; values already typed into the sheet win.
            if pandas.isnull(biosample_sheet.loc[i, term[0]]):
                biosample_sheet.loc[i, term[0]] = biosample[term[0]][term[1]]

biosample_sheet["upstream_description:skip"] = server_description
biosample_sheet["TC"] = tcs
biosample_sheet

Unnamed: 0,uuid,accession,library_id:skip,cDNA_sample:skip,parent TCID:skip,TC:skip,description,biosample_ontology,biosample_term_name:skip,aliases:array,...,model_organism_age_units,mouse_life_stage,model_organism_sex,organism,source,donor,lab,award,upstream_description:skip,TC
0,,ENCBS982OLT,25201,ENC4_cDNA_901,31037,TC74649,MouseF1 of C57BL/6 (f) x CAST Eij (m)_m.CD19_s...,/biosample-types/primary_cell_CL_0000236/,B cell,barbara-wold:ENC4_cDNA_901,...,day,postnatal,female,/organisms/mouse/,/sources/jackson-labs/,/mouse-donors/ENCDO509HIY/,barbara-wold,/awards/UM1HG009444/,Mus musculus strain B6CASTF1/J female adult (6...,TC74649
1,,ENCBS904RDT,25202,ENC4_cDNA_902,31038,TC74650,MouseF1 of C57BL/6 (f) x CAST Eij (m)_m.CD19_s...,/biosample-types/primary_cell_CL_0000236/,B cell,barbara-wold:ENC4_cDNA_902,...,day,postnatal,female,/organisms/mouse/,/sources/jackson-labs/,/mouse-donors/ENCDO509HIY/,barbara-wold,/awards/UM1HG009444/,Mus musculus strain B6CASTF1/J female adult (6...,TC74650
2,,ENCBS661EAT,25203,ENC4_cDNA_903,31039,TC74651,MouseF1 of C57BL/6 (f) x CAST Eij (m)_m.CD19_s...,/biosample-types/primary_cell_CL_0000236/,B cell,barbara-wold:ENC4_cDNA_903,...,day,postnatal,female,/organisms/mouse/,/sources/jackson-labs/,/mouse-donors/ENCDO509HIY/,barbara-wold,/awards/UM1HG009444/,Mus musculus strain B6CASTF1/J female adult (6...,TC74651
3,,ENCBS575JXK,25204,ENC4_cDNA_904,31040,TC74652,MouseF1 of C57BL/6 (f) x CAST Eij (m)_m.CD19_s...,/biosample-types/primary_cell_CL_0000236/,B cell,barbara-wold:ENC4_cDNA_904,...,day,postnatal,female,/organisms/mouse/,/sources/jackson-labs/,/mouse-donors/ENCDO509HIY/,barbara-wold,/awards/UM1HG009444/,Mus musculus strain B6CASTF1/J female adult (6...,TC74652
4,,ENCBS821FUE,25205,ENC4_cDNA_905,31041,TC74653,MouseF1 of C57BL/6 (f) x CAST Eij (m)_m.CD19_s...,/biosample-types/primary_cell_CL_0000236/,B cell,barbara-wold:ENC4_cDNA_905,...,day,postnatal,male,/organisms/mouse/,/sources/jackson-labs/,/mouse-donors/ENCDO509HIY/,barbara-wold,/awards/UM1HG009444/,Mus musculus strain B6CASTF1/J male adult (67....,TC74653
5,,ENCBS489PFJ,25206,ENC4_cDNA_906,31042,TC74654,MouseF1 of C57BL/6 (f) x CAST Eij (m)_m.CD19_s...,/biosample-types/primary_cell_CL_0000236/,B cell,barbara-wold:ENC4_cDNA_906,...,day,postnatal,male,/organisms/mouse/,/sources/jackson-labs/,/mouse-donors/ENCDO509HIY/,barbara-wold,/awards/UM1HG009444/,Mus musculus strain B6CASTF1/J male adult (67....,TC74654
6,,ENCBS718WEQ,25207,ENC4_cDNA_907,31043,TC74655,MouseF1 of C57BL/6 (f) x CAST Eij (m)_m.CD19_s...,/biosample-types/primary_cell_CL_0000236/,B cell,barbara-wold:ENC4_cDNA_907,...,day,postnatal,male,/organisms/mouse/,/sources/jackson-labs/,/mouse-donors/ENCDO509HIY/,barbara-wold,/awards/UM1HG009444/,Mus musculus strain B6CASTF1/J male adult (67....,TC74655
7,,ENCBS496IKI,25208,ENC4_cDNA_908,31044,TC74656,MouseF1 of C57BL/6 (f) x CAST Eij (m)_m.CD19_s...,/biosample-types/primary_cell_CL_0000236/,B cell,barbara-wold:ENC4_cDNA_908,...,day,postnatal,male,/organisms/mouse/,/sources/jackson-labs/,/mouse-donors/ENCDO509HIY/,barbara-wold,/awards/UM1HG009444/,Mus musculus strain B6CASTF1/J male adult (67....,TC74656
8,,ENCBS032CJY,25209,ENC4_cDNA_909,32806,TC74657,MouseF1 of C57BL/6 (f) x CAST Eij (m)_m.CD19_s...,/biosample-types/primary_cell_CL_0000236/,B cell,barbara-wold:ENC4_cDNA_909,...,day,postnatal,female,/organisms/mouse/,/sources/jackson-labs/,/mouse-donors/ENCDO509HIY/,barbara-wold,/awards/UM1HG009444/,Mus musculus strain B6CASTF1/J female adult (6...,TC74657
9,,ENCBS652PQK,25210,ENC4_cDNA_910,32807,TC74658,MouseF1 of C57BL/6 (f) x CAST Eij (m)_m.CD19_s...,/biosample-types/primary_cell_CL_0000236/,B cell,barbara-wold:ENC4_cDNA_910,...,day,postnatal,female,/organisms/mouse/,/sources/jackson-labs/,/mouse-donors/ENCDO509HIY/,barbara-wold,/awards/UM1HG009444/,Mus musculus strain B6CASTF1/J female adult (6...,TC74658


In [8]:
# Write the annotated Biosample sheet to /dev/shm, leaving out the index column.
biosample_sheet.to_excel('/dev/shm/biosamples.xlsx', index=False)

In [12]:
# Cross-check the tube ids recorded in the upstream lab's sheet against the
# ones entered in our Biosample sheet.
stam_filename = Path("~/woldlab/ENCODE/RNA for Wold lab mouse-spleen.xlsx").expanduser()
stam_metadata = pandas.read_excel(stam_filename, sheet_name="Sheet1")
print(stam_metadata.shape)

biosample_sheet = pandas.read_excel(spreadsheet_name, sheet_name='Biosample', header=0, engine=engine)
print(biosample_sheet.shape)

# Accessions that appear in only one of the two sheets (expected: empty set).
stam_accessions = set(stam_metadata['accession'])
print(stam_accessions.symmetric_difference(biosample_sheet['accession']))

# Line the two sheets up by accession and flag rows whose tube ids agree.
stam_tubes = stam_metadata[["accession", "Parental TCID:", "child TCID"]]
sheet_tubes = biosample_sheet[["accession", "parent TCID:skip", "TC:skip"]]
merged = stam_tubes.merge(sheet_tubes, on="accession")

merged["parent"] = merged["Parental TCID:"].eq(merged["parent TCID:skip"])
merged["child"] = merged["child TCID"].eq(merged["TC:skip"])
merged

(35, 17)
(35, 20)
set()


Unnamed: 0,accession,Parental TCID:,child TCID,parent TCID:skip,TC:skip,parent,child
0,ENCBS982OLT,31037,TC74649,31037,TC74649,True,True
1,ENCBS904RDT,31038,TC74650,31038,TC74650,True,True
2,ENCBS661EAT,31039,TC74651,31039,TC74651,True,True
3,ENCBS575JXK,31040,TC74652,31040,TC74652,True,True
4,ENCBS821FUE,31041,TC74653,31041,TC74653,True,True
5,ENCBS489PFJ,31042,TC74654,31042,TC74654,True,True
6,ENCBS718WEQ,31043,TC74655,31043,TC74655,True,True
7,ENCBS496IKI,31044,TC74656,31044,TC74656,True,True
8,ENCBS032CJY,32806,TC74657,32806,TC74657,True,True
9,ENCBS652PQK,32807,TC74658,32807,TC74658,True,True


In [13]:
# Annotate the upstream metadata with the ontology term name the server
# reports for each biosample accession.
stam_metadata = pandas.read_excel(stam_filename, sheet_name="Sheet1")

cell_terms = []
for accession in stam_metadata["accession"]:
    if isinstance(accession, str) and accession.startswith('E'):
        biosample = server.get_json(accession)
        cell_terms.append(biosample["biosample_ontology"]['term_name'])
    else:
        # Keep one entry per row so the column assignment below cannot fail
        # with a length mismatch when a row has no usable accession.
        cell_terms.append(None)

stam_metadata["biosample_term"] = cell_terms
stam_metadata

Unnamed: 0,species,Strain,Cell type,tissue of origin,gender (if known),age,age units,Replicate,prep date,cell/nuclei count (M),# tubes,preservation method,assay,lab,Parental TCID:,child TCID,accession,biosample_term
0,Mouse,F1 of C57BL/6 (f) x CAST Eij (m),m.CD19,spleen,F,67,days,1,2021-12-02,3.0,1,Flash frozen,RNA seq,Wold,31037,TC74649,ENCBS982OLT,B cell
1,Mouse,F1 of C57BL/6 (f) x CAST Eij (m),m.CD19,spleen,F,67,days,2,2021-12-02,3.0,1,Flash frozen,RNA seq,Wold,31038,TC74650,ENCBS904RDT,B cell
2,Mouse,F1 of C57BL/6 (f) x CAST Eij (m),m.CD19,spleen,F,67,days,3,2021-12-02,3.0,1,Flash frozen,RNA seq,Wold,31039,TC74651,ENCBS661EAT,B cell
3,Mouse,F1 of C57BL/6 (f) x CAST Eij (m),m.CD19,spleen,F,67,days,4,2021-12-02,3.0,1,Flash frozen,RNA seq,Wold,31040,TC74652,ENCBS575JXK,B cell
4,Mouse,F1 of C57BL/6 (f) x CAST Eij (m),m.CD19,spleen,M,67,days,1,2021-12-02,3.0,1,Flash frozen,RNA seq,Wold,31041,TC74653,ENCBS821FUE,B cell
5,Mouse,F1 of C57BL/6 (f) x CAST Eij (m),m.CD19,spleen,M,67,days,2,2021-12-02,3.0,1,Flash frozen,RNA seq,Wold,31042,TC74654,ENCBS489PFJ,B cell
6,Mouse,F1 of C57BL/6 (f) x CAST Eij (m),m.CD19,spleen,M,67,days,3,2021-12-02,3.0,1,Flash frozen,RNA seq,Wold,31043,TC74655,ENCBS718WEQ,B cell
7,Mouse,F1 of C57BL/6 (f) x CAST Eij (m),m.CD19,spleen,M,67,days,4,2021-12-02,3.0,1,Flash frozen,RNA seq,Wold,31044,TC74656,ENCBS496IKI,B cell
8,Mouse,F1 of C57BL/6 (f) x CAST Eij (m),m.CD19,spleen,F,62,days,1,2022-01-12,3.0,2,Flash frozen,RNA seq,Wold,32806,TC74657,ENCBS032CJY,B cell
9,Mouse,F1 of C57BL/6 (f) x CAST Eij (m),m.CD19,spleen,F,62,days,2,2022-01-12,3.0,2,Flash frozen,RNA seq,Wold,32807,TC74658,ENCBS652PQK,B cell


In [15]:
# Show the upstream cell type next to the term name fetched from the server for a visual comparison.
stam_metadata[["accession", "gender (if known)", "age", "Cell type", "biosample_term"]]

Unnamed: 0,accession,gender (if known),age,Cell type,biosample_term
0,ENCBS982OLT,F,67,m.CD19,B cell
1,ENCBS904RDT,F,67,m.CD19,B cell
2,ENCBS661EAT,F,67,m.CD19,B cell
3,ENCBS575JXK,F,67,m.CD19,B cell
4,ENCBS821FUE,M,67,m.CD19,B cell
5,ENCBS489PFJ,M,67,m.CD19,B cell
6,ENCBS718WEQ,M,67,m.CD19,B cell
7,ENCBS496IKI,M,67,m.CD19,B cell
8,ENCBS032CJY,F,62,m.CD19,B cell
9,ENCBS652PQK,F,62,m.CD19,B cell


In [16]:
def find_stam_replicate(accession):
    """Return the "Replicate" entry for a biosample accession, as a string.

    The workbook at stam_filename ("Sheet1") is read again on every call.
    A KeyError is raised when the accession is not listed in the sheet.
    """
    sheet = pandas.read_excel(stam_filename, sheet_name="Sheet1")
    replicate_by_accession = sheet[["accession", "Replicate"]].set_index("accession")
    replicate = replicate_by_accession.loc[accession, "Replicate"]
    return str(replicate)

# Spot check one accession.
find_stam_replicate("ENCBS661EAT")

'3'

# Can we map biosamples by description?

In [19]:
# Re-read the Biosample sheet from disk; columns that were only added to the in-memory
# frame earlier (e.g. "upstream_description:skip") are present only if they were saved
# into the spreadsheet -- NOTE(review): confirm before re-enabling the loop below.
biosample_sheet = pandas.read_excel(spreadsheet_name, sheet_name='Biosample', header=0, engine=engine)

def normalize_sex_term(d):
    """Translate the record's "sex" value ("male"/"female") into "M"/"F".

    Raises KeyError when "sex" is missing or holds any other value.
    """
    abbreviations = {"male": "M", "female": "F"}
    sex = d["sex"]
    return abbreviations[sex]
    
def normalize_tissue(d):
    """Map the record's "tissue" label onto the matching cell-type description.

    Raises KeyError when "tissue" is missing or is not one of the known labels.
    """
    description_by_label = {
        "CD19_spleen": "B primary cell",
        "CD3_spleen": "T-cell primary cell",
        "CD4 naive_spleen": "CD4-positive naive resting alpha-beta T primary cell",
        "CD8 naive_spleen": "CD8-positive naive resting alpha-beta T primary cell",
        "monocytes_spleen": "monocyte primary cell",
        "neutrophil_spleen": "neutrophil primary cell",
    }
    label = d["tissue"]
    return description_by_label[label]

def extract_terms(row, regex, field_name):
    """Collect identifying fields for a row plus whatever the regex captures.

    Parameters
    ----------
    row : mapping
        Must provide "accession", "library_id:skip" and ``field_name``.
    regex : compiled regular expression
        Searched against ``row[field_name]``; its named groups are merged
        into the result when it matches.
    field_name : str
        Key of the text field to parse.

    Returns
    -------
    dict
        Always holds "accession" and "library_id"; the regex's named groups
        are added only when the field is a string and the pattern matches.
    """
    target = {
        "accession": row["accession"],
        "library_id": row["library_id:skip"],
    }
    text = row[field_name]
    # Empty spreadsheet cells arrive as NaN (a float); regex.search would
    # raise TypeError on them, so treat any non-string value as "no match".
    if isinstance(text, str):
        match = regex.search(text)
        if match is not None:
            target.update(match.groupdict())
    return target
# Pattern for library descriptions such as
#   ..._m.CD19_spleen_F_67days_1_ENC4_cDNA_901
# The dot after "_m" is escaped so it only matches a literal ".", and the
# replicate group accepts more than one digit.
brian_re = re.compile(r"_m\.(?P<tissue>.*)_(?P<sex>[MF])_(?P<age>\d+)days_(?P<rep>\d+)_(?P<cDNA>ENC4_cDNA_\d+)")
# Pattern for text of the form "...J female adult (67.00 days) <tissue>".
dcc_re = re.compile(r"J (?P<sex>male|female) adult \((?P<age>[\d]+)\.00 days\) (?P<tissue>.*)")

# Parsed records, one list per source.
jumpgate_records = []
dcc_records = []

# Lookup tables keyed by (tissue, age, sex, replicate).
matching = {}
matching_jumpgate = {}

#for i, row in biosample_sheet.iterrows():
#    term = extract_terms(row, brian_re, "description")
#    term["tissue"] = normalize_tissue(term)
#    jumpgate_records.append(term)
    
#    matching.setdefault((term["tissue"], term["age"], term["sex"], term["rep"]), {}).setdefault(term["accession"], []).append("jumpgate")
#    matching_jumpgate.setdefault((term["tissue"], term["age"], term["sex"], term["rep"]), {}).setdefault((term["accession"], str(term["library_id"])), []).append("jumpgate")
#    term = extract_terms(row, dcc_re, "upstream_description:skip")
#    term["rep"] = find_stam_replicate(row["accession"])
#    term["sex"] = normalize_sex_term(term)
#    dcc_records.append(term)
#    #matching.setdefault((term["tissue"], term["age"], term["sex"]), []).append({"dcc": term["accession"]})
#    matching.setdefault((term["tissue"], term["age"], term["sex"], term["rep"]), {}).setdefault(term["accession"], []).append("dcc")
#    matching_jumpgate.setdefault((term["tissue"], term["age"], term["sex"], term["rep"]), {}).setdefault((term["accession"], str(term["library_id"])), []).append("dcc")


#jumpgate_records = pandas.DataFrame(jumpgate_records)
#dcc_records = pandas.DataFrame(dcc_records)



In [20]:
def get_tube_ids(accession):
    """Return [parent tube id, child tube id] for a biosample accession.

    The workbook at stam_filename ("Sheet1") is read again on every call.
    A KeyError is raised when the accession is not listed in the sheet.
    """
    sheet = pandas.read_excel(stam_filename, sheet_name="Sheet1")
    tube_columns = sheet[["accession", "Parental TCID:", "child TCID"]]
    tubes_by_accession = tube_columns.set_index("accession")
    tube_row = tubes_by_accession.loc[accession]
    return tube_row.to_list()
    
# Spot check one accession; the result is [parent tube id, child tube id].
get_tube_ids("ENCBS958MXN")


[61199, 'TC74676']

In [21]:

# Turn the (tissue, age, sex, replicate) -> samples table into one summary
# row per key, flagging keys where the two sources point at different samples.
columns = [
    "tissue", "age(day)", "sex", "replicate",
    "parent TCID", "TCID",
    "dcc_biosample", "dcc_jumpgate",
    "caltech_biosample", "caltech_jumpgate",
    "matching",
]

result = []
for key in sorted(matching_jumpgate):
    samples = matching_jumpgate[key]
    row = list(key)
    if len(samples) == 1:
        # A single (accession, library id) entry under this key fills both
        # the DCC columns and the Caltech columns.
        (sample,) = samples
        row += get_tube_ids(sample[0])
        row += sample
        row += sample
        row.append("match")
    else:
        # Index each sample by the first source that reported it.
        names = {sources[0]: sample for sample, sources in samples.items()}
        row += get_tube_ids(names["dcc"][0])
        row += names["dcc"]
        row += names["jumpgate"]
        row.append("error")
    result.append(row)

mapped = pandas.DataFrame(result, columns=columns).sort_values("dcc_jumpgate")
mapped


Unnamed: 0,tissue,age(day),sex,replicate,parent TCID,TCID,dcc_biosample,dcc_jumpgate,caltech_biosample,caltech_jumpgate,matching


In [None]:
#mapped.to_csv("/dev/shm/mouse-spleen-dcc-to-caltech.csv", index=False)

In [None]:
#mapped.set_index("caltech_jumpgate").sort_index()

In [None]:
# Look up the accession recorded for one child tube id, using the stam_metadata frame already in memory.
stam_metadata[["child TCID", "accession",]].set_index("child TCID").loc["TC74675", "accession"]

In [None]:
def get_accession_by_child_tube(tube_id):
    """Return the accession listed for a child tube id.

    The workbook at stam_filename ("Sheet1") is read again on every call.
    A KeyError is raised when the tube id is not listed in the sheet.
    """
    sheet = pandas.read_excel(stam_filename, sheet_name="Sheet1")
    accession_by_tube = sheet[["child TCID", "accession",]].set_index("child TCID")
    accession = accession_by_tube.loc[tube_id, "accession"]
    return accession
    
# Spot check one child tube id.
get_accession_by_child_tube("TC74675")


In [None]:
# Fill in missing accessions in the Biosample sheet by looking each row's
# child tube id up in the upstream metadata workbook.
biosample_sheet = pandas.read_excel(spreadsheet_name, sheet_name='Biosample', header=0, engine=engine)

# Read the tube id -> accession table once, instead of re-reading the
# workbook for every row that lacks an accession.
accession_by_tube = (
    pandas.read_excel(stam_filename, sheet_name="Sheet1")
    .set_index("child TCID")["accession"]
)

for i, row in biosample_sheet.iterrows():
    if pandas.isnull(row.accession):
        # .loc raises KeyError for a tube id that is not in the workbook, so
        # an unknown tube is reported rather than silently left blank.
        biosample_sheet.loc[i, 'accession'] = accession_by_tube.loc[row["TC:skip"]]

# index=False keeps the export consistent with the other to_excel calls in
# this notebook and avoids writing a spurious index column.
biosample_sheet.to_excel("/dev/shm/biosample.xlsx", index=False)

# Retrieve library starting amount

In [22]:
print(spreadsheet_name)
libraries = pandas.read_excel(spreadsheet_name, sheet_name='Library', header=0, engine=engine)

# For every library, either verify the fragment size typed into the sheet
# against the insert size reported by htsw.get_library, or report that
# insert size when the sheet cell is empty.
fragment_size = []
for i, row in libraries.iterrows():
    library_id = row["library_id:skip"]
    library_info = htsw.get_library(library_id)
    insert_size = library_info["insert_size"]
    sheet_size = row["average_fragment_size:integer"]
    if not pandas.isnull(sheet_size):
        assert insert_size == sheet_size, "{} {} {}!={}".format(i, library_id, insert_size, sheet_size)
        fragment_size.append("{} pass".format(sheet_size))
    else:
        fragment_size.append(insert_size)

print("\n".join(str(size) for size in fragment_size))

/home/diane/woldlab/ENCODE/stranded-25201-25235-mouse-spleen.xlsx
279 pass
262 pass
268 pass
303 pass
266 pass
256 pass
271 pass
272 pass
265 pass
274 pass
268 pass
249 pass
262 pass
275 pass
265 pass
271 pass
273 pass
263 pass
267 pass
264 pass
271 pass
264 pass
272 pass
250 pass
228 pass
252 pass
265 pass
268 pass
256 pass
261 pass
256 pass
266 pass
269 pass
259 pass
258 pass


# Check Library Names

In [23]:
print(spreadsheet_name)
biosample_sheet = pandas.read_excel(spreadsheet_name, sheet_name='Biosample', header=0, engine=engine)

# Print one line per row: whether the sheet description equals the library
# name returned by htsw.get_library, followed by the values compared.
for i, row in biosample_sheet.iterrows():
    library_id = row["library_id:skip"]
    sheet_description = row["description"]
    registered_name = htsw.get_library(library_id)["library_name"]
    names_agree = sheet_description == registered_name
    print(names_agree, library_id, sheet_description, registered_name)

    
#print("\n".join([str(x) for x in fragment_size]))

/home/diane/woldlab/ENCODE/stranded-25201-25235-mouse-spleen.xlsx
True 25201 MouseF1 of C57BL/6 (f) x CAST Eij (m)_m.CD19_spleen_F_67days_1_ENC4_cDNA_901 MouseF1 of C57BL/6 (f) x CAST Eij (m)_m.CD19_spleen_F_67days_1_ENC4_cDNA_901
True 25202 MouseF1 of C57BL/6 (f) x CAST Eij (m)_m.CD19_spleen_F_67days_2_ENC4_cDNA_902 MouseF1 of C57BL/6 (f) x CAST Eij (m)_m.CD19_spleen_F_67days_2_ENC4_cDNA_902
True 25203 MouseF1 of C57BL/6 (f) x CAST Eij (m)_m.CD19_spleen_F_67days_3_ENC4_cDNA_903 MouseF1 of C57BL/6 (f) x CAST Eij (m)_m.CD19_spleen_F_67days_3_ENC4_cDNA_903
True 25204 MouseF1 of C57BL/6 (f) x CAST Eij (m)_m.CD19_spleen_F_67days_4_ENC4_cDNA_904 MouseF1 of C57BL/6 (f) x CAST Eij (m)_m.CD19_spleen_F_67days_4_ENC4_cDNA_904
True 25205 MouseF1 of C57BL/6 (f) x CAST Eij (m)_m.CD19_spleen_M_67days_1_ENC4_cDNA_905 MouseF1 of C57BL/6 (f) x CAST Eij (m)_m.CD19_spleen_M_67days_1_ENC4_cDNA_905
True 25206 MouseF1 of C57BL/6 (f) x CAST Eij (m)_m.CD19_spleen_M_67days_2_ENC4_cDNA_906 MouseF1 of C57BL/6 (f

# Register Biosamples

In [7]:
# Send the Biosample sheet through post_sheet for the /biosamples/ collection.
# NOTE(review): dry_run=True presumably keeps post_sheet from creating anything on the
# server -- confirm against encoded_client before switching it off for a real submission.
biosample = pandas.read_excel(spreadsheet_name, sheet_name='Biosample', header=0, engine=engine)
created = server.post_sheet('/biosamples/', biosample, 
                            verbose=True, 
                            dry_run=True,
                            validator=validator)
# Number of entries post_sheet returned.
print(len(created))

0


In [25]:
# Export the biosample frame only when post_sheet returned something.
# `created` is reassigned by every registration cell, so run this right after the
# biosample post_sheet cell.
if created:
    biosample.to_excel('/dev/shm/biosamples.xlsx', index=False)

# Register Libraries

In [12]:
# Send the Library sheet through post_sheet for the /libraries/ collection.
# NOTE(review): dry_run=True presumably keeps post_sheet from creating anything on the
# server -- confirm against encoded_client before switching it off for a real submission.
print(spreadsheet_name)
libraries = pandas.read_excel(spreadsheet_name, sheet_name='Library', header=0, engine=engine)
created = server.post_sheet('/libraries/', 
                            libraries,
                            verbose=True,
                            dry_run=True,
                            validator=validator)
# Number of entries post_sheet returned.
print(len(created))

/home/diane/woldlab/ENCODE/stranded-25201-25235-mouse-spleen.xlsx
0


In [11]:
# Export the library frame only when post_sheet returned something.
# `created` is reassigned by every registration cell, so run this right after the
# library post_sheet cell.
if created:
    libraries.to_excel('/dev/shm/libraries.xlsx', index=False)

# Register Experiments

In [None]:
# Check that a candidate name matches this character-set pattern once the "/" in
# "C57BL/6" is removed ("/" is not part of the allowed character class).
# NOTE(review): the pattern presumably mirrors a DCC schema rule for names/aliases -- confirm.
name = "MouseF1_of_C57BL/6_(f)_x_CAST_Eij_(m)_m.CD19_spleen_F_62days"
re.match("[a-zA-Z\\d_$.+!*,()'-]+(?:\\s[a-zA-Z\\d_$.+!*,()'-]+)*$", name.replace("C57BL/6","C57BL6"))


In [13]:
# Send the Experiment sheet through post_sheet for the /experiments/ collection.
# NOTE(review): dry_run=True presumably keeps post_sheet from creating anything on the
# server -- confirm against encoded_client before switching it off for a real submission.
print(server.server)
experiments = pandas.read_excel(spreadsheet_name, sheet_name='Experiment', header=0, engine=engine)
# Drop rows whose accession cell holds the placeholder text 'barbara approval needed'.
experiments = experiments[experiments['accession'] != 'barbara approval needed']
created = server.post_sheet('/experiments/', 
                            experiments, 
                            verbose=True, 
                            dry_run=True, 
                            validator=validator)
# Number of entries post_sheet returned.
print(len(created))

www.encodeproject.org
0


In [36]:
# Export the experiment frame only when post_sheet returned something.
# `created` is reassigned by every registration cell, so run this right after the
# experiment post_sheet cell.
if created:
    experiments.to_excel('/dev/shm/experiments.xlsx', index=False)

# Register Replicates

In [17]:
# Send the Replicate sheet through post_sheet for the /replicates/ collection.
# NOTE(review): dry_run=True presumably keeps post_sheet from creating anything on the
# server -- confirm against encoded_client before switching it off for a real submission.
print(server.server)
print(spreadsheet_name)
replicates = pandas.read_excel(spreadsheet_name, sheet_name='Replicate', header=0, engine=engine)
# Drop rows whose uuid cell holds the placeholder text 'barbara approval needed'.
replicates = replicates[replicates['uuid'] != 'barbara approval needed']
created = server.post_sheet('/replicates/',
                            replicates, 
                            verbose=True, 
                            dry_run=True, 
                            validator=validator)
# Number of entries post_sheet returned.
print(len(created))

www.encodeproject.org
/home/diane/woldlab/ENCODE/stranded-25201-25235-mouse-spleen.xlsx
0


In [16]:
# Export the replicate frame only when post_sheet returned something.
# `created` is reassigned by every registration cell, so run this right after the
# replicate post_sheet cell.
if created:
    replicates.to_excel('/dev/shm/replicates.xlsx', index=False)

# Check Files

In [46]:
# Send the File sheet through post_sheet for the /files/ collection.
# NOTE(review): dry_run=True presumably means nothing is uploaded or created here -- confirm
# against encoded_client.
files = pandas.read_excel(spreadsheet_name, sheet_name='File', header=0, engine=engine)
created = server.post_sheet('/files/', files, verbose=True, dry_run=True, validator=validator)
# Number of entries post_sheet returned.
print(len(created))

33
