# Introduction

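Brian gave me an Excel file with a bunch of biosamples. This notebook reads the "sex corrections" sheet from that workbook, compares each listed biosample against www.encodeproject.org, and patches the biosamples whose recorded sex does not match the corrected value.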

In [1]:
import pandas
import re
import sys
from pathlib import Path

In [2]:
# Put the local encoded_client checkout (~/proj/encoded_client) on sys.path.
# The membership check keeps repeated runs of this cell from appending the
# same entry more than once.
# NOTE(review): presumably encoded_client is not installed in the kernel's
# environment, which is why the path tweak is needed -- confirm.
EC = str(Path("~/proj/encoded_client").expanduser())
if EC not in sys.path:
    sys.path.append(EC)
# This import has to stay below the sys.path tweak above.
from encoded_client.encoded import ENCODED

In [3]:
# Client object for www.encodeproject.org; later cells call get_json() on it to
# fetch biosample records and patch_json() to update them.
# NOTE(review): patch_json presumably requires submitter credentials that are
# configured outside this notebook -- confirm before re-running.
server = ENCODED("www.encodeproject.org")

In [4]:
# Directory holding the mouse biosample spreadsheets ("~" is expanded to the
# current user's home directory).
mouse_biosamples = Path("~/woldlab/ENCODE/mouse-biosamples/").expanduser()
# Full path of the workbook that contains the accession replacements and the
# "sex corrections" sheet.
book_filename = mouse_biosamples.joinpath(
    "BioSample_Accession_replacements_for_pooled_samples_Jan12_2022.xlsx"
)

In [5]:
# Open the workbook and list its sheet names. `book` is kept around so that
# later cells can parse individual sheets from it.
book = pandas.ExcelFile(book_filename)
print(book.sheet_names)

['Bernstein 2mo Aug31_2021', 'Bernstein 2mo gasthrtlctxSept15', 'PND14 Bernst Aug31_2021', 'Snyder_July22_21_2mos', 'Snyder PND14 Aug31_21', 'Stam 8mo B6Cast_Oct4_21', 'reaccession Rush brain', 'sex corrections']


In [6]:
# Load the sheet listing biosamples whose recorded sex needs to be corrected,
# then preview the first five rows.
corrections = book.parse(sheet_name="sex corrections")
corrections.head(n=5)

Unnamed: 0,BioSample Accession,age,sex,tissue,animal ID #,date born,date collected
0,ENCBS641RQY,8 months,female (corrected to male),adrenal gland,27,2021-10-20,2021-07-20
1,ENCBS840BRQ,8 months,female (corrected to male),cerebellum,27,2021-10-20,2021-07-20
2,ENCBS302JNY,8 months,female (corrected to male),gastrocnemius,27,2021-10-20,2021-07-20
3,ENCBS367EYF,8 months,female (corrected to male),heart,27,2021-10-20,2021-07-20
4,ENCBS059FJJ,8 months,female (corrected to male),hippocampus,27,2021-10-20,2021-07-20


In [18]:
# Read-only verification pass: for every row of the "sex corrections" sheet,
# fetch the biosample from the server and report how the server's "sex" value
# compares with the spreadsheet's original and corrected values.
#
# NOTE(review): the saved output of this cell appears to have been produced
# after the patching cell had already run (execution count 18 vs. 16), which
# would explain why every row shows the server agreeing with the corrected
# value -- confirm before relying on it as a "before" snapshot.

# Raw string: "\(" and "\)" are regex escapes, not Python string escapes
# (non-raw "\(" is an invalid escape sequence and warns on newer Pythons).
# Matches e.g. "female (corrected to male)".
sexes = re.compile(r"(?P<current>(male)|(female)) \(corrected to (?P<desired>(male)|(female))\)")
for i, row in corrections.iterrows():
    accession = row["BioSample Accession"]
    # str() keeps blank cells (which pandas yields as float NaN) from raising
    # a TypeError inside the regex search; they simply fail to match.
    match = sexes.search(str(row["sex"]))
    if match is None:
        # Report and skip instead of crashing on match.group() below.
        print("{} skipped: unrecognized sex annotation {!r}".format(accession, row["sex"]))
        continue
    current = match.group("current")
    desired = match.group("desired")
    biosample = server.get_json(accession)
    # Columns: accession, sheet's original value, server != original,
    #          sheet's corrected value, server != corrected.
    print(accession, current, biosample["sex"] != current, desired, biosample["sex"] != desired)


ENCBS641RQY female True male False
ENCBS840BRQ female True male False
ENCBS302JNY female True male False
ENCBS367EYF female True male False
ENCBS059FJJ female True male False
ENCBS170SSW female True male False
ENCBS158ROD female True male False
ENCBS146CZA female True male False
ENCBS516WOV female True male False
ENCBS666FTV female True male False
ENCBS432VAU female True male False
ENCBS603DJU female True male False
ENCBS231IMT female True male False
ENCBS464MLL female True male False
ENCBS664VXG male True female False
ENCBS054OUK male True female False
ENCBS386XLW male True female False
ENCBS417NMX male True female False
ENCBS184RLN male True female False
ENCBS987EJP male True female False
ENCBS151JJZ male True female False
ENCBS447THP male True female False
ENCBS124YMM male True female False
ENCBS794LQT male True female False
ENCBS023LOR male True female False
ENCBS155NZE male True female False
ENCBS788DEB male True female False
ENCBS044NHG male True female False


In [16]:
# Apply the corrections: for every row of the "sex corrections" sheet, fetch
# the biosample and, if the server's "sex" differs from the corrected value,
# PATCH the record. Rows that already match are left alone, so re-running this
# cell does not issue redundant writes.
#
# WARNING: this cell modifies records on the server.

# Raw string: "\(" and "\)" are regex escapes, not Python string escapes
# (non-raw "\(" is an invalid escape sequence and warns on newer Pythons).
# Matches e.g. "female (corrected to male)".
sexes = re.compile(r"(?P<current>(male)|(female)) \(corrected to (?P<desired>(male)|(female))\)")
for i, row in corrections.iterrows():
    accession = row["BioSample Accession"]
    # str() keeps blank cells (which pandas yields as float NaN) from raising
    # a TypeError inside the regex search; they simply fail to match.
    match = sexes.search(str(row["sex"]))
    if match is None:
        # Never patch based on a row we could not parse; report and move on.
        print("{} skipped: unrecognized sex annotation {!r}".format(accession, row["sex"]))
        continue
    current = match.group("current")
    desired = match.group("desired")
    biosample = server.get_json(accession)
    if biosample["sex"] != desired:
        print("{} Changing: {} to {}".format(accession, current, desired))
        # NOTE(review): the record is read through "sex" but written through
        # "model_organism_sex"; presumably "sex" is a derived field on the
        # server -- confirm against the ENCODE biosample schema.
        server.patch_json(biosample["@id"], {"model_organism_sex": desired})
    else:
        print("{} is correct".format(accession))


ENCBS641RQY is correct
ENCBS840BRQ is correct
ENCBS302JNY is correct
ENCBS367EYF is correct
ENCBS059FJJ is correct
ENCBS170SSW is correct
ENCBS158ROD is correct
ENCBS146CZA is correct
ENCBS516WOV is correct
ENCBS666FTV is correct
ENCBS432VAU is correct
ENCBS603DJU is correct
ENCBS231IMT is correct
ENCBS464MLL is correct
ENCBS664VXG is correct
ENCBS054OUK is correct
ENCBS386XLW Changing: male to female
ENCBS417NMX Changing: male to female
ENCBS184RLN Changing: male to female
ENCBS987EJP Changing: male to female
ENCBS151JJZ Changing: male to female
ENCBS447THP Changing: male to female
ENCBS124YMM Changing: male to female
ENCBS794LQT Changing: male to female
ENCBS023LOR Changing: male to female
ENCBS155NZE Changing: male to female
ENCBS788DEB Changing: male to female
ENCBS044NHG Changing: male to female
