# Introduction

On today's cell annotation call there was a complaint that there were frequent barcode collisions in the analysis.

It's not entirely clear how frequently they should collide.

In [1]:
import pandas
import sys
from tqdm import tqdm
from collections import Counter
import tempfile

In [2]:
%run -m pip install encoded_client



In [3]:
%run -m pip install mex_gene_archive



In [4]:
from encoded_client.encoded import ENCODED
from mex_gene_archive.reader import read_mex_archive

In [5]:
# Client bound to the ENCODE portal host; the metadata lookups and file
# downloads in the cells below all go through this object.
server = ENCODED("www.encodeproject.org")

In [6]:
# Search for the experiments to inspect. The query filters on:
#   type=Experiment, status=released, perturbed=false, no control_type set,
#   lab.title="Ali Mortazavi, UCI", assay_title=scRNA-seq, and
#   library construction_method="Parse Single Cell Whole Transcriptome Kit".
# The matching records are read from splitseq["@graph"] further down.
splitseq = server.get_json("https://www.encodeproject.org/search/?type=Experiment&control_type!=*&status=released&perturbed=false&lab.title=Ali+Mortazavi%2C+UCI&assay_title=scRNA-seq&replicates.library.construction_method=Parse+Single+Cell+Whole+Transcriptome+Kit")


requesting url: https://www.encodeproject.org/search/?type=Experiment&control_type!=*&status=released&perturbed=false&lab.title=Ali+Mortazavi%2C+UCI&assay_title=scRNA-seq&replicates.library.construction_method=Parse+Single+Cell+Whole+Transcriptome+Kit


In [7]:
def get_analyses(file):
    """Return the "@id" of every analysis attached to a file record.

    ``file`` is a dict-like record. When it has no "analyses" entry, or the
    entry is None, an empty list is returned; otherwise the "@id" of each
    analysis is returned in the order the analyses are listed.
    """
    analysis_ids = []
    attached = file.get("analyses")
    if attached is not None:
        for analysis in attached:
            analysis_ids.append(analysis["@id"])
    return analysis_ids
    
def get_barcodes(server, file):
    """Download a file's MEX archive and return its metadata and barcodes.

    Parameters:
        server: client object providing ``get_response(href, stream=True)``.
        file: file record; its "href" entry is the download location.

    Returns:
        A ``(metadata, barcodes)`` tuple taken from the "metadata" and
        "barcodes" entries of the ``read_mex_archive`` result.
    """
    href = file["href"]
    response = server.get_response(href, stream=True)
    try:
        # Feed the raw streamed body to the reader instead of buffering the
        # whole archive first.
        result = read_mex_archive(fileobj=response.raw)
    finally:
        # A streamed response holds on to its connection until it is closed;
        # this function is called once per matrix file, so release it here
        # even when parsing fails.
        # NOTE(review): assumes the response is a requests-style object with
        # close() (it exposes .raw) and that read_mex_archive has finished
        # reading by the time it returns - confirm.
        response.close()

    return (result["metadata"], result["barcodes"])

In [8]:
# collisions: barcode -> list of (experiment accession, library accession)
#             pairs, one entry per matrix file that lists the barcode.
# experiment_info: experiment accession -> a few biosample/replication fields.
collisions = {}
experiment_info = {}
total = len(splitseq["@graph"])
for i, row in enumerate(splitseq["@graph"]):
    print(i, total)
    if "Experiment" not in row["@type"]:
        continue

    accession = row["accession"]
    # Fetch the full experiment record; the file list and default analysis
    # used below are read from it.
    experiment = server.get_json("/experiment/{}/".format(accession))
    ontology = experiment["biosample_ontology"]
    experiment_info[accession] = {
        "ontology_term_name": ontology["term_name"],
        "biosample_classification": ontology["classification"],
        "replication_type": experiment["replication_type"],
    }

    default_analysis = experiment["default_analysis"]
    for f in experiment["files"]:
        # Only the unfiltered gene count matrices that belong to the
        # experiment's default analysis are downloaded.
        if f["output_type"] != "unfiltered sparse gene count matrix of all reads":
            continue
        if default_analysis not in get_analyses(f):
            continue

        manifest, barcodes = get_barcodes(server, f)
        for barcode in barcodes:
            hits = collisions.setdefault(barcode, [])
            hits.append((manifest["experiment_accession"], manifest["library_accession"]))
    

0 221
requesting url: https://www.encodeproject.org/experiment/ENCSR399TQO/
requesting url: https://www.encodeproject.org/files/ENCFF144AJU/@@download/ENCFF144AJU.tar.gz
requesting url: https://www.encodeproject.org/files/ENCFF724IZZ/@@download/ENCFF724IZZ.tar.gz
1 221
requesting url: https://www.encodeproject.org/experiment/ENCSR435CMT/
requesting url: https://www.encodeproject.org/files/ENCFF592GXV/@@download/ENCFF592GXV.tar.gz
requesting url: https://www.encodeproject.org/files/ENCFF245HDH/@@download/ENCFF245HDH.tar.gz
2 221
requesting url: https://www.encodeproject.org/experiment/ENCSR644SME/
requesting url: https://www.encodeproject.org/files/ENCFF438DYL/@@download/ENCFF438DYL.tar.gz
requesting url: https://www.encodeproject.org/files/ENCFF608RMA/@@download/ENCFF608RMA.tar.gz
3 221
requesting url: https://www.encodeproject.org/experiment/ENCSR399EUZ/
requesting url: https://www.encodeproject.org/files/ENCFF674YGW/@@download/ENCFF674YGW.tar.gz
requesting url: https://www.encodeproj

31 221
requesting url: https://www.encodeproject.org/experiment/ENCSR823CLM/
requesting url: https://www.encodeproject.org/files/ENCFF883PAZ/@@download/ENCFF883PAZ.tar.gz
requesting url: https://www.encodeproject.org/files/ENCFF534RLH/@@download/ENCFF534RLH.tar.gz
32 221
requesting url: https://www.encodeproject.org/experiment/ENCSR329JYG/
requesting url: https://www.encodeproject.org/files/ENCFF804TTW/@@download/ENCFF804TTW.tar.gz
requesting url: https://www.encodeproject.org/files/ENCFF394DHC/@@download/ENCFF394DHC.tar.gz
33 221
requesting url: https://www.encodeproject.org/experiment/ENCSR305VTL/
requesting url: https://www.encodeproject.org/files/ENCFF793IPC/@@download/ENCFF793IPC.tar.gz
requesting url: https://www.encodeproject.org/files/ENCFF827YMI/@@download/ENCFF827YMI.tar.gz
34 221
requesting url: https://www.encodeproject.org/experiment/ENCSR659DFF/
requesting url: https://www.encodeproject.org/files/ENCFF020HJU/@@download/ENCFF020HJU.tar.gz
requesting url: https://www.encode

62 221
requesting url: https://www.encodeproject.org/experiment/ENCSR488XVC/
requesting url: https://www.encodeproject.org/files/ENCFF262GLU/@@download/ENCFF262GLU.tar.gz
requesting url: https://www.encodeproject.org/files/ENCFF708DBJ/@@download/ENCFF708DBJ.tar.gz
63 221
requesting url: https://www.encodeproject.org/experiment/ENCSR823HRL/
requesting url: https://www.encodeproject.org/files/ENCFF293TRJ/@@download/ENCFF293TRJ.tar.gz
requesting url: https://www.encodeproject.org/files/ENCFF162QTN/@@download/ENCFF162QTN.tar.gz
64 221
requesting url: https://www.encodeproject.org/experiment/ENCSR137UJM/
requesting url: https://www.encodeproject.org/files/ENCFF489LKH/@@download/ENCFF489LKH.tar.gz
requesting url: https://www.encodeproject.org/files/ENCFF565NTX/@@download/ENCFF565NTX.tar.gz
65 221
requesting url: https://www.encodeproject.org/experiment/ENCSR730JFZ/
requesting url: https://www.encodeproject.org/files/ENCFF900MWO/@@download/ENCFF900MWO.tar.gz
requesting url: https://www.encode

93 221
requesting url: https://www.encodeproject.org/experiment/ENCSR320EFC/
requesting url: https://www.encodeproject.org/files/ENCFF692EVE/@@download/ENCFF692EVE.tar.gz
requesting url: https://www.encodeproject.org/files/ENCFF718SJZ/@@download/ENCFF718SJZ.tar.gz
94 221
requesting url: https://www.encodeproject.org/experiment/ENCSR291RIX/
requesting url: https://www.encodeproject.org/files/ENCFF007MDK/@@download/ENCFF007MDK.tar.gz
requesting url: https://www.encodeproject.org/files/ENCFF707WVW/@@download/ENCFF707WVW.tar.gz
95 221
requesting url: https://www.encodeproject.org/experiment/ENCSR277UKO/
requesting url: https://www.encodeproject.org/files/ENCFF040FDO/@@download/ENCFF040FDO.tar.gz
requesting url: https://www.encodeproject.org/files/ENCFF586RZU/@@download/ENCFF586RZU.tar.gz
96 221
requesting url: https://www.encodeproject.org/experiment/ENCSR403MNL/
requesting url: https://www.encodeproject.org/files/ENCFF382JCD/@@download/ENCFF382JCD.tar.gz
requesting url: https://www.encode

125 221
requesting url: https://www.encodeproject.org/experiment/ENCSR358NKP/
requesting url: https://www.encodeproject.org/files/ENCFF489TJK/@@download/ENCFF489TJK.tar.gz
requesting url: https://www.encodeproject.org/files/ENCFF773WGE/@@download/ENCFF773WGE.tar.gz
126 221
requesting url: https://www.encodeproject.org/experiment/ENCSR826EHD/
requesting url: https://www.encodeproject.org/files/ENCFF359FSC/@@download/ENCFF359FSC.tar.gz
requesting url: https://www.encodeproject.org/files/ENCFF252PKI/@@download/ENCFF252PKI.tar.gz
127 221
requesting url: https://www.encodeproject.org/experiment/ENCSR396XOO/
requesting url: https://www.encodeproject.org/files/ENCFF305GTC/@@download/ENCFF305GTC.tar.gz
requesting url: https://www.encodeproject.org/files/ENCFF756MCT/@@download/ENCFF756MCT.tar.gz
128 221
requesting url: https://www.encodeproject.org/experiment/ENCSR531SRE/
requesting url: https://www.encodeproject.org/files/ENCFF072FGQ/@@download/ENCFF072FGQ.tar.gz
requesting url: https://www.en

requesting url: https://www.encodeproject.org/files/ENCFF439NTJ/@@download/ENCFF439NTJ.tar.gz
157 221
requesting url: https://www.encodeproject.org/experiment/ENCSR567YKE/
requesting url: https://www.encodeproject.org/files/ENCFF573JGO/@@download/ENCFF573JGO.tar.gz
requesting url: https://www.encodeproject.org/files/ENCFF027EOI/@@download/ENCFF027EOI.tar.gz
158 221
requesting url: https://www.encodeproject.org/experiment/ENCSR975YAQ/
requesting url: https://www.encodeproject.org/files/ENCFF819FJO/@@download/ENCFF819FJO.tar.gz
requesting url: https://www.encodeproject.org/files/ENCFF559QFA/@@download/ENCFF559QFA.tar.gz
159 221
requesting url: https://www.encodeproject.org/experiment/ENCSR794RPU/
requesting url: https://www.encodeproject.org/files/ENCFF100LQA/@@download/ENCFF100LQA.tar.gz
requesting url: https://www.encodeproject.org/files/ENCFF068JJH/@@download/ENCFF068JJH.tar.gz
160 221
requesting url: https://www.encodeproject.org/experiment/ENCSR165FLF/
requesting url: https://www.en

requesting url: https://www.encodeproject.org/files/ENCFF227GTS/@@download/ENCFF227GTS.tar.gz
188 221
requesting url: https://www.encodeproject.org/experiment/ENCSR855URG/
requesting url: https://www.encodeproject.org/files/ENCFF473LKH/@@download/ENCFF473LKH.tar.gz
requesting url: https://www.encodeproject.org/files/ENCFF105VJG/@@download/ENCFF105VJG.tar.gz
189 221
requesting url: https://www.encodeproject.org/experiment/ENCSR693KAV/
requesting url: https://www.encodeproject.org/files/ENCFF285QGN/@@download/ENCFF285QGN.tar.gz
requesting url: https://www.encodeproject.org/files/ENCFF359ECE/@@download/ENCFF359ECE.tar.gz
190 221
requesting url: https://www.encodeproject.org/experiment/ENCSR132KMR/
requesting url: https://www.encodeproject.org/files/ENCFF116IRO/@@download/ENCFF116IRO.tar.gz
requesting url: https://www.encodeproject.org/files/ENCFF318CRR/@@download/ENCFF318CRR.tar.gz
191 221
requesting url: https://www.encodeproject.org/experiment/ENCSR773CKJ/
requesting url: https://www.en

219 221
requesting url: https://www.encodeproject.org/experiment/ENCSR253YLA/
requesting url: https://www.encodeproject.org/files/ENCFF921PJL/@@download/ENCFF921PJL.tar.gz
requesting url: https://www.encodeproject.org/files/ENCFF843JXE/@@download/ENCFF843JXE.tar.gz
220 221
requesting url: https://www.encodeproject.org/experiment/ENCSR303EIR/
requesting url: https://www.encodeproject.org/files/ENCFF477JSG/@@download/ENCFF477JSG.tar.gz
requesting url: https://www.encodeproject.org/files/ENCFF831CYO/@@download/ENCFF831CYO.tar.gz


In [9]:
# count: for every barcode, how many (experiment, library) entries it collected.
# lengths: how many barcodes there are of each string length.
count = pandas.Series({barcode: len(hits) for barcode, hits in collisions.items()})
lengths = Counter(len(barcode) for barcode in collisions)

In [10]:
# Distinct occurrence counts over all barcodes; a single value means every
# barcode was recorded the same number of times.
set(count)

{436}

In [11]:
# Number of barcodes for each barcode string length.
lengths

Counter({18: 92160, 19: 350208})

In [14]:
# Number of distinct barcodes collected across all downloaded matrices.
len(collisions)

442368

In [12]:
# Show the first two barcode keys to see what the identifiers look like.
list(collisions.keys())[:2]

['AAACATCGAAACATCG_1', 'AACAACCAAAACATCG_1']

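Conclusion: the same set of barcodes is reused for every library. Every one of the 442,368 distinct barcodes was recorded exactly 436 times — the same count for all of them — so seeing the same barcode in different libraries is expected, and a barcode is only unique in combination with its library.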