In [2]:
import pandas as pd
import numpy as np
from collections import Counter, defaultdict
import glob
import seaborn as sns
import gzip
import matplotlib.pyplot as plt


In [27]:
# weissman sgRNAs
library = pd.read_csv("TableS3_hCRISPRiv2_libraries.csv")

weissman_guides = [x.upper() for x in list(library["protospacer sequence"])]
library.head()

Unnamed: 0,sgID,gene,transcript,protospacer sequence,selection rank,predicted score,empirical score,off-target stringency,CRISPRi-v2.1 predicted score,Sublibrary,Sublibrary half
0,A1BG_-_58858617.23-P1,A1BG,P1,GGAGACCCAGCGCTAACCAG,1.0,1.008816,,0,0.685071,h3,Top5
1,A1BG_-_58858788.23-P1,A1BG,P1,GGGGCACCCAGGAGCGGTAG,2.0,0.901176,,0,0.782793,h3,Top5
2,A1BG_+_58858964.23-P1,A1BG,P1,GCTCCGGGCGACGTGGAGTG,3.0,0.836188,,0,0.870837,h3,Top5
3,A1BG_-_58858630.23-P1,A1BG,P1,GAACCAGGGGTGCCCAAGGG,4.0,0.827551,,0,0.590668,h3,Top5
4,A1BG_+_58858549.23-P1,A1BG,P1,GGCGAGGAACCGCCCAGCAA,5.0,0.775395,,0,0.49228,h3,Top5


In [14]:
def get_lines(fn, counter, start_at_zero=False):
    i = 0
    with gzip.open(fn,'r') as fin:
        for line in fin:
            i += 1
            if ((i + 2) % 4 == 0): 
                if (i % 1000002 == 0):
                    print(i)
                # change the first letter to G because it's often read wrong
                # also read the first 20
                if not start_at_zero: seq = 'G' + line.rstrip()[1:20].decode() 
                else: seq = 'G' + line.rstrip()[:19].decode() 
                
                if seq not in counter: counter[seq] = 1
                else: counter[seq] += 1
        
    return(counter)

# get dictionary of reads for all files
def get_read_dict(fn1, fn2, fn3, fn4, fn5):
    print(fn1)
    counter = defaultdict(lambda: 0)
    
    counter = get_lines(fn1, counter)
    counter = get_lines(fn2, counter)
    counter = get_lines(fn3, counter)
    counter = get_lines(fn4, counter)
    counter = get_lines(fn5, counter, start_at_zero=True)  # these were from the third run where the first base is not G 

    
    return(dict(counter))

In [46]:
samples_to_dict = defaultdict(lambda: Counter())
for fn in glob.glob("raw_fastqs/*.gz"):
    cond = fn.replace("raw_fastqs/220311Van_D22-2700_NA_sequence_", "").replace(".fastq.gz", "")
    print(cond)
    samples_to_dict[cond] = get_lines(fn, samples_to_dict[cond], start_at_zero=False)

K562_BRQ_CD235a_High
1000002
3000006
5000010
7000014
9000018
11000022
13000026
15000030
17000034
19000038
21000042
23000046
25000050
27000054
29000058
31000062
33000066
35000070
K562_BRQ_CD235a_Low
1000002
3000006
5000010
7000014
9000018
11000022
13000026
15000030
17000034
19000038
21000042
23000046
25000050
27000054
29000058
31000062
33000066
35000070
37000074
K562_DMSO_CD235a_All
1000002
3000006
5000010
7000014
9000018
11000022
13000026
15000030
17000034
19000038
21000042
23000046
25000050
27000054
29000058
31000062
33000066
35000070
37000074
39000078
K562_DMSO_CD235a_High
1000002
3000006
5000010
7000014
9000018
11000022
13000026
15000030
17000034
19000038
21000042
23000046
25000050
27000054
29000058
31000062
33000066
35000070
37000074
39000078
K562_DMSO_CD235a_Low
1000002
1000002
3000006
5000010
7000014
9000018
11000022
13000026
15000030
17000034
19000038
21000042
23000046
25000050
27000054
29000058
31000062
33000066
K562_DMSO_CD61_High
1000002
3000006
5000010
7000014
9000018
110000

In [36]:
library["cts"] = guide_cts
library.head()

Unnamed: 0,sgID,gene,transcript,protospacer sequence,selection rank,predicted score,empirical score,off-target stringency,CRISPRi-v2.1 predicted score,Sublibrary,Sublibrary half,cts
0,A1BG_-_58858617.23-P1,A1BG,P1,GGAGACCCAGCGCTAACCAG,1.0,1.008816,,0,0.685071,h3,Top5,0
1,A1BG_-_58858788.23-P1,A1BG,P1,GGGGCACCCAGGAGCGGTAG,2.0,0.901176,,0,0.782793,h3,Top5,0
2,A1BG_+_58858964.23-P1,A1BG,P1,GCTCCGGGCGACGTGGAGTG,3.0,0.836188,,0,0.870837,h3,Top5,0
3,A1BG_-_58858630.23-P1,A1BG,P1,GAACCAGGGGTGCCCAAGGG,4.0,0.827551,,0,0.590668,h3,Top5,0
4,A1BG_+_58858549.23-P1,A1BG,P1,GGCGAGGAACCGCCCAGCAA,5.0,0.775395,,0,0.49228,h3,Top5,0


In [40]:
library[library["gene"] == "CPSF1"]

Unnamed: 0,sgID,gene,transcript,protospacer sequence,selection rank,predicted score,empirical score,off-target stringency,CRISPRi-v2.1 predicted score,Sublibrary,Sublibrary half,cts
39830,CPSF1_-_145634507.23-P1P2,CPSF1,P1P2,GTACATGGAGAACTCCAGAC,1.0,,1.149905,0,0.761732,h4,Top5,497
39831,CPSF1_-_145634725.23-P1P2,CPSF1,P1P2,GGACAGCAGCGAACTCAGTC,2.0,,0.977292,0,0.695598,h4,Top5,460
39832,CPSF1_-_145634704.23-P1P2,CPSF1,P1P2,GAGCCGACTCGAGAGGAACC,3.0,0.8227,0.807319,0,0.873691,h4,Top5,653
39833,CPSF1_+_145634667.23-P1P2,CPSF1,P1P2,GTCGGCTCCAACTGCCAGGT,4.0,0.882392,,0,0.889822,h4,Top5,646
39834,CPSF1_-_145634697.23-P1P2,CPSF1,P1P2,GCAGTTGGAGCCGACTCGAG,5.0,0.824519,,0,0.909527,h4,Top5,374
39835,CPSF1_-_145634511.23-P1P2,CPSF1,P1P2,GTGGAGAACTCCAGACCGGT,6.0,0.77997,,0,0.754372,h4,Supp5,736
39836,CPSF1_+_145634685.23-P1P2,CPSF1,P1P2,GGTCCCGGTTCCTCTCGAGT,7.0,0.774557,,0,0.861061,h4,Supp5,888
39837,CPSF1_+_145634671.23-P1P2,CPSF1,P1P2,GCGAGTCGGCTCCAACTGCC,8.0,0.742082,,0,0.698324,h4,Supp5,1008
39838,CPSF1_-_145634203.23-P1P2,CPSF1,P1P2,GCTTCCCGGTGGGGTAGGAa,9.0,0.718066,,0,0.443279,h4,Supp5,653
39839,CPSF1_+_145634659.23-P1P2,CPSF1,P1P2,GAACTGCCAGGTgggcggcc,10.0,0.651736,,0,0.653253,h4,Supp5,908


In [65]:
import pickle
pickle.dump(samples_to_dict, open("230727_bd246_mapping.pkl", "wb"))

PicklingError: Can't pickle <function <lambda> at 0x7f6225947b90>: attribute lookup <lambda> on __main__ failed

In [50]:
samples_to_dict['K562_BRQ_CD235a_High']

Counter({'GCCCANGAGCCCGACTTCAA': 1,
         'GACGCNGCCCCACCATAGAG': 2,
         'GCCTCNGCGAGACCACCCGT': 1,
         'GCCGGNACCGGTGTCCCCGA': 1,
         'GGCCCNCGCGTGCCAGCCGA': 1,
         'GTGCCNACACTGACCCCGTG': 2,
         'GTTTCCAGAGGGATATCCTA': 560,
         'GCCCAGCGGCCCCGGTCCCG': 283,
         'GACTCATGCACTCACGGCCA': 533,
         'GGACCAGTCGGCGATCCCAA': 572,
         'GGCCATGCACCCAGTCTTCG': 648,
         'GTGGGCCGGCCCGAAGGTGG': 646,
         'GCGGAGAACAATATGGCGGA': 297,
         'GCCAGGTGCTCGGAGAAACC': 391,
         'GGACGCCAGAGAGACTCGTG': 640,
         'GACCAGGATGGGCACCACCC': 138439,
         'GCGGCCGGGGACTGACGGTA': 478,
         'GGGCCAGGAATAAAGGATCT': 513,
         'GCGAGGCGCGTAAGTGGGGG': 539,
         'GTGTACCCTGCTACTCCTGA': 577,
         'GGCCCGACCCCACGCCAAGA': 624,
         'GTGGCGTCGGTCCCTCGGTC': 661,
         'GCCAAGCCCCGCAGCTACGG': 688,
         'GGCTCTCCCCGCGTCCAAGA': 643,
         'GGCTCCCAAGGCCTCTACGT': 434,
         'GAGCGCAAGCAGCGGGTTAG': 669,
         'GGCGGCGGCGT

In [82]:
# a dict of count vectors
sample_to_counts = {}
for i in list(samples_to_dict.keys())[:34]:
    sample_to_counts[i] = [samples_to_dict[i].get(k, 0) for k in weissman_guides]

In [84]:
counts = pd.DataFrame.from_dict(sample_to_counts)
counts.index = library["sgID"]
counts.columns = list(samples_to_dict.keys())[:34]

counts.to_csv("230727_bd246_sgrna_counts.csv")
counts.head()

Unnamed: 0_level_0,K562_BRQ_CD235a_High,K562_BRQ_CD235a_Low,K562_DMSO_CD235a_All,K562_DMSO_CD235a_High,K562_DMSO_CD235a_Low,K562_DMSO_CD61_All,K562_DMSO_CD61_High,K562_DMSO_CD61_Low,K562_HU_CD235a_All,K562_HU_CD235a_High,...,THP-1_DMSO_CD14_Low,THP-1_HU_CD11b_All,THP-1_HU_CD11b_High,THP-1_HU_CD11b_Low,THP-1_HU_CD14_All,THP-1_HU_CD14_High,THP-1_HU_CD14_Low,THP-1_PMA_CD14_High,THP-1_PMA_CD14_Low,THP-1___Pre
sgID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A1BG_-_58858617.23-P1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
A1BG_-_58858788.23-P1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
A1BG_+_58858964.23-P1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
A1BG_-_58858630.23-P1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
A1BG_+_58858549.23-P1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [85]:
counts_valid = counts[counts.sum(axis=1) > 10]
counts_valid.to_csv("230727_bd246_sgrna_counts_validonly.csv")

counts_valid

Unnamed: 0_level_0,K562_BRQ_CD235a_High,K562_BRQ_CD235a_Low,K562_DMSO_CD235a_All,K562_DMSO_CD235a_High,K562_DMSO_CD235a_Low,K562_DMSO_CD61_All,K562_DMSO_CD61_High,K562_DMSO_CD61_Low,K562_HU_CD235a_All,K562_HU_CD235a_High,...,THP-1_DMSO_CD14_Low,THP-1_HU_CD11b_All,THP-1_HU_CD11b_High,THP-1_HU_CD11b_Low,THP-1_HU_CD14_All,THP-1_HU_CD14_High,THP-1_HU_CD14_Low,THP-1_PMA_CD14_High,THP-1_PMA_CD14_Low,THP-1___Pre
sgID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AAAS_-_53715438.23-P1P2,4431,4124,6068,4822,4258,4806,3580,4088,4731,6100,...,2970,2410,1113,3961,2226,1879,2687,3917,4397,3965
AAAS_+_53715355.23-P1P2,624,787,824,824,660,818,501,494,675,954,...,490,417,357,533,429,359,401,700,597,719
AAAS_-_53715389.23-P1P2,621,719,545,692,656,481,369,407,631,1031,...,362,343,168,517,300,156,329,779,410,466
AAAS_+_53714983.23-P1P2,363,223,511,436,483,511,320,300,335,686,...,423,213,198,329,215,275,206,201,362,453
AAAS_-_53715371.23-P1P2,767,470,628,813,667,501,450,333,494,419,...,351,185,120,425,325,208,350,365,364,436
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
non-targeting_03742,395,405,601,637,652,328,268,284,555,552,...,381,279,239,372,278,376,330,387,403,593
non-targeting_03745,556,586,750,798,813,629,575,474,591,723,...,509,308,244,373,408,368,369,749,825,636
non-targeting_03755,523,661,610,363,587,608,471,380,701,638,...,471,336,239,360,273,323,342,447,508,587
non-targeting_03772,521,442,659,622,608,698,446,409,451,888,...,408,354,233,339,275,241,318,566,438,544


## MAGECK

In [99]:
counts_mageck = pd.read_csv("230727_bd246_sgrna_counts_validonly.csv", index_col=0)
counts_mageck.head()

Unnamed: 0_level_0,K562_BRQ_CD235a_High,K562_BRQ_CD235a_Low,K562_DMSO_CD235a_All,K562_DMSO_CD235a_High,K562_DMSO_CD235a_Low,K562_DMSO_CD61_All,K562_DMSO_CD61_High,K562_DMSO_CD61_Low,K562_HU_CD235a_All,K562_HU_CD235a_High,...,THP-1_DMSO_CD14_Low,THP-1_HU_CD11b_All,THP-1_HU_CD11b_High,THP-1_HU_CD11b_Low,THP-1_HU_CD14_All,THP-1_HU_CD14_High,THP-1_HU_CD14_Low,THP-1_PMA_CD14_High,THP-1_PMA_CD14_Low,THP-1___Pre
sgID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AAAS_-_53715438.23-P1P2,4431,4124,6068,4822,4258,4806,3580,4088,4731,6100,...,2970,2410,1113,3961,2226,1879,2687,3917,4397,3965
AAAS_+_53715355.23-P1P2,624,787,824,824,660,818,501,494,675,954,...,490,417,357,533,429,359,401,700,597,719
AAAS_-_53715389.23-P1P2,621,719,545,692,656,481,369,407,631,1031,...,362,343,168,517,300,156,329,779,410,466
AAAS_+_53714983.23-P1P2,363,223,511,436,483,511,320,300,335,686,...,423,213,198,329,215,275,206,201,362,453
AAAS_-_53715371.23-P1P2,767,470,628,813,667,501,450,333,494,419,...,351,185,120,425,325,208,350,365,364,436


In [97]:
col_list

['gene_name',
 'K562_BRQ_CD235a_Low',
 'K562_DMSO_CD235a_All',
 'K562_DMSO_CD235a_High',
 'K562_DMSO_CD235a_Low',
 'K562_DMSO_CD61_All',
 'K562_DMSO_CD61_High',
 'K562_DMSO_CD61_Low',
 'K562_HU_CD235a_All',
 'K562_HU_CD235a_High',
 'K562_HU_CD235a_Low',
 'K562_PMA_CD235a_High',
 'K562_PMA_CD235a_Low',
 'K562___Pre',
 'THP-1_BRQ_CD11b_All',
 'THP-1_BRQ_CD11b_High',
 'THP-1_BRQ_CD11b_Low',
 'THP-1_BRQ_CD14_High',
 'THP-1_BRQ_CD14_Low',
 'THP-1_DMSO_CD11b_All',
 'THP-1_DMSO_CD11b_High',
 'THP-1_DMSO_CD11b_Low',
 'THP-1_DMSO_CD14_All',
 'THP-1_DMSO_CD14_High',
 'THP-1_DMSO_CD14_Low',
 'THP-1_HU_CD11b_All',
 'THP-1_HU_CD11b_High',
 'THP-1_HU_CD11b_Low',
 'THP-1_HU_CD14_All',
 'THP-1_HU_CD14_High',
 'THP-1_HU_CD14_Low',
 'THP-1_PMA_CD14_High',
 'THP-1_PMA_CD14_Low',
 'THP-1___Pre']

In [100]:
# Making a file for MAGECK:
gene_name = [x.split('_')[0] for x in counts_mageck.index]
counts_mageck["gene_name"] = gene_name
col_list = ["gene_name"]
col_list.extend(counts_mageck.columns[:34])

counts_mageck = counts_mageck[col_list]
counts_mageck.head()

Unnamed: 0_level_0,gene_name,K562_BRQ_CD235a_High,K562_BRQ_CD235a_Low,K562_DMSO_CD235a_All,K562_DMSO_CD235a_High,K562_DMSO_CD235a_Low,K562_DMSO_CD61_All,K562_DMSO_CD61_High,K562_DMSO_CD61_Low,K562_HU_CD235a_All,...,THP-1_DMSO_CD14_Low,THP-1_HU_CD11b_All,THP-1_HU_CD11b_High,THP-1_HU_CD11b_Low,THP-1_HU_CD14_All,THP-1_HU_CD14_High,THP-1_HU_CD14_Low,THP-1_PMA_CD14_High,THP-1_PMA_CD14_Low,THP-1___Pre
sgID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AAAS_-_53715438.23-P1P2,AAAS,4431,4124,6068,4822,4258,4806,3580,4088,4731,...,2970,2410,1113,3961,2226,1879,2687,3917,4397,3965
AAAS_+_53715355.23-P1P2,AAAS,624,787,824,824,660,818,501,494,675,...,490,417,357,533,429,359,401,700,597,719
AAAS_-_53715389.23-P1P2,AAAS,621,719,545,692,656,481,369,407,631,...,362,343,168,517,300,156,329,779,410,466
AAAS_+_53714983.23-P1P2,AAAS,363,223,511,436,483,511,320,300,335,...,423,213,198,329,215,275,206,201,362,453
AAAS_-_53715371.23-P1P2,AAAS,767,470,628,813,667,501,450,333,494,...,351,185,120,425,325,208,350,365,364,436


In [130]:
d = dict(Counter(counts_mageck["gene_name"]))
df = pd.DataFrame.from_dict(d, orient='index')
df.columns = ["num_sgrna"]

In [140]:
counts_mageck[counts_mageck["gene_name"] == "PELO"]

Unnamed: 0_level_0,gene_name,K562_BRQ_CD235a_High,K562_BRQ_CD235a_Low,K562_DMSO_CD235a_All,K562_DMSO_CD235a_High,K562_DMSO_CD235a_Low,K562_DMSO_CD61_All,K562_DMSO_CD61_High,K562_DMSO_CD61_Low,K562_HU_CD235a_All,...,THP-1_DMSO_CD14_Low,THP-1_HU_CD11b_All,THP-1_HU_CD11b_High,THP-1_HU_CD11b_Low,THP-1_HU_CD14_All,THP-1_HU_CD14_High,THP-1_HU_CD14_Low,THP-1_PMA_CD14_High,THP-1_PMA_CD14_Low,THP-1___Pre
sgID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
PELO_-_52083812.23-P2,PELO,447,426,339,490,381,372,264,265,444,...,292,201,159,210,245,146,212,395,426,303
PELO_-_52084090.23-P2,PELO,542,807,875,827,601,529,680,318,508,...,432,401,389,483,514,360,402,788,794,825
PELO_+_52084210.23-P2,PELO,635,574,715,721,771,714,666,485,758,...,374,407,344,395,453,328,488,678,579,676
PELO_+_52084207.23-P2,PELO,705,605,771,787,1000,630,631,469,814,...,391,449,335,453,431,411,371,538,695,765
PELO_+_52083762.23-P2,PELO,602,382,425,520,516,400,230,220,327,...,235,266,208,267,316,305,177,409,347,446
PELO_+_52084161.23-P2,PELO,790,801,752,777,957,1068,532,553,664,...,606,453,342,412,509,482,640,789,847,758
PELO_-_52084105.23-P2,PELO,296,366,379,362,305,347,222,207,210,...,164,117,205,156,190,178,176,247,307,348
PELO_-_52084070.23-P2,PELO,799,886,819,879,792,686,483,480,529,...,596,439,390,521,476,451,447,646,822,811


In [152]:
# get rid of all genes with fewer than 8 sgRNAs
df[df["num_sgrna"] < 8]

Unnamed: 0,num_sgrna
ACTN3,1
ADRB1,1
AIDA,3
AK5,1
AMDHD1,2
...,...
XRN2,1
ZNF385D,1
ZNF605,1
ZNF700,1


In [153]:
valid_genes = df[df["num_sgrna"] >= 8].index
counts_mageck = counts_mageck[counts_mageck["gene_name"].isin(list(valid_genes))]

In [154]:
counts_mageck.to_csv("./230727_bd246_counts_mageck.txt", sep='\t')

In [103]:
# negative control genes -- should just be a list
neg_ctrl = counts_mageck[counts_mageck["gene_name"] == "non-targeting"].index
list(neg_ctrl)

['non-targeting_00001',
 'non-targeting_00002',
 'non-targeting_00012',
 'non-targeting_00018',
 'non-targeting_00022',
 'non-targeting_00039',
 'non-targeting_00040',
 'non-targeting_00044',
 'non-targeting_00051',
 'non-targeting_00063',
 'non-targeting_00065',
 'non-targeting_00069',
 'non-targeting_00074',
 'non-targeting_00076',
 'non-targeting_00091',
 'non-targeting_00106',
 'non-targeting_00111',
 'non-targeting_00116',
 'non-targeting_00142',
 'non-targeting_00148',
 'non-targeting_00160',
 'non-targeting_00162',
 'non-targeting_00180',
 'non-targeting_00199',
 'non-targeting_00221',
 'non-targeting_00223',
 'non-targeting_00230',
 'non-targeting_00232',
 'non-targeting_00233',
 'non-targeting_00261',
 'non-targeting_00275',
 'non-targeting_00297',
 'non-targeting_00301',
 'non-targeting_00308',
 'non-targeting_00309',
 'non-targeting_00324',
 'non-targeting_00326',
 'non-targeting_00336',
 'non-targeting_00342',
 'non-targeting_00345',
 'non-targeting_00362',
 'non-targeting_

In [109]:
with open("230727_bd246_negative_controls.txt", 'w') as f:
    for n in neg_ctrl:
        f.write(n)
        f.write("\n")