This notebook prepares the bulk RNAseq data for upload to GEO

In [51]:
import pandas as pd
import numpy as np
from datetime import datetime
import os

In [52]:
df = pd.read_csv('/Users/djuna/Documents/ABCA7lof2/bulkRNAseq/abca7_rna_seq_metadata_with_seqIDs.csv')


In [54]:
df

Unnamed: 0,SampleName,Line,Treatment,Vol (ul),Conc,Type,Pool,Genome,Notes,SeqID
0,E1,Control,H20,30,2-10 ng/ul,totalRNA,,human,"diff & extraction batch 1 (1 well, DOI:101624,...",D24-18553
1,E2,Control,H20,30,2-10 ng/ul,totalRNA,,human,"diff & extraction batch 1 (1 well, DOI:101624,...",D24-18554
2,Y1,Y622,H20,30,2-10 ng/ul,totalRNA,,human,"diff & extraction batch 1 (1 well, DOI:101624,...",D24-18555
3,Y2,Y622,H20,30,2-10 ng/ul,totalRNA,,human,"diff & extraction batch 1 (1 well, DOI:101624,...",D24-18556
4,Y3,Y622,H20,30,2-10 ng/ul,totalRNA,,human,"diff & extraction batch 1 (1 well, DOI:101624,...",D24-18557
5,Y4,Y622,H20,30,2-10 ng/ul,totalRNA,,human,"diff & extraction batch 1 (1 well, DOI:101624,...",D24-18558
6,Y5,Y622,H20,30,2-10 ng/ul,totalRNA,,human,"diff & extraction batch 1 (1 well, DOI:101624,...",D24-18559
7,Y6,Y622,H20,30,2-10 ng/ul,totalRNA,,human,diff & extraction batch 2 (2 wells pooled),D24-18560
8,Y7,Y622,H20,30,2-10 ng/ul,totalRNA,,human,diff & extraction batch 2 (2 wells pooled),D24-18561
9,Y8,Y622,H20,30,2-10 ng/ul,totalRNA,,human,diff & extraction batch 2 (2 wells pooled),D24-18562


In [2]:
# removing batch 2 from the counts matrix and renaming columns 

counts = pd.read_csv('/Users/djuna/Documents/ABCA7lof2/bulkRNAseq/counts.txt', sep='\t', comment="#")

df = pd.read_csv('/Users/djuna/Documents/ABCA7lof2/bulkRNAseq/abca7_rna_seq_metadata_with_seqIDs.csv')

no_batch2 = df[~df["Notes"].str.contains(r"\bbatch\s*2\b", case=False, na=False)].copy()
no_batch2['SeqID'] = [x.split('    ')[1].split(' ')[0] for x in no_batch2["SeqID"]]
keep_ids = np.array(no_batch2['SeqID'])
fixed_cols = ["Geneid", "Chr", "Start", "End", "Strand", "Length", "gene_name", "gene_type"]
columns_to_keep = ['/home/gridsan/djuna/homer/github/ABCA7lof2/bulkRNAseq/mapped/241121Tsa_' + x + '_Aligned.out.sam' for x in keep_ids]

subset = counts[fixed_cols + columns_to_keep]
subset.columns = list(subset.columns[:8])  + [x.split('_')[1].split('Aligned')[0] for x in subset.columns[8:]]

# sanity check
for i in subset.columns[8:]:
    x = np.array(subset[[i]])
    y = np.array(counts[[f'/home/gridsan/djuna/homer/github/ABCA7lof2/bulkRNAseq/mapped/241121Tsa_{i}_Aligned.out.sam']])
    print(np.array_equal(x, y))


True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True


In [3]:
# format metadata for GEO
dictionary = {'Control': 'control', 'Y622': 'p.Tyr622*', 'G2': 'p.Glu50fs*3'}
teatment_dict = {'H20': '+ H20 (last 14 days)', 'Choline2Weeks': '+ CDP-choline (last 14 days))'}
metadata_geo_format = no_batch2.copy()

# calculate days apart
fmt = "%m%d%y"
d1 = datetime.strptime('101624', fmt)
d2 = datetime.strptime('111324', fmt)
days_apart = (d2 - d1).days

# add the metadata
metadata_geo_format['library name'] = metadata_geo_format['SeqID']
metadata_geo_format['title'] = metadata_geo_format['Line'].map(dictionary) + ' ' + metadata_geo_format['Treatment'].map(teatment_dict) + ' ' + 'replicate ' + (list(np.arange(2).astype(str)) + list(np.arange(5).astype(str))*3)
metadata_geo_format['library strategy'] = 'RNA-seq'
metadata_geo_format['organism'] = metadata_geo_format['Genome']
metadata_geo_format['tissue'] = np.nan
metadata_geo_format['cell line'] = metadata_geo_format['Line'].map(dictionary)
metadata_geo_format['cell type'] = 'iPSC-derivded induced neurons'
metadata_geo_format['genotype'] = 'ABCA7' + metadata_geo_format['Line'].map(dictionary)
metadata_geo_format['treatment'] = metadata_geo_format['Treatment'].map(teatment_dict)
metadata_geo_format['time'] = 'Day ' + str(days_apart)
metadata_geo_format['molecule'] = 'total RNA'
metadata_geo_format['single or paired-end'] = 'paired-end'
metadata_geo_format['instrument model'] = 'Element AVITI'
metadata_geo_format['description'] = ['Column name in counts.txt: ' + x for x in metadata_geo_format['SeqID']]
metadata_geo_format['processed data file'] = 'counts.txt'
metadata_geo_format['processed data file 2'] = np.nan
metadata_geo_format['raw file 1'] = ['241121Tsa_' + x + '_1_sequence.fastq' for x in metadata_geo_format['SeqID']]
metadata_geo_format['raw file 2'] = ['241121Tsa_' + x + '_2_sequence.fastq' for x in metadata_geo_format['SeqID']]

columns_to_keep = ['library name', 'title', 'library strategy', 'organism', 'tissue', 'cell line', 'cell type', 'genotype', 'treatment', 'time', 'molecule', 'single or paired-end', 'instrument model', 'description', 'processed data file', 'processed data file 2', 'raw file 1', 'raw file 2']
metadata_geo_format = metadata_geo_format[columns_to_keep]
metadata_geo_format.index = np.arange(len(metadata_geo_format))

In [4]:
metadata_geo_format

Unnamed: 0,library name,title,library strategy,organism,tissue,cell line,cell type,genotype,treatment,time,molecule,single or paired-end,instrument model,description,processed data file,processed data file 2,raw file 1,raw file 2
0,D24-18553,control + H20 (last 14 days) replicate 0,RNA-seq,human,,control,iPSC-derivded induced neurons,ABCA7control,+ H20 (last 14 days),Day 28,total RNA,paired-end,Element AVITI,Column name in counts.txt: D24-18553,counts.txt,,241121Tsa_D24-18553_1_sequence.fastq,241121Tsa_D24-18553_2_sequence.fastq
1,D24-18554,control + H20 (last 14 days) replicate 1,RNA-seq,human,,control,iPSC-derivded induced neurons,ABCA7control,+ H20 (last 14 days),Day 28,total RNA,paired-end,Element AVITI,Column name in counts.txt: D24-18554,counts.txt,,241121Tsa_D24-18554_1_sequence.fastq,241121Tsa_D24-18554_2_sequence.fastq
2,D24-18555,p.Tyr622* + H20 (last 14 days) replicate 0,RNA-seq,human,,p.Tyr622*,iPSC-derivded induced neurons,ABCA7p.Tyr622*,+ H20 (last 14 days),Day 28,total RNA,paired-end,Element AVITI,Column name in counts.txt: D24-18555,counts.txt,,241121Tsa_D24-18555_1_sequence.fastq,241121Tsa_D24-18555_2_sequence.fastq
3,D24-18556,p.Tyr622* + H20 (last 14 days) replicate 1,RNA-seq,human,,p.Tyr622*,iPSC-derivded induced neurons,ABCA7p.Tyr622*,+ H20 (last 14 days),Day 28,total RNA,paired-end,Element AVITI,Column name in counts.txt: D24-18556,counts.txt,,241121Tsa_D24-18556_1_sequence.fastq,241121Tsa_D24-18556_2_sequence.fastq
4,D24-18557,p.Tyr622* + H20 (last 14 days) replicate 2,RNA-seq,human,,p.Tyr622*,iPSC-derivded induced neurons,ABCA7p.Tyr622*,+ H20 (last 14 days),Day 28,total RNA,paired-end,Element AVITI,Column name in counts.txt: D24-18557,counts.txt,,241121Tsa_D24-18557_1_sequence.fastq,241121Tsa_D24-18557_2_sequence.fastq
5,D24-18558,p.Tyr622* + H20 (last 14 days) replicate 3,RNA-seq,human,,p.Tyr622*,iPSC-derivded induced neurons,ABCA7p.Tyr622*,+ H20 (last 14 days),Day 28,total RNA,paired-end,Element AVITI,Column name in counts.txt: D24-18558,counts.txt,,241121Tsa_D24-18558_1_sequence.fastq,241121Tsa_D24-18558_2_sequence.fastq
6,D24-18559,p.Tyr622* + H20 (last 14 days) replicate 4,RNA-seq,human,,p.Tyr622*,iPSC-derivded induced neurons,ABCA7p.Tyr622*,+ H20 (last 14 days),Day 28,total RNA,paired-end,Element AVITI,Column name in counts.txt: D24-18559,counts.txt,,241121Tsa_D24-18559_1_sequence.fastq,241121Tsa_D24-18559_2_sequence.fastq
7,D24-18563,p.Tyr622* + CDP-choline (last 14 days)) replic...,RNA-seq,human,,p.Tyr622*,iPSC-derivded induced neurons,ABCA7p.Tyr622*,+ CDP-choline (last 14 days)),Day 28,total RNA,paired-end,Element AVITI,Column name in counts.txt: D24-18563,counts.txt,,241121Tsa_D24-18563_1_sequence.fastq,241121Tsa_D24-18563_2_sequence.fastq
8,D24-18564,p.Tyr622* + CDP-choline (last 14 days)) replic...,RNA-seq,human,,p.Tyr622*,iPSC-derivded induced neurons,ABCA7p.Tyr622*,+ CDP-choline (last 14 days)),Day 28,total RNA,paired-end,Element AVITI,Column name in counts.txt: D24-18564,counts.txt,,241121Tsa_D24-18564_1_sequence.fastq,241121Tsa_D24-18564_2_sequence.fastq
9,D24-18565,p.Tyr622* + CDP-choline (last 14 days)) replic...,RNA-seq,human,,p.Tyr622*,iPSC-derivded induced neurons,ABCA7p.Tyr622*,+ CDP-choline (last 14 days)),Day 28,total RNA,paired-end,Element AVITI,Column name in counts.txt: D24-18565,counts.txt,,241121Tsa_D24-18565_1_sequence.fastq,241121Tsa_D24-18565_2_sequence.fastq


In [5]:
df

Unnamed: 0,SampleName,Line,Treatment,Vol (ul),Conc,Type,Pool,Genome,Notes,SeqID
0,E1,Control,H20,30,2-10 ng/ul,totalRNA,,human,"diff & extraction batch 1 (1 well, DOI:101624,...",D24-18553
1,E2,Control,H20,30,2-10 ng/ul,totalRNA,,human,"diff & extraction batch 1 (1 well, DOI:101624,...",D24-18554
2,Y1,Y622,H20,30,2-10 ng/ul,totalRNA,,human,"diff & extraction batch 1 (1 well, DOI:101624,...",D24-18555
3,Y2,Y622,H20,30,2-10 ng/ul,totalRNA,,human,"diff & extraction batch 1 (1 well, DOI:101624,...",D24-18556
4,Y3,Y622,H20,30,2-10 ng/ul,totalRNA,,human,"diff & extraction batch 1 (1 well, DOI:101624,...",D24-18557
5,Y4,Y622,H20,30,2-10 ng/ul,totalRNA,,human,"diff & extraction batch 1 (1 well, DOI:101624,...",D24-18558
6,Y5,Y622,H20,30,2-10 ng/ul,totalRNA,,human,"diff & extraction batch 1 (1 well, DOI:101624,...",D24-18559
7,Y6,Y622,H20,30,2-10 ng/ul,totalRNA,,human,diff & extraction batch 2 (2 wells pooled),D24-18560
8,Y7,Y622,H20,30,2-10 ng/ul,totalRNA,,human,diff & extraction batch 2 (2 wells pooled),D24-18561
9,Y8,Y622,H20,30,2-10 ng/ul,totalRNA,,human,diff & extraction batch 2 (2 wells pooled),D24-18562


In [6]:
metadata_geo_format

Unnamed: 0,library name,title,library strategy,organism,tissue,cell line,cell type,genotype,treatment,time,molecule,single or paired-end,instrument model,description,processed data file,processed data file 2,raw file 1,raw file 2
0,D24-18553,control + H20 (last 14 days) replicate 0,RNA-seq,human,,control,iPSC-derivded induced neurons,ABCA7control,+ H20 (last 14 days),Day 28,total RNA,paired-end,Element AVITI,Column name in counts.txt: D24-18553,counts.txt,,241121Tsa_D24-18553_1_sequence.fastq,241121Tsa_D24-18553_2_sequence.fastq
1,D24-18554,control + H20 (last 14 days) replicate 1,RNA-seq,human,,control,iPSC-derivded induced neurons,ABCA7control,+ H20 (last 14 days),Day 28,total RNA,paired-end,Element AVITI,Column name in counts.txt: D24-18554,counts.txt,,241121Tsa_D24-18554_1_sequence.fastq,241121Tsa_D24-18554_2_sequence.fastq
2,D24-18555,p.Tyr622* + H20 (last 14 days) replicate 0,RNA-seq,human,,p.Tyr622*,iPSC-derivded induced neurons,ABCA7p.Tyr622*,+ H20 (last 14 days),Day 28,total RNA,paired-end,Element AVITI,Column name in counts.txt: D24-18555,counts.txt,,241121Tsa_D24-18555_1_sequence.fastq,241121Tsa_D24-18555_2_sequence.fastq
3,D24-18556,p.Tyr622* + H20 (last 14 days) replicate 1,RNA-seq,human,,p.Tyr622*,iPSC-derivded induced neurons,ABCA7p.Tyr622*,+ H20 (last 14 days),Day 28,total RNA,paired-end,Element AVITI,Column name in counts.txt: D24-18556,counts.txt,,241121Tsa_D24-18556_1_sequence.fastq,241121Tsa_D24-18556_2_sequence.fastq
4,D24-18557,p.Tyr622* + H20 (last 14 days) replicate 2,RNA-seq,human,,p.Tyr622*,iPSC-derivded induced neurons,ABCA7p.Tyr622*,+ H20 (last 14 days),Day 28,total RNA,paired-end,Element AVITI,Column name in counts.txt: D24-18557,counts.txt,,241121Tsa_D24-18557_1_sequence.fastq,241121Tsa_D24-18557_2_sequence.fastq
5,D24-18558,p.Tyr622* + H20 (last 14 days) replicate 3,RNA-seq,human,,p.Tyr622*,iPSC-derivded induced neurons,ABCA7p.Tyr622*,+ H20 (last 14 days),Day 28,total RNA,paired-end,Element AVITI,Column name in counts.txt: D24-18558,counts.txt,,241121Tsa_D24-18558_1_sequence.fastq,241121Tsa_D24-18558_2_sequence.fastq
6,D24-18559,p.Tyr622* + H20 (last 14 days) replicate 4,RNA-seq,human,,p.Tyr622*,iPSC-derivded induced neurons,ABCA7p.Tyr622*,+ H20 (last 14 days),Day 28,total RNA,paired-end,Element AVITI,Column name in counts.txt: D24-18559,counts.txt,,241121Tsa_D24-18559_1_sequence.fastq,241121Tsa_D24-18559_2_sequence.fastq
7,D24-18563,p.Tyr622* + CDP-choline (last 14 days)) replic...,RNA-seq,human,,p.Tyr622*,iPSC-derivded induced neurons,ABCA7p.Tyr622*,+ CDP-choline (last 14 days)),Day 28,total RNA,paired-end,Element AVITI,Column name in counts.txt: D24-18563,counts.txt,,241121Tsa_D24-18563_1_sequence.fastq,241121Tsa_D24-18563_2_sequence.fastq
8,D24-18564,p.Tyr622* + CDP-choline (last 14 days)) replic...,RNA-seq,human,,p.Tyr622*,iPSC-derivded induced neurons,ABCA7p.Tyr622*,+ CDP-choline (last 14 days)),Day 28,total RNA,paired-end,Element AVITI,Column name in counts.txt: D24-18564,counts.txt,,241121Tsa_D24-18564_1_sequence.fastq,241121Tsa_D24-18564_2_sequence.fastq
9,D24-18565,p.Tyr622* + CDP-choline (last 14 days)) replic...,RNA-seq,human,,p.Tyr622*,iPSC-derivded induced neurons,ABCA7p.Tyr622*,+ CDP-choline (last 14 days)),Day 28,total RNA,paired-end,Element AVITI,Column name in counts.txt: D24-18565,counts.txt,,241121Tsa_D24-18565_1_sequence.fastq,241121Tsa_D24-18565_2_sequence.fastq


In [7]:
# save files for upload to GEO
out_path = '/Users/djuna/Documents/abca7_dryad/for_GEO'
metadata_geo_format.to_csv(
    os.path.join(out_path, "metadata_geo_format.txt"),
    sep="\t",
    index=False)

subset.to_csv(
    os.path.join(out_path, "counts.txt"),
    sep="\t",
    index=False)


In [8]:
# save directories to copy 
dirs = [f'/home/gridsan/djuna/homer/github/ABCA7lof2/bulkRNAseq/raw_data/241121Tsa/{x}-6997E' for x in metadata_geo_format['library name']]

files = []
for i in range(len(dirs)):
    files.append(np.array(dirs)[i] + '/' + np.array(metadata_geo_format['raw file 1'])[i])

for i in range(len(dirs)):
    files.append(np.array(dirs)[i] + '/' + np.array(metadata_geo_format['raw file 2'])[i])

out_file = '/Users/djuna/Documents/abca7_dryad/for_GEO/files_to_copy.txt'
with open(out_file, "w", encoding="utf-8") as fh:
    for path in files:
        fh.write(f"{path}\n")  


In [14]:
sums = pd.read_csv(out_path + '/md5sums_241121Tsa.txt', sep=' ', header=None)

In [35]:
sums_dictionary = dict(zip(sums.iloc[:,2], sums.iloc[:,0]))

In [49]:
[print(sums_dictionary[f'./{x.split("_")[1]}-6997E/' + x]) for x in metadata_geo_format['raw file 1']]

4753a0a2b6d26b805d923f0e0bf89b12
9f30611ba848a8b6fb7a8d07cbc2798c
3c27698cab64c1757f03674d3fdf03a2
d78dd242cb995317db4a765c303e969d
7054bad0c2b30eca7d8c1feae8a3836d
b904e8c975b6451cfc8618c94db69826
1ded8c14aa8c642e540e1d9760c2763f
33c282580ddacdf7a148c6422abe1943
72aeabd8da7dce01b71e5d5b9e5ac8f7
d41d8cd98f00b204e9800998ecf8427e
93aff76990a2fb879e3467cca4156b86
4ce3b4602b71fec6b58fe6f354f2176f
96c093e4f3cce404da9239ff4ec0a270
5970480aebc7be3b9fd1abf905487e35
d2edca09c55a18e8ba15ffca080f080a
b79b2aee07423e194f7b3c3c8a0c5280
0df1b253377eb9b1debfe668512bbf04


[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None]

In [50]:
[print(sums_dictionary[f'./{x.split("_")[1]}-6997E/' + x]) for x in metadata_geo_format['raw file 2']]

6ecc784fc9e68348b859abd0849f1796
3e288dab0fc5a9e75d62b240092bd69d
7f9e50db45d5265596084c0741a81d51
6f1338aa088a7768f82fcd7d1d18a2a7
545ceb2f44eec894eb01964a85a6d1d6
374117427b75beafa10160b313356d7c
b6f6e679583e73107f1c06b386c5563f
b3fd1bdfe7b0c00707162ef67a123a5c
fb6c939312730f391c3e1bdca7e6fddc
d41d8cd98f00b204e9800998ecf8427e
e7d8676ee689360f8663443ae7e3b773
8591d7c26e6ee64b9656e9055b76ca82
83e6eb3a9864829e223173fa50aa5397
7403ecc31a5e7870c2a266ea770f7bee
09a0841f1a3654214e91cf5016a7e797
0ac3f44a97890898cdaa7ae644ec0597
6768728140e9bf3502a656df74f35f0d


[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None]

In [47]:
metadata_geo_format

Unnamed: 0,library name,title,library strategy,organism,tissue,cell line,cell type,genotype,treatment,time,molecule,single or paired-end,instrument model,description,processed data file,processed data file 2,raw file 1,raw file 2
0,D24-18553,control + H20 (last 14 days) replicate 0,RNA-seq,human,,control,iPSC-derivded induced neurons,ABCA7control,+ H20 (last 14 days),Day 28,total RNA,paired-end,Element AVITI,Column name in counts.txt: D24-18553,counts.txt,,241121Tsa_D24-18553_1_sequence.fastq,241121Tsa_D24-18553_2_sequence.fastq
1,D24-18554,control + H20 (last 14 days) replicate 1,RNA-seq,human,,control,iPSC-derivded induced neurons,ABCA7control,+ H20 (last 14 days),Day 28,total RNA,paired-end,Element AVITI,Column name in counts.txt: D24-18554,counts.txt,,241121Tsa_D24-18554_1_sequence.fastq,241121Tsa_D24-18554_2_sequence.fastq
2,D24-18555,p.Tyr622* + H20 (last 14 days) replicate 0,RNA-seq,human,,p.Tyr622*,iPSC-derivded induced neurons,ABCA7p.Tyr622*,+ H20 (last 14 days),Day 28,total RNA,paired-end,Element AVITI,Column name in counts.txt: D24-18555,counts.txt,,241121Tsa_D24-18555_1_sequence.fastq,241121Tsa_D24-18555_2_sequence.fastq
3,D24-18556,p.Tyr622* + H20 (last 14 days) replicate 1,RNA-seq,human,,p.Tyr622*,iPSC-derivded induced neurons,ABCA7p.Tyr622*,+ H20 (last 14 days),Day 28,total RNA,paired-end,Element AVITI,Column name in counts.txt: D24-18556,counts.txt,,241121Tsa_D24-18556_1_sequence.fastq,241121Tsa_D24-18556_2_sequence.fastq
4,D24-18557,p.Tyr622* + H20 (last 14 days) replicate 2,RNA-seq,human,,p.Tyr622*,iPSC-derivded induced neurons,ABCA7p.Tyr622*,+ H20 (last 14 days),Day 28,total RNA,paired-end,Element AVITI,Column name in counts.txt: D24-18557,counts.txt,,241121Tsa_D24-18557_1_sequence.fastq,241121Tsa_D24-18557_2_sequence.fastq
5,D24-18558,p.Tyr622* + H20 (last 14 days) replicate 3,RNA-seq,human,,p.Tyr622*,iPSC-derivded induced neurons,ABCA7p.Tyr622*,+ H20 (last 14 days),Day 28,total RNA,paired-end,Element AVITI,Column name in counts.txt: D24-18558,counts.txt,,241121Tsa_D24-18558_1_sequence.fastq,241121Tsa_D24-18558_2_sequence.fastq
6,D24-18559,p.Tyr622* + H20 (last 14 days) replicate 4,RNA-seq,human,,p.Tyr622*,iPSC-derivded induced neurons,ABCA7p.Tyr622*,+ H20 (last 14 days),Day 28,total RNA,paired-end,Element AVITI,Column name in counts.txt: D24-18559,counts.txt,,241121Tsa_D24-18559_1_sequence.fastq,241121Tsa_D24-18559_2_sequence.fastq
7,D24-18563,p.Tyr622* + CDP-choline (last 14 days)) replic...,RNA-seq,human,,p.Tyr622*,iPSC-derivded induced neurons,ABCA7p.Tyr622*,+ CDP-choline (last 14 days)),Day 28,total RNA,paired-end,Element AVITI,Column name in counts.txt: D24-18563,counts.txt,,241121Tsa_D24-18563_1_sequence.fastq,241121Tsa_D24-18563_2_sequence.fastq
8,D24-18564,p.Tyr622* + CDP-choline (last 14 days)) replic...,RNA-seq,human,,p.Tyr622*,iPSC-derivded induced neurons,ABCA7p.Tyr622*,+ CDP-choline (last 14 days)),Day 28,total RNA,paired-end,Element AVITI,Column name in counts.txt: D24-18564,counts.txt,,241121Tsa_D24-18564_1_sequence.fastq,241121Tsa_D24-18564_2_sequence.fastq
9,D24-18565,p.Tyr622* + CDP-choline (last 14 days)) replic...,RNA-seq,human,,p.Tyr622*,iPSC-derivded induced neurons,ABCA7p.Tyr622*,+ CDP-choline (last 14 days)),Day 28,total RNA,paired-end,Element AVITI,Column name in counts.txt: D24-18565,counts.txt,,241121Tsa_D24-18565_1_sequence.fastq,241121Tsa_D24-18565_2_sequence.fastq


In [45]:
sums_dictionary

{'./D24-18556-6997E/241121Tsa_D24-18556_1_sequence.fastq': 'd78dd242cb995317db4a765c303e969d',
 './D24-18556-6997E/241121Tsa_D24-18556_2_sequence.fastq': '6f1338aa088a7768f82fcd7d1d18a2a7',
 './D24-18557-6997E/241121Tsa_D24-18557_1_sequence.fastq': '7054bad0c2b30eca7d8c1feae8a3836d',
 './D24-18557-6997E/241121Tsa_D24-18557_2_sequence.fastq': '545ceb2f44eec894eb01964a85a6d1d6',
 './D24-18559-6997E/241121Tsa_D24-18559_1_sequence.fastq': '1ded8c14aa8c642e540e1d9760c2763f',
 './D24-18559-6997E/241121Tsa_D24-18559_2_sequence.fastq': 'b6f6e679583e73107f1c06b386c5563f',
 './D24-18561-6997E/241121Tsa_D24-18561_1_sequence.fastq': '3b60310ad2ff92e3622980f616e0d196',
 './D24-18561-6997E/241121Tsa_D24-18561_2_sequence.fastq': '8e3d039a972c0b895ddb5726ea610ddf',
 './D24-18562-6997E/241121Tsa_D24-18562_1_sequence.fastq': '0643c65208df9a438d5374fa2fd188d8',
 './D24-18562-6997E/241121Tsa_D24-18562_2_sequence.fastq': '75a1d60d70bef31246ffa53ebea614bb',
 './D24-18564-6997E/241121Tsa_D24-18564_1_sequence