# Custom Report Generator
### Takes the tNGS output file and the sequencing load file and creates a 'custom report' file that can be imported into Starlims

In [1]:
import pandas as pd
import os
import time

In [49]:
# Load the tNGS export and normalise its column names (spaces -> underscores).
tngs = pd.read_csv('../output/tNGS_export.csv')
tngs.columns = [name.replace(' ', '_') for name in tngs.columns]

# Load the tab-delimited sequencing load file without a header row, then keep
# only the rows from position 4 onward and the first two columns. The first
# kept row is the 'Well' / 'Sample Name' header line of the file.
# (Alternative input used previously: '../data/seq1.csv', read with its header row.)
seq = pd.read_csv('../data/seq2.csv', sep="\t", header=None).iloc[4:, :2].copy()
seq

Unnamed: 0,0,1
4,Well,Sample Name
5,A01,cnvcontrol01_F_SF2001234
6,B01,cnvcontrol01_M_SF2001234
7,C01,EX1100630_SF2001234
8,D01,EX1702245_SF2001234
...,...,...
100,F04,EX1816211_SF2001234
101,G04,EX1816212_SF2001234
102,H04,EX1816215_SF2001234
103,A05,EX1816215_SF2001234


In [3]:
# Starlims sequencing 'NGS' workbatch number: the entry labelled 0 in the
# 'Container_Name' column of the sequencing load file.
# NOTE(review): the loading cell reads seq with header=None and keeps only
# columns 0 and 1, so a 'Container_Name' column would not exist on a fresh
# Restart & Run All — confirm which load-file layout this cell expects.
workbatchNo = seq['Container_Name'][0]

In [4]:
# Starlims 'amplicon' token; create_custom_report() places it between the
# sample id and the workbatch number when building each report row id.
star_amplicon = "_NGS_"

In [5]:
# Sample IDs present in the sequencing load file, i.e. everything that can be
# imported back into Starlims (mostly relevant for the ddPCR mutation details import).
# The first four unique 'Plate_ID' values are dropped and each remaining value is
# cut down to its first nine characters.
# NOTE(review): the loading cell reads seq with header=None and keeps columns 0 and 1
# only, so 'Plate_ID' would not exist on a fresh run — confirm the expected layout.
samples = [name[:9] for name in list(seq['Plate_ID'].unique())[4:]]

In [6]:
# Add an "<sample id>_<MutSurveyor variant>" column to the tNGS export ...
tngs['id_variant'] = tngs['Folder_number'].add("_").add(tngs['MutSurveyor'])
# ... and collect its unique values, skipping any whose string form is 'nan'.
tngs_var = [combined for combined in tngs['id_variant'].unique() if str(combined) != 'nan']

In [7]:
# Add an "<sample id>_<mutation details>" column to the tNGS export and
# collect its unique values (missing values are NOT filtered out here).
tngs['id_mut'] = tngs['Folder_number'].add("_").add(tngs['MutDetails'])
tngs_mut = [*tngs['id_mut'].unique()]

In [8]:
# Add an "<sample id>_<genomic nomenclature>" column to the tNGS export and
# collect its unique values (missing values are NOT filtered out here).
tngs['id_genomic'] = tngs['Folder_number'].add("_").add(tngs['Genomic_nomenclature'])
tngs_genomic = [*tngs['id_genomic'].unique()]

In [9]:
# Map each sample id to the list of its MutSurveyor variants.
# Entries of tngs_var were built as Folder_number + "_" + MutSurveyor; the first
# nine characters are compared against the sample id and the text from position
# 10 onward (after the "_" separator) is stored as the variant.
sample_vars = {sample: [] for sample in samples}
# Iterate over the dict keys rather than `samples`: truncating names to nine
# characters can produce repeated ids in `samples`, and looping over those
# repeats would append the same variants more than once.
for sample, variants in sample_vars.items():
    variants.extend(variant[10:] for variant in tngs_var if variant[:9] == sample)

In [10]:
# create custom report template
def create_custom_report(sample_variants=None, amplicon=None, workbatch=None, out_path=None):
    """Write the tab-delimited 'custom report' text file.

    The file starts with a "Warning!" line and a column header line, followed
    by one row per sample. Each row is "<sample><amplicon><workbatch>", six
    tabs (the non-variant columns are left empty), then the sample's variants
    separated by tabs. A sample without variants gets a row of empty columns.

    Parameters
    ----------
    sample_variants : dict[str, list[str]], optional
        Sample id -> list of variants. Defaults to the notebook-level ``sample_vars``.
    amplicon : str, optional
        Token inserted after the sample id. Defaults to the notebook-level ``star_amplicon``.
    workbatch : str, optional
        Workbatch number appended to the row id. Defaults to the notebook-level ``workbatchNo``.
    out_path : str, optional
        Destination file. Defaults to ``<cwd>/../output/custom_report.txt``.

    Returns
    -------
    None
    """
    # Fall back to the notebook globals so the zero-argument call keeps working.
    if sample_variants is None:
        sample_variants = sample_vars
    if amplicon is None:
        amplicon = star_amplicon
    if workbatch is None:
        workbatch = workbatchNo
    if out_path is None:
        out_path = f'{os.getcwd()}/../output/custom_report.txt'

    # Column names previously read "Variant1, Variant3, Variant3, Variant4".
    header = "Warning!\nSample Name\tReference Name\tLane Quality\tROI Coverage\t#nts below threshold\tQuality ROI\tVariant1\tVariant2\tVariant3\tVariant4\n"
    empty_columns = "\t" * 6  # skip the six non-variant columns

    rows = []
    for sample, variants in sample_variants.items():
        sample_id = sample + amplicon + workbatch
        if variants:
            # All variants are written. Previously a sample with more than four
            # variants was emitted as an empty row, silently dropping them.
            # NOTE(review): the header names only four variant columns — confirm
            # how the importer treats additional columns.
            rows.append(sample_id + empty_columns + "\t".join(str(v) for v in variants) + "\n")
        else:
            rows.append(f"{sample_id}\t\t\t\t\t\t\t\t\t\t\n")

    # The context manager closes the file even if a write fails.
    with open(out_path, 'w') as report:
        report.write(header)
        report.writelines(rows)

# Write the report using the notebook-level sample_vars, star_amplicon and workbatchNo.
create_custom_report()

In [11]:
# create mutation details 'ddPCR' import file

# cols - well, sampleID, variant

def create_mut_details() -> None:
    """Placeholder for building the mutation details ('ddPCR') import file.

    Currently it only copies the rows of the notebook-level ``seq`` from
    position 3 onward into a local variable; nothing is returned or written.
    """
    # TODO: the copy is discarded when the function returns.
    md_import = seq.iloc[3:,:].copy()

create_mut_details()

In [12]:
# Build the well / sample-name table with a combined "<well>_<sample id>" key.
#
# The 'Well' / 'Sample Name' header line is located by content instead of a
# hard-coded row offset, and is promoted to column names in memory. The earlier
# version skipped a fixed three rows and round-tripped through an "md.csv" file
# on disk to get the header; that offset does not match seq when seq already
# starts at the 'Well' row, and it left a stray file in the working directory.
first_column = seq.iloc[:, 0].astype(str).str.strip()
header_hits = [pos for pos, label in enumerate(first_column) if label == 'Well']
if not header_hits:
    raise ValueError("No 'Well' header row found in the first column of seq")
header_pos = header_hits[0]

# Rows after the header, first two columns only, re-indexed from 0.
md = seq.iloc[header_pos + 1:, :2].copy().reset_index(drop=True)
md.columns = [str(c).strip().replace(' ', '_') for c in seq.iloc[header_pos, :2]]
# Key is the well plus the first nine characters of the sample name.
md["well_id"] = md["Well"] + "_" + md["Sample_Name"].str[:9]
md

Unnamed: 0,Well,Sample_Name,well_id
0,A01,cnvcontrol01_F_SF2001234,A01_cnvcontro
1,B01,cnvcontrol01_M_SF2001234,B01_cnvcontro
2,C01,EX1100630_SF2001234,C01_EX1100630
3,D01,EX1702245_SF2001234,D01_EX1702245
4,E01,EX1714297_SF2001234,E01_EX1714297
...,...,...,...
95,F04,EX1816211_SF2001234,F04_EX1816211
96,G04,EX1816212_SF2001234,G04_EX1816212
97,H04,EX1816215_SF2001234,H04_EX1816215
98,A05,EX1816215_SF2001234,A05_EX1816215


# Mutation details import
- Well | SampleID | MutDetail | Genomic
- Well + SampleID taken from seq.csv
- MutDetail and Genomic taken from tNGS_export.csv

In [70]:
# Inspect every value in column 1 of seq; the first entry is the 'Sample Name' header row.
list(seq[1])

['Sample Name',
 'cnvcontrol01_F_SF2001234',
 'cnvcontrol01_M_SF2001234',
 'EX1100630_SF2001234',
 'EX1702245_SF2001234',
 'EX1714297_SF2001234',
 'EX1806104_SF2001234',
 'EX1806937_SF2001234',
 'EX1809676_SF2001234',
 'EX1812034_SF2001234',
 'EX1812591_SF2001234',
 'EX1812591_SF2001234',
 'EX1813292_SF2001234',
 'EX1813352_SF2001234',
 'EX1813353_SF2001234',
 'EX1813386_SF2001234',
 'EX1813403_SF2001234',
 'EX1813529_SF2001234',
 'EX1813576_SF2001234',
 'EX1813608_SF2001234',
 'EX1813656_SF2001234',
 'EX1813905_SF2001234',
 'EX1813935_SF2001234',
 'EX1813937_SF2001234',
 'EX1813996_SF2001234',
 'EX1814024_SF2001234',
 'EX1814026_SF2001234',
 'EX1814264_SF2001234',
 'EX1814312_SF2001234',
 'EX1814341_SF2001234',
 'EX1814400_SF2001234',
 'EX1814483_SF2001234',
 'EX1814502_SF2001234',
 'EX1814537_SF2001234',
 'EX1814546_SF2001234',
 'EX1814680_SF2001234',
 'EX1814711_SF2001234',
 'EX1814713_SF2001234',
 'EX1814825_SF2001234',
 'EX1814881_SF2001234',
 'EX1814956_SF2001234',
 'EX1814967_SF

In [65]:
# Create Well-Sample dictionary {well: sample} from the sequencing load file.
#
# Each well is paired with the sample on the SAME row. Zipping the two columns
# after de-duplicating them separately shifts the pairing: the 'Well' header is
# dropped by the length filter while 'Sample Name' is kept, and a sample that
# occupies several wells collapses to a single entry.
well_sample = {
    w: str(s)[:9]                      # sample id = first nine characters of the name
    for w, s in zip(seq[0], seq[1])
    if len(str(w)) == 3                # keep three-character well labels such as 'A01'
}
# Unique three-character wells, in first-seen order.
well = list(well_sample)
# Unique sample names truncated to nine characters (still includes the header
# row entry; the truncated ids themselves may repeat).
seqID = [str(x)[:9] for x in seq[1].unique()]

In [48]:
# Per-sample lookup: {sample id: [[mutation details, ...], [genomic nomenclature, ...]]}.
# Entries of tngs_mut / tngs_genomic were built as Folder_number + "_" + value; the
# first nine characters are compared against the sample id and the text from
# position 10 onward (after the "_" separator) is stored.
seq_mut = {sample_id: [[], []] for sample_id in seqID}
# Iterate over the dict (unique keys) rather than seqID: seqID can contain the
# same truncated id more than once, which would append every match repeatedly.
for sample_id, (mut_details, genomic_names) in seq_mut.items():
    for source, target in ((tngs_mut, mut_details), (tngs_genomic, genomic_names)):
        for entry in source:
            # Missing values (NaN) carry no sample id; skip them instead of
            # slicing a float or matching on the text 'nan'.
            if pd.isna(entry):
                continue
            entry = str(entry)
            if entry[:9] == sample_id:
                target.append(entry[10:])
seq_mut

{'EX1100630': [['nan exnan p. g.'], []],
 'EX1702245': [['nan exnan p. g.'], []],
 'EX1714297': [['nan exnan p. g.'], []],
 'EX1806104': [['nan exnan p. g.'], []],
 'EX1806937': [['nan exnan p. g.'], []],
 'EX1809676': [['nan exnan p. g.'], []],
 'EX1812034': [['nan exnan p. g.'], []],
 'EX1812591': [['GLIS3 ex4 p.Gln528Ter/N c.1582C>T/N',
   'GLIS3 ex4 p.His520Asn/N c.1558C>A/N'],
  ['Chr9(GRCh37):g.4117896G>A', 'Chr9(GRCh37):g.4117920G>T']],
 'EX1813292': [['nan exnan p. g.'], []],
 'EX1813352': [['nan exnan p. g.'], []],
 'EX1813353': [['nan exnan p. g.'], []],
 'EX1813386': [['GATA6 ex2 p.Pro63Ser/N c.187C>T/N'],
  ['Chr18(GRCh37):g.19751292C>T']],
 'EX1813403': [['nan exnan p. g.'], []],
 'EX1813529': [['nan exnan p. g.'], []],
 'EX1813576': [['nan exnan p. g.'], []],
 'EX1813608': [['CASR ex7 p.Cys787Arg/N c.2359T>C/N'],
  ['Chr3(GRCh37):g.122003160T>C']],
 'EX1813656': [['nan exnan p. g.'], []],
 'EX1813905': [['HNF4A ex8 p.Arg311Cys/N c.931C>T/N'],
  ['Chr20(GRCh37):g.43052762C

In [68]:
# Number of entries in seqID.
len(seqID)

97