## De-multiplexing report

In [None]:
import json
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Set the default resolution of every figure created below to 150 DPI.
plt.rcParams['figure.dpi'] = 150

def _load_demux_stats(stats_json):
  # Read the stats JSON; return (per-lane/per-sample read table, list of unknown-barcode entries).
  with open(stats_json,'r') as fp:
    stats = json.load(fp)
  rows = list()
  for lane_entry in stats.get('ConversionResults') or []:
    lane_number = lane_entry.get('LaneNumber')
    total_cluster_raw = lane_entry.get('TotalClustersRaw')
    total_cluster_pf = lane_entry.get('TotalClustersPF')
    for sample_entry in lane_entry.get('DemuxResults') or []:
      # Tolerate a missing/empty 'IndexMetrics' list instead of failing on [0];
      # the index sequence is then recorded as None.
      index_metrics = sample_entry.get('IndexMetrics') or [{}]
      rows.append({
        'Lane':lane_number,
        'total_cluster_raw':total_cluster_raw,
        'total_cluster_pf':total_cluster_pf,
        'Sample_ID':sample_entry.get('SampleId'),
        'Sample_Name':sample_entry.get('SampleName'),
        'Index_seq':index_metrics[0].get('IndexSequence'),
        'Num_reads':sample_entry.get('NumberReads'),
        # Yield is stored divided by 1e6 and truncated to an integer.
        'Yield':int(sample_entry.get('Yield')/1000000)})
  return pd.DataFrame(rows),stats.get('UnknownBarcodes') or []


def _read_samplesheet_data(samplesheet_csv):
  # Return the table that follows the '[Data]' line of the samplesheet as a DataFrame of strings.
  in_data_section = False
  data_rows = list()
  with open(samplesheet_csv,'r') as fp:
    for line in fp:
      line = line.strip()
      if line.startswith('[Data]'):
        in_data_section = True
        continue
      if not in_data_section:
        continue
      if line.startswith('['):
        # Another '[Section]' header: the data table is over.
        break
      if not line.strip(','):
        # Skip blank lines and comma-only padding rows.
        continue
      data_rows.append(line.split(','))
  if not data_rows:
    raise ValueError('No [Data] section found in samplesheet {0}'.format(samplesheet_csv))
  # First row of the section is the header, the rest are sample rows.
  return pd.DataFrame(data_rows[1:],columns=data_rows[0])


def _summarise_reads_per_project(merged_df):
  # Build one row per (lane, project) plus an 'Undetermined' row per lane, with read counts and fractions.
  rows = list()
  for lane_id,lane_df in merged_df.groupby('Lane'):
    # total_cluster_pf is repeated on every row of a lane, so the first value is enough.
    total_cluster_pf = lane_df['total_cluster_pf'].values[0]
    # PF clusters not assigned to any sample listed in the samplesheet.
    total_undetermined = total_cluster_pf - lane_df['Num_reads'].sum()
    rows.append({'lane':lane_id,'project':'Undetermined','reads':total_undetermined})
    for project_id,project_df in lane_df.groupby('Sample_Project'):
      rows.append({'lane':lane_id,'project':project_id,'reads':project_df['Num_reads'].sum()})
  summary = pd.DataFrame(rows)
  # Fraction of all reads (across every lane) that each row accounts for.
  summary['percentage'] = summary['reads']/summary['reads'].sum()
  summary['reads'] = summary['reads'].astype(int)
  summary['lane'] = summary['lane'].astype(int)
  return summary


def _plot_project_fractions(summary,lanes):
  # Stacked horizontal histogram of the read fraction of each project, per lane.
  plt.rcParams['figure.figsize']=(7,6)
  ax = sns.histplot(hue='project',y='lane',data=summary,multiple='stack',weights='percentage',bins=10)
  ax.get_legend().set_bbox_to_anchor((1.8,1))
  ax.set_title('Reads % for each project',fontsize=16)
  if len(lanes)==1:
    ax.set_yticks([lanes[0]-1,lanes[0],lanes[0]+1])
  else:
    ax.set_yticks([0,1,2,3,4,5,6,7,8,9])
  plt.show()


def _plot_reads_per_sample(merged_df,lanes):
  # One bar chart per lane showing the number of reads of each sample, coloured by project.
  single_lane = len(lanes)==1
  title_format = 'Reads per sample - Lane {0}' if single_lane else 'Lane {0}'
  label_rotation = 65 if single_lane else 25
  plt.rcParams['figure.figsize']=(12,7*len(lanes))
  # squeeze=False always yields a 2-D array of axes, so one loop serves any lane count.
  fig,axes = plt.subplots(len(lanes),1,squeeze=False)
  for lane_id,ax in zip(lanes,axes[:,0]):
    # Restrict each panel to its own lane; plotting the full table would repeat
    # the same all-lane chart in every panel.
    lane_df = merged_df[merged_df['Lane']==lane_id]
    sns.barplot(data=lane_df,x='Sample_ID',y='Num_reads',hue='Sample_Project',ax=ax)
    ax.get_legend().set_bbox_to_anchor((1,1))
    ax.tick_params(axis='x',labelrotation=label_rotation,labelsize=8)
    ax.set_title(title_format.format(lane_id),loc='left',fontsize=25)
    ax.set_ylabel('Number of reads')
    ax.set_xlabel('Sample id')
  plt.show()


def _plot_unknown_barcodes(unknown_barcodes,max_barcodes=20):
  # Horizontal bar chart of the first max_barcodes unknown barcodes listed for each lane.
  rows = list()
  for entry in unknown_barcodes:
    lane = entry.get('Lane')
    barcodes = entry.get('Barcodes') or {}
    # Entries are taken in file order (presumably most frequent first - verify against the stats file).
    for barcode,counts in list(barcodes.items())[:max_barcodes]:
      rows.append({'lane':lane,'barcode':barcode,'read count':counts})
  if not rows:
    # Nothing to draw; an empty table would make the plotting call fail.
    return
  barcode_df = pd.DataFrame(rows)
  plt.rcParams['figure.figsize']=(4,4)
  ax = sns.barplot(data=barcode_df,y='barcode',x='read count',hue='lane',orient='h')
  ax.get_legend().set_bbox_to_anchor((1,1))
  ax.tick_params(axis='y',labelsize=5)
  ax.tick_params(axis='x',labelsize=5)
  ax.set_ylabel('Undetermined barcodes',fontsize=7)
  ax.set_xlabel('Read counts',fontsize=7)
  ax.set_title('Undetermined barcodes per lane',fontsize=10)
  plt.show()


def plot_demult_stats(stats_json,samplesheet_csv):
  """Plot a de-multiplexing report from a stats JSON file and a samplesheet.

  Three figures are shown, in this order:

  1. the fraction of reads per project (plus 'Undetermined') for each lane,
  2. the number of reads per sample, one panel per lane,
  3. the read counts of the first 20 unknown barcodes of each lane
     (skipped when the stats file lists none).

  :param stats_json: Path to the JSON file holding 'ConversionResults' and,
                     optionally, 'UnknownBarcodes'
  :param samplesheet_csv: Path to a samplesheet whose '[Data]' section has at
                          least the 'Sample_ID' and 'Sample_Project' columns
  :returns: None
  :raises ValueError: If the stats file has no demultiplexing results, the
                      samplesheet has no '[Data]' rows, or no Sample_ID is
                      shared between the two files
  """
  stats_df,unknown_barcodes = _load_demux_stats(stats_json)
  if stats_df.empty:
    raise ValueError('No demultiplexing results found in {0}'.format(stats_json))
  samplesheet = _read_samplesheet_data(samplesheet_csv)
  # A samplesheet may list the same Sample_ID once per lane; de-duplicate the
  # project lookup so the join cannot multiply rows and inflate read counts.
  project_lookup = samplesheet[['Sample_ID','Sample_Project']].drop_duplicates().set_index('Sample_ID')
  merged_df = project_lookup.join(stats_df.set_index('Sample_ID'),how='inner').reset_index()
  if merged_df.empty:
    raise ValueError('No Sample_ID is shared between {0} and {1}'.format(stats_json,samplesheet_csv))
  merged_df['Lane'] = merged_df['Lane'].astype(int)
  merged_df['Num_reads'] = merged_df['Num_reads'].astype(int)
  merged_df['total_cluster_pf'] = merged_df['total_cluster_pf'].astype(int)
  lanes = list(merged_df.groupby('Lane').groups.keys())
  _plot_project_fractions(_summarise_reads_per_project(merged_df),lanes)
  _plot_reads_per_sample(merged_df,lanes)
  _plot_unknown_barcodes(unknown_barcodes)

In [None]:
# Render the report. The '{{ ... }}' values look like template placeholders
# that are substituted with real file paths before this cell runs - TODO confirm.
plot_demult_stats(
  stats_json='{{ STATS_JSON }}',
  samplesheet_csv='{{ SAMPLESHEET_CSV }}')