Purpose: I ran the nf-core rnaseq pipeline on the B73 non-heat and salt samples. The minimum mapping rate was 24%. I want to find out which samples, and how many, have mapping rates under 50%.<br>
Author: Anna Pardo<br>
Date initiated: April 27, 2023

In [1]:
# load modules
import pandas as pd

In [2]:
# read the per-sample Salmon mapping rates (comma-separated, header row inferred)
maprates = pd.read_csv("salmon_map_rates.csv")
maprates.head()

Unnamed: 0,File,Sample,Percent_Mapped
0,/mnt/scratch/haberan2/Core_Stress_Response/01_...,SRR6807372,78.328464
1,/mnt/scratch/haberan2/Core_Stress_Response/01_...,SRR2144421,85.373789
2,/mnt/scratch/haberan2/Core_Stress_Response/01_...,SRR5344627,83.929147
3,/mnt/scratch/haberan2/Core_Stress_Response/01_...,SRR11091716,31.425001
4,/mnt/scratch/haberan2/Core_Stress_Response/01_...,SRR2144418,82.633523


In [5]:
# rename the Sample column to SRA_number so it matches the metadata table's
# column name; rebinding the result (instead of inplace=True) avoids mutating
# the frame object that the earlier cell loaded and displayed
maprates = maprates.rename(columns={"Sample": "SRA_number"})

In [3]:
# read the tab-separated sample metadata table (header row inferred)
md = pd.read_csv("srr_numbers_with_metadata_14-Apr-2023.tsv", sep="\t")
md.head()

Unnamed: 0,BioProject,SRA_number,sample_name,Replicate_num,Genotype,Technology,Library_layout,Treatment,Duration_hours,Time_after_treatment,...,Developmental_stage,Tissue,Day_length_hours,Day_temp_C,Night_temp_C,Relative humidity (%),Light (umol/m2/s),Growth Env.,Media,Notes
0,PRJNA637522,SRR11933261,238_WS2,,238,Illumina HiSeq 2500,PAIRED,Drought,312.0,13 days,...,V3,Leaf,16.0,25.0,,,,Greenhouse,,
1,PRJNA637522,SRR11933272,238_WS1,,238,Illumina HiSeq 2500,PAIRED,Drought,216.0,9 days,...,V3,Leaf,16.0,25.0,,,,Greenhouse,,
2,PRJNA637522,SRR11933250,268_WS1,,268,Illumina HiSeq 2500,PAIRED,Drought,216.0,9 days,...,V3,Leaf,16.0,25.0,,,,Greenhouse,,
3,PRJNA637522,SRR11933029,268_WW,,268,Illumina HiSeq 2500,PAIRED,Control,216.0,9 days,...,V3,Leaf,16.0,25.0,,,,Greenhouse,,
4,PRJNA637522,SRR11933040,268_WS2,,268,Illumina HiSeq 2500,PAIRED,Drought,312.0,13 days,...,V3,Leaf,16.0,25.0,,,,Greenhouse,,


In [6]:
# merge mapping rates with the sample metadata, keeping only samples present
# in both tables; the join key is spelled out so the merge cannot silently
# pick up any other column name the two tables happen to share
mdmap = maprates.merge(md, how="inner", on="SRA_number")
mdmap.head()

Unnamed: 0,File,SRA_number,Percent_Mapped,BioProject,sample_name,Replicate_num,Genotype,Technology,Library_layout,Treatment,...,Developmental_stage,Tissue,Day_length_hours,Day_temp_C,Night_temp_C,Relative humidity (%),Light (umol/m2/s),Growth Env.,Media,Notes
0,/mnt/scratch/haberan2/Core_Stress_Response/01_...,SRR6807372,78.328464,PRJNA436973,supply 0.04mM nitrogen (LN),1.0,B73,Illumina HiSeq 2500,PAIRED,Low_Nitrogen,...,V6,Leaf,14.0,28.0,22.0,,250-300,Chamber,Nutrient solution,
1,/mnt/scratch/haberan2/Core_Stress_Response/01_...,SRR2144421,85.373789,PRJNA291919,drought-stressed,,B73,Illumina HiSeq 2500,SINGLE,Drought,...,V12,Ear,,,,,,Field,,
2,/mnt/scratch/haberan2/Core_Stress_Response/01_...,SRR5344627,83.929147,PRJNA378714,drought_re-watered_rep2,2.0,B73,Illumina HiSeq 2000,PAIRED,DroughtRec,...,V3,Shoot,18.0,25.0,,,,Greenhouse,,
3,/mnt/scratch/haberan2/Core_Stress_Response/01_...,SRR11091716,31.425001,PRJNA606824,12h_1,1.0,B73,Illumina HiSeq 4000,PAIRED,Flooding,...,Seedling,Root,14.0,28.0,22.0,60.0,,Greenhouse,,
4,/mnt/scratch/haberan2/Core_Stress_Response/01_...,SRR2144418,82.633523,PRJNA291919,drought-stressed,,B73,Illumina HiSeq 2500,SINGLE,Drought,...,V12,Ear,,,,,,Field,,


In [7]:
# keep only the samples whose mapping rate is at or below 50%
is_low = mdmap["Percent_Mapped"].le(50)
lowmapping = mdmap.loc[is_low]

In [8]:
# display the low-mapping samples (Percent_Mapped <= 50) with their merged metadata
lowmapping

Unnamed: 0,File,SRA_number,Percent_Mapped,BioProject,sample_name,Replicate_num,Genotype,Technology,Library_layout,Treatment,...,Developmental_stage,Tissue,Day_length_hours,Day_temp_C,Night_temp_C,Relative humidity (%),Light (umol/m2/s),Growth Env.,Media,Notes
3,/mnt/scratch/haberan2/Core_Stress_Response/01_...,SRR11091716,31.425001,PRJNA606824,12h_1,1.0,B73,Illumina HiSeq 4000,PAIRED,Flooding,...,Seedling,Root,14.0,28.0,22.0,60,,Greenhouse,,
9,/mnt/scratch/haberan2/Core_Stress_Response/01_...,SRR11091709,40.192046,PRJNA606824,4h_1,1.0,B73,Illumina HiSeq 4000,PAIRED,Flooding,...,Seedling,Root,14.0,28.0,22.0,60,,Greenhouse,,
10,/mnt/scratch/haberan2/Core_Stress_Response/01_...,SRR11091708,49.445531,PRJNA606824,4h_2,2.0,B73,Illumina HiSeq 4000,PAIRED,Flooding,...,Seedling,Root,14.0,28.0,22.0,60,,Greenhouse,,
20,/mnt/scratch/haberan2/Core_Stress_Response/01_...,SRR2144465,44.169458,PRJNA291919,drought-stressed,,B73,Illumina HiSeq 2500,SINGLE,Drought,...,V14,Tassel,,,,,,Field,,
23,/mnt/scratch/haberan2/Core_Stress_Response/01_...,SRR2144475,45.706363,PRJNA291919,drought-stressed,,B73,Illumina HiSeq 2500,SINGLE,Drought,...,R1,Tassel,,,,,,Field,,
68,/mnt/scratch/haberan2/Core_Stress_Response/01_...,SRR2144447,44.89015,PRJNA291919,well-watered,,B73,Illumina HiSeq 2500,SINGLE,Control,...,V12,Tassel,,,,,,Field,,
84,/mnt/scratch/haberan2/Core_Stress_Response/01_...,SRR2106185,36.257916,PRJNA290180,,1.0,B73,Illumina HiSeq 2000,SINGLE,DroughtRec,...,V5/V6,Leaf,,29.0,21.0,60-80,,Greenhouse,,
88,/mnt/scratch/haberan2/Core_Stress_Response/01_...,SRR2144458,45.117126,PRJNA291919,well-watered,,B73,Illumina HiSeq 2500,SINGLE,Control,...,R1,Tassel,,,,,,Field,,
100,/mnt/scratch/haberan2/Core_Stress_Response/01_...,SRR2144469,44.557786,PRJNA291919,drought-stressed,,B73,Illumina HiSeq 2500,SINGLE,Drought,...,V18,Tassel,,,,,,Field,,
105,/mnt/scratch/haberan2/Core_Stress_Response/01_...,SRR2106187,44.596716,PRJNA290180,,1.0,B73,Illumina HiSeq 2000,SINGLE,DroughtSaltRec,...,V5/V6,Leaf,,29.0,21.0,60-80,,Greenhouse,,


In [11]:
# there are 15 low-mapping samples; list their SRR numbers, one per line
for srr in lowmapping["SRA_number"].tolist():
    print(srr)

SRR11091716
SRR11091709
SRR11091708
SRR2144465
SRR2144475
SRR2144447
SRR2106185
SRR2144458
SRR2144469
SRR2106187
SRR11091724
SRR11091722
SRR2144449
SRR2144448
SRR11091707


In [13]:
# compact view of the low-mapping samples: identifying columns only
lowmapping.loc[:, ["SRA_number", "Percent_Mapped", "BioProject", "Treatment", "Developmental_stage", "Tissue"]]

Unnamed: 0,SRA_number,Percent_Mapped,BioProject,Treatment,Developmental_stage,Tissue
3,SRR11091716,31.425001,PRJNA606824,Flooding,Seedling,Root
9,SRR11091709,40.192046,PRJNA606824,Flooding,Seedling,Root
10,SRR11091708,49.445531,PRJNA606824,Flooding,Seedling,Root
20,SRR2144465,44.169458,PRJNA291919,Drought,V14,Tassel
23,SRR2144475,45.706363,PRJNA291919,Drought,R1,Tassel
68,SRR2144447,44.89015,PRJNA291919,Control,V12,Tassel
84,SRR2106185,36.257916,PRJNA290180,DroughtRec,V5/V6,Leaf
88,SRR2144458,45.117126,PRJNA291919,Control,R1,Tassel
100,SRR2144469,44.557786,PRJNA291919,Drought,V18,Tassel
105,SRR2106187,44.596716,PRJNA290180,DroughtSaltRec,V5/V6,Leaf
