# Differential analysis with kallisto and sleuth

Useful links:
* https://hbctraining.github.io/DGE_workshop_salmon/lessons/09_sleuth.html

In [None]:
import os
import shutil
import numpy as np
import yaml

In [None]:
# Read the project configuration that lives one directory above this notebook.
# The path is relative, so this cell must run while the working directory is
# still the notebook's own directory.
# NOTE(review): the full loader accepts more than plain YAML data types; if
# config.yaml could ever come from an untrusted source, consider yaml.safe_load.
with open("../config.yaml", "r") as yamlfile:
    config = yaml.full_load(yamlfile)
print("Read successful")
print(config)

In [None]:
# Pull the three directory settings used throughout the notebook out of the
# 'data' section of the first config entry:
#   ratdb    - location of the reference FASTA and the kallisto index
#   fastp    - location of the per-sample fastp read directories
#   kallisto - location where per-sample kallisto output is written
ratdb, fastp, kallisto = (
    config[0]['data'][key] for key in ('ratdb', 'fastp', 'kallisto')
)

## Building index

Rat data was downloaded from Ensembl (https://www.ensembl.org/Rattus_norvegicus/Info/Index). Genome release: mRatBN7.2 (GCA_015227675.2). NB: in the soft-masked genome, difficult regions (e.g. repetitive sequences) are marked with lower-case bases; for GTF files, the `chr` suffix indicates that the file contains only transcripts mapped to chromosomes. We combined the cDNA and ncRNA transcript sequences into a single FASTA file to build the kallisto index. Another useful resource is the Rat Genome Database: https://rgd.mcw.edu/rgdweb/homepage/.

In [None]:
# Build the kallisto index with an IPython shell escape; {ratdb} is expanded
# from the Python variable of the same name before the command runs.
#   -i <file>   path of the index file to write
#   <fasta>     gzipped FASTA to index (file name indicates combined cDNA + ncRNA)
!kallisto index -i {ratdb}/mRatBN7.2_kallisto.idx {ratdb}/Rattus_norvegicus.mRatBN7.2.cdna.ncrna.fa.gz

## Quantification

In [None]:
# Move into the fastp directory and collect the names of its sub-directories
# in sorted order (plain files are ignored).
# NOTE: os.chdir changes the kernel's working directory for every later cell,
# so relative paths used afterwards resolve against `fastp`.
os.chdir(fastp)
dirs = sorted(entry for entry in os.listdir('.') if os.path.isdir(entry))
len(dirs)

In [None]:
# Derive the sample ids from the directory names: keep everything before the
# last underscore of each name, then reduce to the sorted set of unique values.
# `ids` stays a NumPy array, as returned by np.unique.
ids = np.unique(np.array([name.rsplit("_", 1)[0] for name in dirs]))
len(ids)

In [None]:
for i in range(len(ids)):
    !kallisto quant -i {ratdb}/mRatBN7.2_kallisto.idx -t 28 -b 100 {fastp}/{ids[i]}_fastp/{ids[i]}_R1_fastp.fq.gz {fastp}/{ids[i]}_fastp/{ids[i]}_R2_fastp.fq.gz -o {kallisto}/{ids[i]}_kallisto --verbose