In [None]:
!pip install synapseclient==1.6.1 pandas

In [None]:
# Necessary libraries
from IPython.display import clear_output

from tqdm import tqdm_notebook as tqdm
import synapseclient
import pandas as pd
import subprocess
import os
import getpass

# --- Notebook configuration: edit these values before running ---

# Synapse account name used for the login cell (left empty on purpose)
syn_user = ""

# Location of the kallisto index file; replace <PATH-TO> with the real folder
kalRef = "<PATH-TO>/Homo_sapiens.GRCh38.cdna.all.release-94_k31.idx"

# Working directory: every input/output path below is built relative to it
local = os.getcwd()

In [None]:
# Read the table that lists every .fastq file to be processed
tablePath = f"{local}/data/MayoRNAseq_TCX_table.csv"
df = pd.read_csv(tablePath)
df.head()  # preview the first rows

In [None]:
# Log in to Synapse as `syn_user`. getpass.getpass() prompts for the password
# interactively, so the password is not written into the notebook source.
syn = synapseclient.login(syn_user, getpass.getpass()) # Do not share or commit your credentials

In [None]:
# Download and Realignment

# Order the file table by file name, then collect each specimen ID once.
# NOTE(review): the sort presumably makes the two files of a specimen come
# out in a stable (read 1, read 2) order inside runKallisto - confirm.
df = df.sort_values("name")
samples = df["specimenID"].unique()

# Folder that receives the downloaded FASTQ files
fqPath = f"{local}/fastq"

def runKallisto(samples, path):
    """Download the two FASTQ files of one specimen and quantify them with kallisto.

    The function relies on notebook-level globals: ``df`` (file table with
    ``specimenID``, ``name`` and ``id`` columns), ``syn`` (logged-in Synapse
    client), ``local`` (working directory), ``fqPath`` (FASTQ download folder)
    and ``kalRef`` (kallisto index file).

    Parameters
    ----------
    samples : str
        A single specimen ID (the name is plural for historical reasons).
        It must match exactly two rows of ``df``.
    path : str
        Unused. Kept so existing calls keep working; results are always
        written below ``local``.

    Returns
    -------
    None
        Results are written to ``<local>/kallisto/MAYO_TCX/<samples>``.

    Raises
    ------
    ValueError
        If ``df`` does not hold exactly two files for the specimen.
    """
    # Output folder for this specimen. os.makedirs also creates missing parent
    # folders and is a no-op when the folder already exists (a shelled-out
    # `mkdir` did neither and hid any failure).
    kalDir = f"{local}/kallisto/MAYO_TCX/{samples}"
    os.makedirs(kalDir, exist_ok=True)

    # Look up the two files registered for this specimen
    rows = df[df.specimenID == samples]
    if len(rows) != 2:
        raise ValueError(
            f"Expected exactly 2 files for specimen {samples}, found {len(rows)}"
        )
    name1, name2 = rows["name"]
    r1, r2 = rows["id"]

    print("\nSamples:")
    print(f"Sample.1: {name1}")
    print(f"r1: {r1}")
    print(f"Sample.2: {name2}")
    print(f"r2: {r2}")

    # abundance.tsv marks a finished run, so the specimen can be skipped
    if os.path.isfile(f"{kalDir}/abundance.tsv"):
        print(f"Sample {samples} was already processed!")
        return

    print("Downloading " + samples + " ...")
    os.makedirs(fqPath, exist_ok=True)
    downloaded = []  # local paths to delete again, even when a later step fails
    try:
        for syn_id in (r1, r2):
            downloaded.append(syn.get(syn_id, downloadLocation=fqPath).path)

        print(f"Running kallisto on {samples} ...")
        # Argument list instead of a shell string: paths containing spaces or
        # shell metacharacters are passed through unchanged.
        command = [
            "kallisto", "quant",
            "-i", kalRef,
            "-t", "7",
            "-l", "100",
            "-s", "20",
            "-o", kalDir,
            *downloaded,
        ]
        try:
            status = subprocess.call(command)
        except OSError as err:
            # e.g. the kallisto executable is not installed / not on PATH
            status = None
            print(f"Could not start kallisto for {samples}: {err}")

        # Report failures instead of silently moving on to the next specimen
        if status != 0:
            print(f"kallisto failed for {samples} (exit status: {status})")
    finally:
        # Always free the disk space taken by the downloaded FASTQ files
        for fastq in downloaded:
            if os.path.exists(fastq):
                os.remove(fastq)
    
# Process every specimen in turn. The following cell repeats this same loop
# with a progress bar; specimens that already have results are skipped there.
for specimen in samples:
    runKallisto(samples=specimen, path=local)


In [None]:
%timeit
# Run process
for i in tqdm(samples):
    runKallisto(samples = i, path = local)