In [196]:
# download latest version of pip, numpy, pandas if desired
!pip install --upgrade pip
!pip install --upgrade numpy pandas

Looking in indexes: https://registry.affirm-stage.com/artifactory/api/pypi/pypi/simple
Requirement already up-to-date: pip in /Users/bensonlee/.pyenv/versions/3.7.3/lib/python3.7/site-packages (20.1.1)
Looking in indexes: https://registry.affirm-stage.com/artifactory/api/pypi/pypi/simple
Requirement already up-to-date: numpy in /Users/bensonlee/.pyenv/versions/3.7.3/lib/python3.7/site-packages (1.18.4)
Requirement already up-to-date: pandas in /Users/bensonlee/.pyenv/versions/3.7.3/lib/python3.7/site-packages (1.0.3)


In [197]:
import yaml
import os

import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re
from sklearn.decomposition import PCA

import seaborn as sns

## Some setup

In [198]:
# Feedback lines for a wrong answer; the question handlers pick one at random.
rand_incorr = [
    "Not quite, but you're learning! Try again.",
    "That's not the answer I was looking for, but try again.",
    "Not exactly. Give it another go.",
    "You're close...I can feel it! Try it again.",
    "Nice try, but that's not exactly what I was hoping for. Try again.",
    "One more time. You can do it!",
]

# Feedback lines for a right answer; the question handlers pick one at random.
rand_corr = [
    "Excellent job!",
    "Nice job!",
    "You got it!",
]

In [199]:
# Parse lesson
def process_lesson(yaml):
    """
    Walks through a parsed lesson and runs each entry interactively.

    Parameters
    ----------
    yaml : sequence of dict
        The parsed lesson entries. Each entry is dispatched on its 'Class'
        key. The parameter name shadows the ``yaml`` module inside this
        function only; it is kept so existing callers are unaffected.

    Behaviour
    ---------
    * 'meta' and 'text' entries are handled once and the walk moves on.
    * 'mult_question', 'cmd_question' and 'script' entries are repeated
      until their handler reports a correct answer; on a repeat the handler
      is called with the repeat flag set.
    * Entries with any other 'Class', or with no 'Class' key at all, are
      printed after an "UNK!!" marker and skipped.

    Returns None. Prints a closing message once every entry is done.
    """
    # Work on the argument. The body used to read a notebook-global `lesson`,
    # which made the parameter meaningless and broke on a fresh kernel.
    entries = yaml

    idx = 0
    repeat_q_flag = False
    n_entries = len(entries)
    while idx < n_entries:
        entry = entries[idx]
        # .get() lets an entry without 'Class' fall through to the UNK branch
        # instead of raising KeyError.
        entry_class = entry.get('Class')

        if entry_class == 'meta':
            proc_meta(entry)
            idx += 1
        elif entry_class == 'text':
            proc_text(entry)
            idx += 1
        elif entry_class == 'mult_question':
            corr = proc_mult_choice(entry, repeat_q_flag)
            repeat_q_flag = not corr
            # Advance only on a correct answer; otherwise ask again.
            idx += int(corr)
        elif entry_class in ['cmd_question', 'script']:
            corr = proc_script(entry, repeat_q_flag)
            repeat_q_flag = not corr
            idx += int(corr)
        else:
            print("UNK!!")
            print(entry)
            idx += 1

    print("Congrats on completing the lesson!")

In [208]:
def proc_meta(d: dict):
    """
    Prints the metadata fields of a lesson entry, one "key: value" per line.

    The 'Class' key is left out of the printout. Always returns 1.
    """
    shown_fields = ((name, value) for name, value in d.items() if name != 'Class')
    for name, value in shown_fields:
        print(f'{name}: {value}')
    return 1

def proc_text(d: dict):
    """
    Shows the 'Output' text of a lesson entry and waits for Enter.

    A blank-line separator is printed once for every key in the entry,
    whether or not that key is 'Output'. Always returns 1.
    """
    for key in d:
        is_output = key == 'Output'
        if is_output:
            print(f'{d[key]}')
            input("Press Enter to continue...")
        # Emitted for every key, not only for 'Output'.
        print("\n")

    return 1
        
def proc_mult_choice(d: dict, repeat_q: bool):
    """
    Prompts user for multiple choice question and checks the reply.

    Parameters
    ----------
    d : dict
        The lesson entry. Keys read here: 'Output', 'AnswerChoices'
        (a ';'-separated string), 'Hint' and 'CorrectAnswer'.
    repeat_q : bool
        False on the first attempt: 'Output' and 'AnswerChoices' are printed.
        True on a repeat attempt: only 'Hint' is printed.

    Returns
    -------
    bool
        True when the reply matches 'CorrectAnswer', False otherwise. A random
        line from ``rand_corr`` / ``rand_incorr`` is printed accordingly.

    Raises
    ------
    KeyError
        If the entry has no 'CorrectAnswer'.
    """
    if 'CorrectAnswer' not in d:
        # Fail before prompting rather than on an unbound local afterwards.
        raise KeyError("multiple choice entry has no 'CorrectAnswer'")
    # The answer may arrive as a non-string (e.g. an int or bool parsed from
    # the lesson file); compare it as text so it can match typed input.
    ans = str(d['CorrectAnswer'])

    for k, v in d.items():
        if k in ('Output', 'AnswerChoices') and not repeat_q:
            if k == 'AnswerChoices':
                choices = '\n'.join(v.split(';'))
                print(f"Choices: \n\n{choices}")
            else:
                print(f'{v}')
        elif k == 'Hint' and repeat_q:
            print(f'Hint: {v}')

    guess = input("> ")

    # perhaps need to consider numerical precision here; not sure if symbolic evaluation would be nice
    if guess.isnumeric():
        # Purely numeric replies must equal the answer text exactly.
        is_match = guess == ans
    else:
        # Case-insensitive comparison on letters and digits only. Quotes are
        # stripped by this pattern too, so no separate quote handling is needed.
        is_match = (re.sub('[^A-Za-z0-9]+', '', guess.upper())
                    == re.sub('[^A-Za-z0-9]+', '', ans.upper()))

    if is_match:
        print(random.choice(rand_corr))
    else:
        print(random.choice(rand_incorr))

    return is_match

def proc_script(d: dict, repeat_q: bool):
    """
    Prompts user for question requiring a typed response and checks it.

    Parameters
    ----------
    d : dict
        The lesson entry. Keys read here: 'Output', 'AnswerChoices'
        (a ';'-separated string), 'Hint' and 'CorrectAnswer'.
    repeat_q : bool
        False on the first attempt: 'Output' and 'AnswerChoices' are printed.
        True on a repeat attempt: only 'Hint' is printed.

    Returns
    -------
    bool
        True when the reply equals 'CorrectAnswer' after both are stripped of
        surrounding whitespace and double quotes are turned into single
        quotes; False otherwise. A random line from ``rand_corr`` /
        ``rand_incorr`` is printed accordingly.

    Raises
    ------
    KeyError
        If the entry has no 'CorrectAnswer'.
    """
    if 'CorrectAnswer' not in d:
        # Fail before prompting rather than on an unbound local afterwards.
        raise KeyError("script entry has no 'CorrectAnswer'")
    # The answer may arrive as a non-string (e.g. a number parsed from the
    # lesson file); re.sub needs text.
    ans = str(d['CorrectAnswer'])

    for k, v in d.items():
        if k in ('Output', 'AnswerChoices') and not repeat_q:
            # Key is 'AnswerChoices' (no space); the old spelling never matched.
            if k == 'AnswerChoices':
                choices = '\n'.join(v.split(';'))
                print(f"Choices: \n\n{choices}")
            else:
                print(f'{v}')
        elif k == 'Hint' and repeat_q:
            print(f'Hint: {v}')

    guess = input("> ")

    def _normalise(text):
        # Treat " and ' as the same quote. The class no longer contains '|',
        # which inside [] is a literal pipe, not an alternation.
        return re.sub('["\']', '\'', text).strip()

    # perhaps need to consider numerical precision here; not sure if symbolic evaluation would be nice
    is_match = _normalise(guess) == _normalise(ans)

    if is_match:
        print(random.choice(rand_corr))
    else:
        print(random.choice(rand_incorr))

    return is_match

## Start running stuff

In [209]:
# Show every entry of the course directory except the MANIFEST file
course_dir = '../courses-python/scRNA-seq/'

print("The following courses are available .. \n")
print(*(entry for entry in os.listdir(course_dir) if entry != 'MANIFEST'), sep='\n')

The following courses are available .. 

low_level_analyses_norm_dim_reduction
high_level_analyses_intro_clustering
low_level_analyses_intro
low_level_analyses_explain_alignment
low_level_analyses_import_data
low_level_analyses_counting_reads


In [214]:
# Pick lesson to load
# doesn't work with import data or high_level_analyses_intro_clustering; think clustering prob needs to be demo'd
# since not conventional to do single line in python
lesson_to_load = 'low_level_analyses_explain_alignment'
lesson_path = os.path.join(course_dir, lesson_to_load, 'lesson.yaml')

# Open through a context manager so the file handle is closed after parsing.
# NOTE(review): FullLoader is kept unchanged; yaml.safe_load would be the
# stricter choice if lesson files can come from an untrusted source.
with open(lesson_path) as lesson_file:
    lesson = yaml.load(lesson_file, Loader=yaml.FullLoader)

In [215]:
# Run the loaded lesson; the handlers print to the cell output and read
# replies through input().
process_lesson(lesson)

Course: BiocSwirl
Lesson: Low Level Analyses
Author: Paaksum Wong
Type: Standard
Organization: The University of British Columbia
Version: 1.0


Next-gen sequencers output a FASTQ file for each sequencing read (single-end or paired-end), which will be the first thing you work with in scRNA-seq analysis. Each FASTQ file contains a sequence identifier, the nucleotide sequence as well as quality scores for each nucleotide encoded in ASCII characters. The first step of analysis is to align these FASTQ files to a reference.
Press Enter to continue...




There are many open source and commercial alignment tools with varying sensitivities and speeds. Which aligner you choose to use depends on available computing power and the acceptable trade-off between accuracy and speed. Commonly used aligners include BWA, Bowtie2, HISAT2, Bfast, and Stampy.
Press Enter to continue...




In our sample data, 100bp single-end reads were aligned using RSEM to the mm10 mouse genome build with the RefSeq anno