In [17]:
# Install a pip package in the current Jupyter kernel.
# pip is launched via `sys.executable -m pip` rather than a bare `pip` command,
# i.e. with the Python interpreter that is running this kernel.
# NOTE(review): the package version is not pinned, and the recorded output of
# this cell shows the install replacing numpy 1.17.5 with numpy 1.16.3 and
# reporting pandas-profiling requirement conflicts — consider pinning the
# version and installing into a dedicated environment.
import sys
!{sys.executable} -m pip install pyclaragenomics-cuda-10-1

Collecting numpy==1.16.3
  Using cached numpy-1.16.3-cp37-cp37m-manylinux1_x86_64.whl (17.3 MB)
[31mERROR: pandas-profiling 2.8.0 has requirement matplotlib>=3.2.0, but you'll have matplotlib 3.0.3 which is incompatible.[0m
[31mERROR: pandas-profiling 2.8.0 has requirement tqdm>=4.43.0, but you'll have tqdm 4.31.1 which is incompatible.[0m
Installing collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 1.17.5
    Uninstalling numpy-1.17.5:
      Successfully uninstalled numpy-1.17.5
Successfully installed numpy-1.16.3


In [19]:
from claragenomics.bindings import cudaaligner
from claragenomics.simulators.genomesim import PoissonGenomeSimulator

In [20]:
def generate_data(max_query_len, max_target_len, num_alignments):
    """Build a list of simulated (query, target) sequence pairs.

    A single ``PoissonGenomeSimulator`` is created and, for every pair,
    ``build_reference`` is called first with ``max_query_len`` and then with
    ``max_target_len``.

    Args:
        max_query_len: Value passed to ``build_reference`` for the query.
            NOTE(review): presumably the generated sequence length — confirm
            against the simulator's documentation.
        max_target_len: Value passed to ``build_reference`` for the target.
        num_alignments: Number of (query, target) pairs to generate.

    Returns:
        A list of ``num_alignments`` 2-tuples ``(query, target)``.
    """
    simulator = PoissonGenomeSimulator()

    print("Generating data...")
    # Tuple elements are evaluated left to right, so the query is always
    # generated before its target, matching a plain sequential loop.
    pairs = [
        (simulator.build_reference(max_query_len),
         simulator.build_reference(max_target_len))
        for _ in range(num_alignments)
    ]
    print("Data generation complete.")

    return pairs

In [25]:
def run_cudaaligner(print_output):
    """Generate random sequence pairs and align them in batches on GPU 0.

    Pairs are added to a ``cudaaligner.CudaAlignerBatch`` one at a time. The
    batch is aligned and reset whenever it reports that it is full, and once
    more after the final pair has been added.

    Args:
        print_output: If truthy, print every alignment returned by the batch.

    Raises:
        RuntimeError: If ``add_alignment`` returns a status other than
            success (0) or batch-full (2), or if the batch reports being full
            while it holds no alignments (which would otherwise retry forever).
    """
    # Status codes returned by add_alignment that this loop handles itself.
    status_success = 0
    status_batch_full = 2  # exceeded max alignments per batch

    max_query_len = 100
    max_target_len = 150
    max_alignments_per_batch = 100

    sequence_pairs = generate_data(max_query_len, max_target_len, 10)

    batch = cudaaligner.CudaAlignerBatch(max_query_len,
                                         max_target_len,
                                         max_alignments_per_batch,
                                         device_id=0)

    num_pairs = len(sequence_pairs)
    pending = 0  # alignments added to the batch since the last reset
    pair_id = 0  # index of the next pair that still has to be added
    while pair_id != num_pairs:
        query, target = sequence_pairs[pair_id]
        status = batch.add_alignment(query, target)

        # Check for real errors first so they are never masked by the
        # "last pair" flush condition below.
        if status not in (status_success, status_batch_full):
            raise RuntimeError("Unexpected error occurred : {}".format(cudaaligner.status_to_str(status)))

        if status == status_success:
            pending += 1
            pair_id += 1
        elif pending == 0:
            # The batch is full although nothing was added: flushing cannot
            # free any room, so retrying would loop forever.
            raise RuntimeError("Unexpected error occurred : {}".format(cudaaligner.status_to_str(status)))

        # Flush when the batch is full (the current pair is retried on the
        # next iteration) or when every pair has been added.
        if status == status_batch_full or pair_id == num_pairs:
            batch.align_all()
            alignments = batch.get_alignments()
            if print_output:
                for a in alignments:
                    print("{}\n".format(a))
            batch.reset()
            pending = 0
            # pair_id now points one past the last pair in the flushed batch.
            print("Aligned sequences till {}".format(pair_id - 1))

In [26]:
run_cudaaligner(True)

Generating data...
Data generation complete.
G-CAAA-T----A-TC-G--CATACAAAATAGCCAGGGGTTA-T---A-C-GACAT--T--GC-----GGC-A---A-T-G--CACTCCAATAGAGTACG-GTCCCA-TT--T--TG---GGCATT-T-GGCAGT--A-ACAA-C-GA---T
CTCGGAGTTCGTACTCAGGTCTTGCAGGA-AGCCAGGCGTTTGTCGGAGCCGCCCTGCTATGATATATGGCCAGCTAGTCGTTCAGTGCATTAG-GCAATTGTCTCACTTCATAGTGATAGGTACTCTAGGCGGTGTATACATTCTGGGGGT


----TT-TC-AA--TG---T-GGAG-G---T--G--C-TG-C--G-CT---T----T-G----AA--C-C--A--CAGC-A-AA--C-GTGGTTTAGT-CCAACT-C-GC-GCCATGCCCGGCTACGATGAGTCCTC-GAAAGGCGGGCTAGCA
CCCCTTCTCTAAAATGGAGTCGGTGTGAAGTATGAGCCTCACCCGGCTGGCTAAAATAGTTTTAAAACACGTAGACAGCTATAAAACCGTGCTGT-GTGCCTACTACCGTTGACACAACCGTCTCCTCTTCTTCCTTAGACACGCCTTTT-G--


CT--G--A-C--G-C-CT--CG-AGCC-C----T---G-GC-TT-G-CT--TGC-T-----TCTTAGATCGG--C--GTG-CAA-GCA-T-CACT--AGGATGGG-GCTATTT-GAACGA-TTCCC-T-GAGTCGTC-AGCGGTT-AT--C
ATTCGCTAACCTGGCACTTGCGGAACCGCCAAGTATCGCGCGTTTGACAGATGGATAGGAGTGTTATCTCAGTTCTAGAGGCCACGCAGTACACTGTAACAAGTCCGTTGTTTTGAAGGAGTTCCTATCGAGTA-TATAGCATTTGAAAAC


----C-GA-A-GTGCTCC--TAC-ATAT-