# Serial Implementation

In [1]:
%%writefile smithwaterman.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <time.h>

int blosum62mat[25][25];

/*
 * Map a one-letter amino-acid code to its row/column index in blosum62mat.
 *
 * The recognised codes are, in index order,
 *   A R N D C Q E G H I L K M F P S T W Y V B J Z X   (indices 0..23).
 * Any other character (lower case, '*', NUL, ...) maps to the catch-all
 * index 24.
 */
int place(char a) {
    static const char residue_order[] = "ARNDCQEGHILKMFPSTWYVBJZX";
    const char* hit;

    /* strchr() also matches the terminating NUL, so rule that out first. */
    if (a == '\0')
        return 24;

    hit = strchr(residue_order, a);
    if (hit == NULL)
        return 24;

    return (int)(hit - residue_order);
}

/*
 * Substitution score for pairing residue `a` with residue `b`:
 * both letters are translated with place() and used as the row and
 * column of the global blosum62mat table.
 */
int score(char a, char b) {
    const int row_idx = place(a);
    const int col_idx = place(b);

    return blosum62mat[row_idx][col_idx];
}

/*
 * Build the local-alignment scoring matrix for seq1 (rows) against seq2
 * (columns).
 *
 * The matrix has strlen(seq1)+1 rows and strlen(seq2)+1 columns; row 0 and
 * column 0 are all zero. Every other cell takes the best of
 *   - the cell above    minus a gap penalty,
 *   - the cell to the left minus a gap penalty,
 *   - the diagonal cell plus score(seq1[i-1], seq2[j-1]),
 * where the gap penalty is 5 when the source cell was not itself reached
 * through a gap and 1 when it was. Ties go to the diagonal move. Negative
 * results are clamped to 0 (and the cell is then flagged as a gap cell,
 * exactly as the original code did).
 *
 * Returns a freshly allocated array of row pointers. The caller owns it and
 * must free every row and then the array itself. Returns NULL if any
 * allocation fails (nothing is leaked in that case).
 */
int** mana(char* seq1, char* seq2) {
    const size_t rows = strlen(seq1) + 1;
    const size_t cols = strlen(seq2) + 1;
    size_t i, j;

    int** mat = (int**)malloc(rows * sizeof(int*));
    /* Gap flags live on the heap: rows*cols bytes is far too much for the
       stack once the sequences get long. Stored row-major. */
    bool* gap = (bool*)malloc(rows * cols * sizeof(bool));
    if (mat == NULL || gap == NULL) {
        free(mat);
        free(gap);
        return NULL;
    }

    for (i = 0; i < rows; i++) {
        mat[i] = (int*)malloc(cols * sizeof(int));
        if (mat[i] == NULL) {
            /* Undo the rows allocated so far before giving up. */
            while (i > 0)
                free(mat[--i]);
            free(mat);
            free(gap);
            return NULL;
        }
    }

    for (i = 0; i < rows; i++) {
        for (j = 0; j < cols; j++) {
            if (i == 0 || j == 0) {
                mat[i][j] = 0;
                gap[i * cols + j] = false;
                continue;
            }

            /* Opening a gap costs 5, extending an existing one costs 1. */
            int hgapsc = (!gap[(i - 1) * cols + j]) ? mat[i - 1][j] - 5 : mat[i - 1][j] - 1;
            int vgapsc = (!gap[i * cols + (j - 1)]) ? mat[i][j - 1] - 5 : mat[i][j - 1] - 1;
            int xscore = mat[i - 1][j - 1] + score(seq1[i - 1], seq2[j - 1]);

            if (hgapsc > vgapsc && hgapsc > xscore) {
                mat[i][j] = hgapsc;
                gap[i * cols + j] = true;
            } else if (vgapsc > hgapsc && vgapsc > xscore) {
                mat[i][j] = vgapsc;
                gap[i * cols + j] = true;
            } else {
                mat[i][j] = xscore;
                gap[i * cols + j] = false;
            }

            /* Local alignment: scores never drop below zero. */
            if (mat[i][j] < 0) {
                mat[i][j] = 0;
                gap[i * cols + j] = true;
            }
        }
    }

    free(gap);
    return mat;
}

/*
 * Print one local alignment recovered from a filled scoring matrix.
 *
 * seq1 / seq2 : the two NUL-terminated sequences that were aligned.
 * arr         : matrix with strlen(seq1)+1 rows and strlen(seq2)+1 columns
 *               (as produced by mana()).
 *
 * The walk starts at the first cell holding the maximum score and, at each
 * step, moves to whichever of the diagonal / left / upper neighbours holds
 * the strictly largest score (falling back to "up" on ties), until it hits
 * a zero cell or the matrix border. The two aligned strings are printed in
 * the order they are collected, i.e. from the end of the alignment towards
 * its start, with '-' marking gaps.
 */
void traceback(char* seq1, char* seq2, int** arr ){
    /* Computed once instead of calling strlen() in every loop test. */
    const int rows = (int)strlen(seq1) + 1;
    const int cols = (int)strlen(seq2) + 1;
    int j = 0, k = 0;
    int j_max = 0, k_max = 0, max_val = 0;

    if (arr == NULL) {
        fprintf(stderr, "traceback: no scoring matrix to trace\n");
        return;
    }

    /* Locate the highest-scoring cell (first one wins on ties). */
    for (j = 0; j < rows; j++) {
        for (k = 0; k < cols; k++) {
            if (arr[j][k] > max_val) {
                max_val = arr[j][k];
                j_max = j;
                k_max = k;
            }
        }
    }
    j = j_max;
    k = k_max;
    printf("\nIndex start at %d %d \n", j, k);

    /* Each step decrements j and/or k, so at most j_max + k_max symbols. */
    char fin_seq1 [j_max + k_max + 1];
    char fin_seq2 [k_max + j_max + 1];
    int l = 0;

    /* Indices are tested before the cell is read, and must be > 0 because
       the body looks at row j-1 and column k-1. */
    while (j > 0 && k > 0 && arr[j][k] != 0) {
        int score_diagonal = arr[j-1][k-1];
        int score_left = arr[j][k-1];
        int score_up = arr[j-1][k];

        if (score_diagonal > score_left && score_diagonal > score_up) {
            j--;
            k--;
            fin_seq1[l] = seq1[j];
            fin_seq2[l] = seq2[k];
        } else if (score_left > score_diagonal && score_left > score_up) {
            k--;
            fin_seq1[l] = '-';
            fin_seq2[l] = seq2[k];
        } else {
            j--;
            fin_seq1[l] = seq1[j];
            fin_seq2[l] = '-';
        }
        l++;
    }

    fin_seq1[l] = '\0';
    fin_seq2[l] = '\0';

    printf("%s\n", fin_seq1);
    printf("%s\n", fin_seq2);
}

/* Release a matrix returned by mana(); `rows` is the number of row pointers. */
static void free_score_matrix(int** mat, size_t rows) {
    size_t r;

    if (mat == NULL)
        return;
    for (r = 0; r < rows; r++)
        free(mat[r]);
    free(mat);
}

/*
 * Driver: loads the 25x25 substitution table from BLOSUM62.txt, builds the
 * scoring matrix for the two built-in sequences `loope` times while timing
 * only the mana() calls, then prints the alignment and the timings.
 *
 * Returns 0 on success, 1 if the table cannot be opened or parsed or if the
 * scoring matrix cannot be allocated.
 */
int main() {
    FILE* fp;
    int i=0,j=0;

    char c;
    char exseq1[1024] = "MQVRGGIFGQVPSQCLITLSYVWPNICQENKKRIWDTWDMPRKWSTTPYDDPQKPGSTYQGKCEPPQFHHISLKFCFHCFHYSCDGAPRLQVGQDIIQENSDQAAKYHFHKTDQYLQCLVWNDMFCQRKEHSVTRQRWACAGDMDMKGYHCCDIIYMELCHFDIGNVEHPFFCMMQGEEWFAHRMEIAGAHNHHWPPVANPCITIIMGSFSYAYKVPLSSILFESNLKSNTYLLMCRDNQNSLLIFRKWVKVNILRIFHKAFDNSFAADIDWRLGGKEATWRKWREGKNMGDTGAPMWFLDDNKLWMTYRWEQWSVYVIYVFPAMAHMNDKVCSHVVKPIPYTCTKGFHKYIPNTKLYQTGEMTTFHTGFTKDTWICDWKVYRHAWQIWITYRKNDIYRVHVPADNSCMMGMAFTEYNNTWGRQPCQPQIIVTVIFRQILSQARFYWNHGHCCDSRHHIQKPFHFYTKHNMDMEWSWYTIFREALHDFMDTLMYPITTTEDWERVAYVFAVTQRPWPPEEEARGEQQAQKYFFKLCFIMNKCGLSSFDNPCEWHSVQITVIFTWHMFEWPPVGHSWQACEDEPIDSMMTKWAKWPVPGHGPLDKFGDCAEEVPEDFDIQGTFEEYLATNELSHTGPLWVPKANDEFCGWDYRCPGSFFRLQSPDFEAHNELSQIDRMTFIPNSWSLCVLHAVLKKQESHFKSHKPSFHVRNAVTVKQPLPCQKKNDISEVCEWEWTTQWADQLLSNDCYCEKTNDIVRPHDAIANASEHMGQFPNRQDMASRELLSWKEGIFGAHAHWTLEYWGFRGPCAFQQAIALRYQALQLMGWIIGRAFVMDEQTEHSGMVPNHMEDVWHLDMVNEQTMITCCAVNIVSTRQTFQGHHPCDGYPMCYFKLTEAQIYMDKHQKTQKCDNKNHEWVQHTHQWWTGAMKYYFICKRWWSIWVKCPKFNEWYYKAVNDRMQFKVKILVDWVSGGDCVETCETVRMWLAAQMFCIITDECCRLQSLDIDHTKAMSKFATTSQME";
    char exseq2[1024] = "CAAEDQESFGVAFNDTMVDISYTDEYYHRPDLHCMHIYEAPFMKVGFRQKWQGKENMSNLKECWVSGLNKSYFAVMCQCIADCYYDEHMAIQNHKSGKMWCHWYMHCQHKWMSIHGKGEEFYRAQLNPGARCCFYGFPKAMYMFYDPAPTSGCSYEITCDYGSGPWTEKHWKKPFATVRCRDQMIVQFNFNLMKQIVGDHQARKCESVRIVIWLAHLYCEVQDHELSHPQEKNRMQSRFDGHGECAPIDPTWEVYVGEQEVCDQAWIGNTDHYAFTRHRRTAMGPCRMDWGVCHDCKLFCLAEWSCTDNHTGMNKQFIRPHGCFRPIKDIMYGTEVCFVVDPDQNNFLADIHRAPSMLRGKQQYFSRLFCIDGSEQKGKVTLGWNEEQRSNWHPMSQNWRISDHTYAFGMFAQPTHNWLHTDSKAHDTLGISSENFLAAFNVCDKSTDMSHFKSTWCCILESNGKMYFWSGKAPWRVFQRIDNWQCFRYWTCFLIFPELEIGEWGPEYECRGQADGHESCRHDCHLDDLTRVHKLHKEGAGYCCGHCRYTTQSSDGFWWQIAPCKKEIGKSKASQCLYIRPWESCIANQRWAYPRDMTLSWMGNRHCTRGEQPWDFQCKEIFHMSNIASTLAVWEKYSSLGSSGVDYLMYTHQCKDHDAGITLEGPLSDTFTVELHPPRHSIVYCNKPPWQGESCLFWNEMHLLYYGHMIRAHWVETMGHCKLEDWFTTRYLITGEYAFRNYRVCGSFCYMKNHRYQRVIVINHNRHHDGMVMKLMTKKCDKTMQDMYDPMQYAEFVRTKDWERRICVPKQAAMTSYDGHITDYDLDLFQKGLTDEFPIPPKLPVIWGSPMCHNFRMEFKPLMSDFQWLQETIHQCLIMQATAYKEIPYIHSWYQCQFVIGRHMMWTFGCTTAGHYCVPPICWHWTIESKVIEKDKLRKCIVRDAQRDWDLSNGYFQDPEQYPAKWYLWKLSWRCYSVSFQPVKGNSCGASNYNTMEGGEWHNALFLNCPFISHSPMTQIWYS";

    const int loope = 10;
    int** mat = NULL;
    /* Row count of every matrix mana() returns for exseq1. */
    const size_t mat_rows = strlen(exseq1) + 1;

    if((fp = fopen("BLOSUM62.txt", "r"))==NULL) return 1;

    /* Skip the two header lines. */
    fscanf(fp, "%*[^\n]\n");
    fscanf(fp, "%*[^\n]\n");

    for(i = 0; i<25; i++) {
        fscanf(fp, "%c", &c);              /* row label */
        for(j = 0; j<25; j++) {
            if (fscanf(fp, "%d", &blosum62mat[i][j]) != 1) {
                fprintf(stderr, "BLOSUM62.txt: cannot read score [%d][%d]\n", i, j);
                fclose(fp);
                return 1;
            }
        }
        fscanf(fp, "%c", &c);              /* character following the row */
    }
    /* The table is fully loaded; the file is not needed any more. */
    fclose(fp);

    clock_t elapsed = 0;

    for (i = 0;i < loope; i++) {
        clock_t startloop = clock();
        mat = mana(exseq1,exseq2);
        elapsed += clock() - startloop;

        if (mat == NULL) {
            fprintf(stderr, "out of memory while building the scoring matrix\n");
            return 1;
        }
        /* Keep only the last matrix for the traceback; earlier ones used to leak. */
        if (i + 1 < loope) {
            free_score_matrix(mat, mat_rows);
            mat = NULL;
        }
    }
    double timetaken = elapsed*1e3/CLOCKS_PER_SEC;

    traceback(exseq1, exseq2, mat);
    printf("total kernel time(ms): %lf\naverage kernel time(ms):%lf", timetaken, timetaken/loope);

    free_score_matrix(mat, mat_rows);
    return 0;
}

Writing smithwaterman.c


In [2]:
%%shell
gcc smithwaterman.c -o smithwaterman



In [3]:
%%shell
./smithwaterman


Index start at 992 1006 
FMQA-ALWMRVTECTEV----CD-GGSVWDVLIK-VKFQMRDNVAKYYWENFKPCKVWISW-WRKCI-FYYKMAGTW----WQHTHQV-------WEHNKND---CKQTKQHKDMYIQAETL-K--FYC-MPYGDCPHHGQF-TQRTSVINVACC---TI-MT-QE--NV--MDLHWVDEMHNPVMGSHE-TQEDMVFARGI----IWGML--QLAQYRLAIAQQ-FA-CPGRFGWYELTWHAHAG----F--IGEKWSLLERSAMDQRNPFQGM-H-ESANA-IADHPRVIDNTKECYCDNSLL-QDAW-QT--TW-EWECVESI-DNKKQCPLPQ-KVTVAN-R-V---HFSPKHSKFHSEQKKLVA--HLV----CLSWSNPIFTMRDIQSLENHAEFDPSQLRF--FSGPCRYD-WGCFEDNAKP------VWLP---GT------HSLENTALYEEFTGQ--------IDFDE---PVEEACDGFK-DLPGHGPVPWKAWKTMMSDIPEDECA--QW-------S-HGVPPWEFMHWTF--IV--TIQV-SH--W-ECPNDFSSLG---CK-NMIFCLKFFY--KQ-AQQ-EGRAEEEPPWPR----QTVAF-V-Y-------AVRE---W--DETT-TIPYMLTDMFDHLAERFITYWSWEMDMN-HKTYFHFPKQIH---HRSDCC---HGHNWYFRAQSLIQRFIVTVIIQPQC-PQRGWT--NNYET----FAMGMMC-SNDAPVHVRYIDNKRYTIWIQ-------WAHRYVKW--D---CIWTDKTFGTHFT-TMEGTQYLKTNPIYKHFGKTCTYPIPKVVHSCV-KDNM-------H-AMAPFVYIVYVSWQEWRYTMWLKNDD-LFWMPA-GT----DG-MNKGERWK-RWTAEKGGLRWDIDAAFSNDFAKH-FI----RLINVKVWKRFI---LLSNQN--DRCMLLYTN-SKLNSEF



# CUDA Implementation

In [4]:
%%writefile smithwaterman.cu

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>

__constant__ int blosum62mat[25][25];

// Translate a one-letter amino-acid code into its row/column index in
// blosum62mat. Codes A R N D C Q E G H I L K M F P S T W Y V B J Z X map to
// 0..23 in that order; every other character maps to the catch-all index 24.
__device__ int place(char a) {
    // Dense table indexed by (letter - 'A'). Upper-case letters that have no
    // entry of their own (O and U) carry the catch-all value.
    // NOTE(review): relies on 'A'..'Z' being contiguous (ASCII).
    const int letter_to_index[26] = {
        /* A */  0, /* B */ 20, /* C */  4, /* D */  3, /* E */  6,
        /* F */ 13, /* G */  7, /* H */  8, /* I */  9, /* J */ 21,
        /* K */ 11, /* L */ 10, /* M */ 12, /* N */  2, /* O */ 24,
        /* P */ 14, /* Q */  5, /* R */  1, /* S */ 15, /* T */ 16,
        /* U */ 24, /* V */ 19, /* W */ 17, /* X */ 23, /* Y */ 18,
        /* Z */ 22
    };

    if (a < 'A' || a > 'Z') {
        return 24;
    }
    return letter_to_index[a - 'A'];
}

// Substitution score for pairing residue `a` with residue `b`, read from the
// __constant__ blosum62mat table after translating both letters with place().
__device__ int score(char a, char b) {
    const int rowIdx = place(a);
    const int colIdx = place(b);

    return blosum62mat[rowIdx][colIdx];
}

// Compute every cell of one anti-diagonal of the local-alignment scoring
// matrix.
//
//   seq1, row   : first sequence and the number of matrix rows
//   seq2, col   : second sequence and the number of matrix columns
//   scoringmat  : row-major row x col score matrix (read and written)
//   gap         : row-major row x col flags, true when a cell was reached
//                 through a gap (read and written)
//   diag        : index of the anti-diagonal to fill; a cell (i, j) belongs
//                 to it when i + j == diag
//
// Launch layout: 1-D. Each thread owns one column j (its global thread index)
// and handles the cell (diag - j, j) if that cell exists; surplus threads do
// nothing. The launch must therefore provide at least `col` threads in total,
// in any split of blocks and threads per block. No shared memory is used.
//
// Cells read their upper, left and upper-left neighbours, which lie on the two
// previous anti-diagonals, so the caller must launch the diagonals in
// increasing order and let each launch finish before the next one starts.
__global__ void mana(char* seq1, const int row, char* seq2, const int col, int* scoringmat, bool* gap, int diag) {
    // Global index rather than threadIdx.x alone, so multi-block launches
    // cover distinct columns. Identical for a single-block launch.
    const int j = blockIdx.x * blockDim.x + threadIdx.x;
    const int i = diag - j;
    if (i < 0 || i >= row || j >= col) {
        return;
    }

    const int cell = i * col + j;
    if (i == 0 || j == 0) {
        // Border cells are always zero and never count as gaps.
        scoringmat[cell] = 0;
        gap[cell] = false;
        return;
    }

    const int up = cell - col;        // (i-1, j)
    const int left = cell - 1;        // (i, j-1)
    const int upLeft = up - 1;        // (i-1, j-1)

    // Opening a gap costs 5, extending one that is already open costs 1.
    const int hgapsc = scoringmat[up] - (gap[up] ? 1 : 5);
    const int vgapsc = scoringmat[left] - (gap[left] ? 1 : 5);
    const int xscore = scoringmat[upLeft] + score(seq1[i - 1], seq2[j - 1]);

    int best;
    bool viaGap;
    if (vgapsc > hgapsc && vgapsc > xscore) {
        best = vgapsc;
        viaGap = true;
    } else if (hgapsc > vgapsc && hgapsc > xscore) {
        best = hgapsc;
        viaGap = true;
    } else {
        // Diagonal move, also chosen on ties.
        best = xscore;
        viaGap = false;
    }

    // Local alignment: scores are clamped at zero; a clamped cell is flagged
    // as a gap cell, as in the original implementation.
    if (best < 0) {
        best = 0;
        viaGap = true;
    }

    scoringmat[cell] = best;
    gap[cell] = viaGap;
}

// Print one local alignment recovered from a filled scoring matrix (host code).
//
//   seq1, seq2  : the two NUL-terminated sequences that were aligned
//   scoringmat  : row-major matrix of `row` rows and `col` columns; it is
//                 dereferenced on the host, so it must be host-accessible
//   col, row    : matrix dimensions (note the order: columns first)
//
// The walk starts at the first cell holding the maximum score and repeatedly
// moves to whichever of the diagonal / left / upper neighbours holds the
// strictly largest score (falling back to "up" on ties) until it reaches a
// zero cell or the matrix border. Both aligned strings are printed in the
// order they are collected, i.e. from the end of the alignment towards its
// start, with '-' marking gaps.
void traceback(char* seq1, char* seq2, int* scoringmat, const int col, const int row ){
    int j = 0, k = 0;
    int j_max = 0, k_max = 0, max_val = 0;

    // Locate the highest-scoring cell (first one wins on ties).
    for (j = 0; j < row; j++) {
        for (k = 0; k < col; k++) {
            if (scoringmat[j * col + k] > max_val) {
                max_val = scoringmat[j * col + k];
                j_max = j;
                k_max = k;
            }
        }
    }
    j = j_max;
    k = k_max;
    printf("\nIndex start at %d %d \n", j, k);

    // Each step decrements j and/or k, so at most j_max + k_max symbols are
    // produced. Heap buffers avoid relying on variable-length arrays in C++.
    const size_t capacity = (size_t)j_max + (size_t)k_max + 1;
    char* fin_seq1 = (char*)malloc(capacity);
    char* fin_seq2 = (char*)malloc(capacity);
    if (fin_seq1 == NULL || fin_seq2 == NULL) {
        fprintf(stderr, "traceback: out of memory\n");
        free(fin_seq1);
        free(fin_seq2);
        return;
    }
    int l = 0;

    // Indices are tested before the cell is read, and must be > 0 because the
    // body looks at row j-1 and column k-1.
    while (j > 0 && k > 0 && scoringmat[j * col + k] != 0) {
        const int score_diagonal = scoringmat[(j - 1) * col + (k - 1)];
        const int score_left = scoringmat[j * col + (k - 1)];
        const int score_up = scoringmat[(j - 1) * col + k];

        if (score_diagonal > score_left && score_diagonal > score_up) {
            j--;
            k--;
            fin_seq1[l] = seq1[j];
            fin_seq2[l] = seq2[k];
        } else if (score_left > score_diagonal && score_left > score_up) {
            k--;
            fin_seq1[l] = '-';
            fin_seq2[l] = seq2[k];
        } else {
            j--;
            fin_seq1[l] = seq1[j];
            fin_seq2[l] = '-';
        }
        l++;
    }

    fin_seq1[l] = '\0';
    fin_seq2[l] = '\0';

    printf("%s\n", fin_seq1);
    printf("%s\n", fin_seq2);

    free(fin_seq1);
    free(fin_seq2);
}

// Abort with a diagnostic when a required CUDA runtime call fails.
#define CUDA_CHECK(call)                                                      \
    do {                                                                      \
        cudaError_t err_ = (call);                                            \
        if (err_ != cudaSuccess) {                                            \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,     \
                    cudaGetErrorString(err_));                                \
            exit(EXIT_FAILURE);                                               \
        }                                                                     \
    } while (0)

// Memory advice and prefetching are performance hints only: report a failure
// but keep going, and clear the error so it does not surface in later checks.
#define CUDA_HINT(call)                                                       \
    do {                                                                      \
        cudaError_t err_ = (call);                                            \
        if (err_ != cudaSuccess) {                                            \
            fprintf(stderr, "CUDA hint ignored %s:%d: %s\n", __FILE__,        \
                    __LINE__, cudaGetErrorString(err_));                      \
            (void)cudaGetLastError();                                         \
        }                                                                     \
    } while (0)

// Driver: loads the 25x25 substitution table from BLOSUM62.txt into constant
// memory, fills the scoring matrix on the GPU one anti-diagonal per kernel
// launch, then prints the alignment on the host.
// Returns 0 on success and 1 if the table cannot be opened or parsed; a failed
// required CUDA call terminates the process with EXIT_FAILURE.
int main() {
    FILE* fp;
    int i = 0, j = 0;
    char c;
    char exseq1[1024] = "MQVRGGIFGQVPSQCLITLSYVWPNICQENKKRIWDTWDMPRKWSTTPYDDPQKPGSTYQGKCEPPQFHHISLKFCFHCFHYSCDGAPRLQVGQDIIQENSDQAAKYHFHKTDQYLQCLVWNDMFCQRKEHSVTRQRWACAGDMDMKGYHCCDIIYMELCHFDIGNVEHPFFCMMQGEEWFAHRMEIAGAHNHHWPPVANPCITIIMGSFSYAYKVPLSSILFESNLKSNTYLLMCRDNQNSLLIFRKWVKVNILRIFHKAFDNSFAADIDWRLGGKEATWRKWREGKNMGDTGAPMWFLDDNKLWMTYRWEQWSVYVIYVFPAMAHMNDKVCSHVVKPIPYTCTKGFHKYIPNTKLYQTGEMTTFHTGFTKDTWICDWKVYRHAWQIWITYRKNDIYRVHVPADNSCMMGMAFTEYNNTWGRQPCQPQIIVTVIFRQILSQARFYWNHGHCCDSRHHIQKPFHFYTKHNMDMEWSWYTIFREALHDFMDTLMYPITTTEDWERVAYVFAVTQRPWPPEEEARGEQQAQKYFFKLCFIMNKCGLSSFDNPCEWHSVQITVIFTWHMFEWPPVGHSWQACEDEPIDSMMTKWAKWPVPGHGPLDKFGDCAEEVPEDFDIQGTFEEYLATNELSHTGPLWVPKANDEFCGWDYRCPGSFFRLQSPDFEAHNELSQIDRMTFIPNSWSLCVLHAVLKKQESHFKSHKPSFHVRNAVTVKQPLPCQKKNDISEVCEWEWTTQWADQLLSNDCYCEKTNDIVRPHDAIANASEHMGQFPNRQDMASRELLSWKEGIFGAHAHWTLEYWGFRGPCAFQQAIALRYQALQLMGWIIGRAFVMDEQTEHSGMVPNHMEDVWHLDMVNEQTMITCCAVNIVSTRQTFQGHHPCDGYPMCYFKLTEAQIYMDKHQKTQKCDNKNHEWVQHTHQWWTGAMKYYFICKRWWSIWVKCPKFNEWYYKAVNDRMQFKVKILVDWVSGGDCVETCETVRMWLAAQMFCIITDECCRLQSLDIDHTKAMSKFATTSQME";
    char exseq2[1024] = "CAAEDQESFGVAFNDTMVDISYTDEYYHRPDLHCMHIYEAPFMKVGFRQKWQGKENMSNLKECWVSGLNKSYFAVMCQCIADCYYDEHMAIQNHKSGKMWCHWYMHCQHKWMSIHGKGEEFYRAQLNPGARCCFYGFPKAMYMFYDPAPTSGCSYEITCDYGSGPWTEKHWKKPFATVRCRDQMIVQFNFNLMKQIVGDHQARKCESVRIVIWLAHLYCEVQDHELSHPQEKNRMQSRFDGHGECAPIDPTWEVYVGEQEVCDQAWIGNTDHYAFTRHRRTAMGPCRMDWGVCHDCKLFCLAEWSCTDNHTGMNKQFIRPHGCFRPIKDIMYGTEVCFVVDPDQNNFLADIHRAPSMLRGKQQYFSRLFCIDGSEQKGKVTLGWNEEQRSNWHPMSQNWRISDHTYAFGMFAQPTHNWLHTDSKAHDTLGISSENFLAAFNVCDKSTDMSHFKSTWCCILESNGKMYFWSGKAPWRVFQRIDNWQCFRYWTCFLIFPELEIGEWGPEYECRGQADGHESCRHDCHLDDLTRVHKLHKEGAGYCCGHCRYTTQSSDGFWWQIAPCKKEIGKSKASQCLYIRPWESCIANQRWAYPRDMTLSWMGNRHCTRGEQPWDFQCKEIFHMSNIASTLAVWEKYSSLGSSGVDYLMYTHQCKDHDAGITLEGPLSDTFTVELHPPRHSIVYCNKPPWQGESCLFWNEMHLLYYGHMIRAHWVETMGHCKLEDWFTTRYLITGEYAFRNYRVCGSFCYMKNHRYQRVIVINHNRHHDGMVMKLMTKKCDKTMQDMYDPMQYAEFVRTKDWERRICVPKQAAMTSYDGHITDYDLDLFQKGLTDEFPIPPKLPVIWGSPMCHNFRMEFKPLMSDFQWLQETIHQCLIMQATAYKEIPYIHSWYQCQFVIGRHMMWTFGCTTAGHYCVPPICWHWTIESKVIEKDKLRKCIVRDAQRDWDLSNGYFQDPEQYPAKWYLWKLSWRCYSVSFQPVKGNSCGASNYNTMEGGEWHNALFLNCPFISHSPMTQIWYS";
    int h_blosum62mat[25][25];

    if ((fp = fopen("BLOSUM62.txt", "r")) == NULL) return 1;

    // Skip the two header lines.
    fscanf(fp, "%*[^\n]\n");
    fscanf(fp, "%*[^\n]\n");

    for (i = 0; i < 25; i++) {
        fscanf(fp, "%c", &c);              // row label
        for (j = 0; j < 25; j++) {
            // h_blosum62mat is an uninitialised local, so a failed read must
            // not be allowed to reach the device.
            if (fscanf(fp, "%d", &h_blosum62mat[i][j]) != 1) {
                fprintf(stderr, "BLOSUM62.txt: cannot read score [%d][%d]\n", i, j);
                fclose(fp);
                return 1;
            }
        }
        fscanf(fp, "%c", &c);              // character following the row
    }

    fclose(fp);

    CUDA_CHECK(cudaMemcpyToSymbol(blosum62mat, h_blosum62mat, sizeof(int) * 25 * 25));

    const size_t row = (size_t)strlen(exseq1) + 1;
    const size_t col = (size_t)strlen(exseq2) + 1;
    const size_t ARRAY_BYTES = row * col * sizeof(int);
    const size_t GAP_ARRAY_BYTES = row * col * sizeof(bool);

    // declare array
    int* scoringmat;
    bool* gapmat;
    CUDA_CHECK(cudaMallocManaged(&scoringmat, ARRAY_BYTES));
    CUDA_CHECK(cudaMallocManaged(&gapmat, GAP_ARRAY_BYTES));
    char* d_seq1;
    char* d_seq2;
    CUDA_CHECK(cudaMallocManaged(&d_seq1, row * sizeof(char)));
    CUDA_CHECK(cudaMallocManaged(&d_seq2, col * sizeof(char)));
    CUDA_HINT(cudaMemAdvise(d_seq1, row * sizeof(char), cudaMemAdviseSetPreferredLocation, cudaCpuDeviceId));
    CUDA_HINT(cudaMemAdvise(d_seq1, row * sizeof(char), cudaMemAdviseSetReadMostly, cudaCpuDeviceId));
    CUDA_HINT(cudaMemAdvise(d_seq2, col * sizeof(char), cudaMemAdviseSetPreferredLocation, cudaCpuDeviceId));
    CUDA_HINT(cudaMemAdvise(d_seq2, col * sizeof(char), cudaMemAdviseSetReadMostly, cudaCpuDeviceId));
    CUDA_HINT(cudaMemPrefetchAsync(d_seq1, row * sizeof(char), cudaCpuDeviceId, NULL));
    CUDA_HINT(cudaMemPrefetchAsync(d_seq2, col * sizeof(char), cudaCpuDeviceId, NULL));
    // The copies include the terminating NUL (row/col are length + 1).
    CUDA_CHECK(cudaMemcpy(d_seq1, exseq1, row * sizeof(char), cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy(d_seq2, exseq2, col * sizeof(char), cudaMemcpyHostToDevice));

    // get gpu id
    int device = -1;
    CUDA_CHECK(cudaGetDevice(&device));
    CUDA_HINT(cudaMemAdvise(scoringmat, ARRAY_BYTES, cudaMemAdviseSetPreferredLocation, device));
    CUDA_HINT(cudaMemAdvise(gapmat, GAP_ARRAY_BYTES, cudaMemAdviseSetPreferredLocation, device));
    CUDA_HINT(cudaMemPrefetchAsync(scoringmat, ARRAY_BYTES, device, NULL));
    CUDA_HINT(cudaMemPrefetchAsync(gapmat, GAP_ARRAY_BYTES, device, NULL));

    const int diag_count = (int)(row + col - 1);
    const int max = (int)((row > col) ? row : col);

    // One launch per anti-diagonal. All launches go to the same (default)
    // stream, so they already execute one after another in issue order; a
    // single synchronisation after the loop is enough.
    for (int d = 0; d < diag_count; d++) {
        mana<<<1, max>>>(d_seq1, (int)row, d_seq2, (int)col, scoringmat, gapmat, d);
        CUDA_CHECK(cudaGetLastError());    // catches an invalid launch configuration
    }
    CUDA_CHECK(cudaDeviceSynchronize());   // surfaces any fault raised inside the kernels

    CUDA_HINT(cudaMemPrefetchAsync(scoringmat, ARRAY_BYTES, cudaCpuDeviceId, NULL));
    // Let the prefetch finish before the host starts reading the matrix.
    CUDA_CHECK(cudaDeviceSynchronize());

    traceback(exseq1, exseq2, scoringmat, (int)col, (int)row);

    /*
    for (int i = 0; i<row; i++) {
      for (int j = 0; j<col;j++) printf("%d ", scoringmat[i*col+j]);
      printf("\n");
    }
    */

    CUDA_CHECK(cudaFree(scoringmat));
    CUDA_CHECK(cudaFree(gapmat));
    CUDA_CHECK(cudaFree(d_seq1));
    CUDA_CHECK(cudaFree(d_seq2));

    return 0;
}

Writing smithwaterman.cu


In [5]:
%%shell
nvcc smithwaterman.cu -o smithwaterman



In [6]:
%%shell
nvprof ./smithwaterman

==1710== NVPROF is profiling process 1710, command: ./smithwaterman

Index start at 992 1006 
FMQA-ALWMRVTECTEV----CD-GGSVWDVLIK-VKFQMRDNVAKYYWENFKPCKVWISW-WRKCI-FYYKMAGTW----WQHTHQV-------WEHNKND---CKQTKQHKDMYIQAETL-K--FYC-MPYGDCPHHGQF-TQRTSVINVACC---TI-MT-QE--NV--MDLHWVDEMHNPVMGSHE-TQEDMVFARGI----IWGML--QLAQYRLAIAQQ-FA-CPGRFGWYELTWHAHAG----F--IGEKWSLLERSAMDQRNPFQGM-H-ESANA-IADHPRVIDNTKECYCDNSLL-QDAW-QT--TW-EWECVESI-DNKKQCPLPQ-KVTVAN-R-V---HFSPKHSKFHSEQKKLVA--HLV----CLSWSNPIFTMRDIQSLENHAEFDPSQLRF--FSGPCRYD-WGCFEDNAKP------VWLP---GT------HSLENTALYEEFTGQ--------IDFDE---PVEEACDGFK-DLPGHGPVPWKAWKTMMSDIPEDECA--QW-------S-HGVPPWEFMHWTF--IV--TIQV-SH--W-ECPNDFSSLG---CK-NMIFCLKFFY--KQ-AQQ-EGRAEEEPPWPR----QTVAF-V-Y-------AVRE---W--DETT-TIPYMLTDMFDHLAERFITYWSWEMDMN-HKTYFHFPKQIH---HRSDCC---HGHNWYFRAQSLIQRFIVTVIIQPQC-PQRGWT--NNYET----FAMGMMC-SNDAPVHVRYIDNKRYTIWIQ-------WAHRYVKW--D---CIWTDKTFGTHFT-TMEGTQYLKTNPIYKHFGKTCTYPIPKVVHSCV-KDNM-------H-AMAPFVYIVYVSWQEWRYTMWLKNDD-LFWMPA-GT----DG-MNKGERWK-RWT

