# Motif Merging Analysis

In [2]:
### header ###
__author__ = "Jenhan Tao"
__license__ = "BSD"
__email__ = "jenhantao@gmail.com"

### imports ###
import sys
import os
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt 
import seaborn as sns
import pickle
import time
from collections import Counter
### notebook specific configuration ###
%matplotlib inline
matplotlib.pylab.rcParams['savefig.dpi'] = 200
sys.setrecursionlimit(5000)
working_dir = '/gpfs/data01/glasslab/home/jtao/analysis/ap1_motif_merging/'
if not os.path.isdir(working_dir):
    os.mkdir(working_dir)
os.chdir(working_dir)
sns.set_context('notebook')
%load_ext autoreload
%autoreload 2

In [None]:
! wget http://jaspar.genereg.net/html/DOWNLOAD/JASPAR_CORE/pfm/nonredundant/pfm_vertebrates.txt

### Split Jaspar motif file into individual motifs

In [8]:
# Split ./pfm_vertebrates.txt into one file per motif under ./jaspar_motifs/.
# The file is consumed in fixed-size records of RECORD_LINES lines; the first
# line of each record is the header "<marker><jaspar id> <motif name>".
RECORD_LINES = 6
out_dir = './jaspar_motifs/'
if not os.path.isdir(out_dir):
    os.mkdir(out_dir)
with open('./pfm_vertebrates.txt') as f:
    data = f.readlines()
for i in range(0, len(data), RECORD_LINES):
    record = data[i:i + RECORD_LINES]
    if not ''.join(record).strip():
        # Blank padding (e.g. trailing newlines at the end of the file).
        continue
    # Drop the leading marker character of the header before tokenizing it.
    name_tokens = record[0][1:].strip().split()
    if len(record) < RECORD_LINES or len(name_tokens) < 2:
        # Fail before any output is written instead of leaving a partial
        # .jaspar file behind and dying with an opaque IndexError.
        raise ValueError(
            'malformed motif record starting at line %d of pfm_vertebrates.txt'
            % (i + 1))
    jaspar_id = name_tokens[0]
    name = name_tokens[1]

    # The context manager closes the file even if a write fails.
    with open(out_dir + name + '.jaspar', 'w') as out_file:
        out_file.writelines(record)
    

### Convert Files to HOMER Format (for TBA)

In [None]:
%%bash
# Start from an empty ./homer_motifs directory so files left over from a
# previous run are not mixed with the fresh conversions.
mkdir -p ./homer_motifs
# -f: do not report an error when the directory is already empty.
rm -f ./homer_motifs/*

for i in ./jaspar_motifs/*jaspar;
do
    # ${i//jaspar/homer} replaces every "jaspar" in the path, turning
    # ./jaspar_motifs/NAME.jaspar into ./homer_motifs/NAME.homer.
    # Both arguments are quoted so a motif name containing whitespace or glob
    # characters is passed as a single argument.
    /gpfs/data01/glasslab/home/jtao/code/tba/jaspar2homer.py "$i" "${i//jaspar/homer}";
done

### Clean up Motif Names

In [27]:
%%bash
# Strip the parentheses and dot from variant suffixes in file names:
# the first "(var." becomes "_var" and the first ")" is removed,
# e.g. NAME(var.2).homer -> NAME_var2.homer.
for i in ./homer_motifs/*homer;
do
    newname=${i/(var./_var}
    newname=${newname/)/}
    # Quote both sides: an unquoted operand is word-split and glob-expanded,
    # which breaks the test and the mv for names with spaces or wildcards.
    if [ "$i" != "$newname" ];
    then
        mv "$i" "$newname"
    fi
done

### First Round

#### Score Motifs

In [20]:
# First round: run score_motifs.py on every ./homer_motifs/*homer file.
# ./motif_clustering is passed as the first argument — presumably the output
# directory, since the clustering cell reads ./motif_clustering/correlation.npz
# (TODO confirm against the script).
!python /gpfs/data01/glasslab/home/jtao/code/tba/score_motifs.py ./motif_clustering ./homer_motifs/*homer

Reading motif files...
Calculating alignments between motifs and scoring motifs
scoring against 1/519
scoring against 2/519
scoring against 3/519
scoring against 4/519
scoring against 5/519
scoring against 6/519
scoring against 7/519
scoring against 8/519
scoring against 9/519
scoring against 10/519
scoring against 11/519
scoring against 12/519
scoring against 13/519
scoring against 14/519
scoring against 15/519
scoring against 16/519
scoring against 17/519
scoring against 18/519
scoring against 19/519
scoring against 20/519
scoring against 21/519
scoring against 22/519
scoring against 23/519
scoring against 24/519
scoring against 25/519
scoring against 26/519
scoring against 27/519
scoring against 28/519
scoring against 29/519
scoring against 30/519
scoring against 31/519
scoring against 32/519
scoring against 33/519
scoring against 34/519
scoring against 35/519
scoring against 36/519
scoring against 37/519
scoring against 38/519
scoring against 39/519
scoring against 40/519
scoring a

#### Cluster Motifs

In [49]:
!python /gpfs/data01/glasslab/home/jtao/code/tba/threshold_cluster_motifs.py ./motif_clustering/correlation.npz ./motif_clustering/ 0.9 ./homer_motifs/*

### Second Round

#### Score Motifs

!python /gpfs/data01/glasslab/home/jtao/code/tba/score_motifs.py ./motif_clustering_round2 ./motif_clustering/clustered_motifs/*motif

#### Cluster Motifs

In [11]:
# Second round: run threshold_cluster_motifs.py on the first-round clustered
# motifs (./motif_clustering/clustered_motifs/*motif) using
# ./motif_clustering_round2/correlation.npz, with ./motif_clustering_round2/
# as the second argument.
# 0.9 is presumably the similarity threshold — TODO confirm against the script.
!python /gpfs/data01/glasslab/home/jtao/code/tba/threshold_cluster_motifs.py ./motif_clustering_round2/correlation.npz ./motif_clustering_round2/ 0.9 ./motif_clustering/clustered_motifs/*motif

### Convert Clustered Motifs to Fimo Format

In [13]:
%%bash
# Start from an empty ./fimo_motifs directory so files left over from a
# previous run are not mixed with the fresh conversions.
mkdir -p ./fimo_motifs
# -f: do not report an error when the directory is already empty.
rm -f ./fimo_motifs/*

for i in ./motif_clustering_round2/clustered_motifs/*motif;
do
    # Strip the directory part, keeping only the file name.
    motif_name=${i##*/};
    # Write ./fimo_motifs/NAME.fimo for each NAME.motif; arguments are quoted so
    # names containing whitespace or glob characters stay a single argument.
    /gpfs/data01/glasslab/home/jtao/code/tba/homer2fimo.py "$i" "./fimo_motifs/${motif_name/.motif/.fimo}";
done