In [1]:
from __future__ import division

import numpy as np
import os
import sys
import datetime
from subprocess import call
import subprocess
import glob
import djPyBio as DJ
import pandas as pd
import csv
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
import copy 
import pybedtools
pd.set_option('display.max_columns', 500)



In [2]:
def value_counts_to_df(df, cols, percent=False):
    """ Function to produce a data frame from the value counts of multiple 
    categoricals, for making stackable barplots
    
    df = dataframe with categoricals
    cols = list of columns with the categoricals desired
    percent= return result in percent or not
    
    Returns a dataframe indexed by the entries of `cols` with the two columns
    'Overlapping' and 'Non_Overlapping'.

    A label counts as 'Non_Overlapping' when it is the string 'False' or the
    boolean False; every other label is stored as 'Overlapping'.
    NOTE(review): if a column holds more than one non-False label, the last one
    reported by value_counts() overwrites the earlier ones (unchanged from the
    previous behaviour) -- confirm the inputs are strictly True/False.

    With percent=True each row is rescaled to percentages of its own total; a
    column with nothing to count is left as zeros instead of dividing by zero."""
    
    COL = ['Overlapping', 'Non_Overlapping']
    data = []
    for col in cols:
        # One value_counts() call per column; labels and counts stay paired.
        counts = df[col].value_counts()
        dat = [0, 0]

        for label, count in zip(counts.index, counts.values):
            # str() makes boolean False match as well as the string 'False'.
            if str(label) == 'False':
                dat[1] = count
            else:
                dat[0] = count

        if percent:
            tot = sum(dat)
            if tot:
                # float() keeps this a true division even without the
                # file-level `from __future__ import division`.
                dat = [(value / float(tot)) * 100 for value in dat]

        data.append(dat)

    return pd.DataFrame(data, columns=COL, index=list(cols))


In [3]:
class Overlap_Functions(object):
    """ 
    takes input of gs dataframe
    or lumpy dataframe, can perform various functions
    on these results

    The dataframe is stored as `self.df`.  The bedtools-based methods read the
    columns 'Chr', 'Start', 'End' and 'Coords' plus one column per sample
    (the `UUID` argument)."""

    def __init__(self, df):
        self.df = df

    def header_reorder_vcf(self):
        """Return a new dataframe with the coordinate columns first and renamed.

        'CHROM', 'POS' and 'END' are moved to the front (in that order) and
        renamed to 'Chr', 'Start' and 'End'; all other columns keep their
        relative order.  Raises KeyError if one of the three is missing.
        """
        desired_headers = ['CHROM', 'POS', 'END']
        remaining = [name for name in self.df.columns.tolist()
                     if name not in desired_headers]

        # Index the instance's frame (not a notebook-level global `df`).
        ordered_df = self.df[desired_headers + remaining]

        # Fix headers to make similar; one rename call handles all three keys.
        header_dict = {'CHROM': 'Chr', 'POS': 'Start', 'END': 'End'}
        return ordered_df.rename(columns=header_dict)

    def Header_list(self, Head, type_list):
        """Return a copy of the header list `Head` with '_<type_list>' appended.

        'CHROM'/'Chr', 'POS'/'Start' and 'END'/'End' are first normalised to
        'Chr', 'Start' and 'End'; any other name is suffixed unchanged.
        """
        suffix = '_' + str(type_list)
        canonical = {'CHROM': 'Chr', 'Chr': 'Chr',
                     'POS': 'Start', 'Start': 'Start',
                     'END': 'End', 'End': 'End'}
        return [canonical.get(name, name) + suffix for name in Head]

    def UUID_Non_Ref(self, UUID):
        """Return the rows of `self.df` whose `UUID` column is above or below 2.

        NOTE(review): 2 is presumably the reference state for a sample -- confirm.
        """
        values = self.df[UUID]
        # Two strict comparisons (rather than `!= 2`) so that missing values,
        # for which both comparisons are False, are dropped as well.
        out_df = self.df[(values > 2) | (values < 2)]
        return out_df

    def DF_BT(self, df):
        """Convert `df` to a BedTool.

        Only the columns 'Chr', 'Start', 'End' and 'Coords' are kept.  Returns
        the tuple (BedTool, list of those column names).
        """
        cols = ['Chr', 'Start', 'End', 'Coords']
        df_out = df[cols]
        BT = pybedtools.BedTool.from_dataframe(df_out)
        Header = df_out.columns.tolist()
        return BT, Header

    def BT_non_ref_UUID(self, UUID):
        """Return a BedTool of the rows selected by UUID_Non_Ref(UUID)."""
        non_ref_df = self.UUID_Non_Ref(UUID)
        return self.DF_BT(non_ref_df)[0]

    def closest_to_df(self, UUID):
        """Pair each non-reference call of `UUID` with its closest other call.

        Runs bedtools closest of the non-reference calls against themselves
        with d=True (report distance), io=True (ignore overlapping features)
        and t='first' (first hit on ties).  The result is read into a
        dataframe whose columns are the '_A' headers, the '_B' headers,
        'Distance', and 'Genotype' (the `UUID` value of the A-side call).

        NOTE(review): bedtools closest expects position-sorted input; the
        frame is not sorted here -- confirm self.df is already sorted.
        """
        UUID_df = self.UUID_Non_Ref(UUID)
        closest_BT, headers = self.DF_BT(UUID_df)
        headers_A = self.Header_list(headers, 'A')
        headers_B = self.Header_list(headers, 'B')
        headers_out = headers_A + headers_B + ['Distance']

        closest = closest_BT.closest(closest_BT, d=True, io=True, t='first')
        closest_df = pd.read_table(closest.fn, names=headers_out)

        # Look the genotype up row-wise by Coords.  Indexing self.df with the
        # Coords_A series would select *columns* named after the coordinates.
        # Duplicated Coords keep their first genotype so the lookup is unique.
        genotype_by_coords = (UUID_df.drop_duplicates('Coords')
                                     .set_index('Coords')[UUID])
        closest_df['Genotype'] = closest_df['Coords_A'].map(genotype_by_coords)

        return closest_df

In [11]:
def add_text_to_ax(ax, text, loc = (-.1,1.1), font=False, fontsize = 15):
    """Write `text` on one axes, or one label per axes for a group of axes.

    ax       = a single axes object, or a list / tuple / numpy array of them
               (a numpy array is flattened, so a 2-D grid of axes also works)
    text     = the label for a single axes; for a group, an iterable of labels
               paired with the axes in order (zip stops at the shorter one)
    loc      = (x, y) position in axes coordinates (transform=transAxes)
    font     = currently unused; kept so existing calls keep working
    fontsize = font size passed to ax.text

    Returns the object returned by ax.text for a single axes, and None when a
    group of axes is given.
    """
    if isinstance(ax, np.ndarray):
        # A numpy array of axes may be 2-D; walk it as a flat sequence.
        ax = ax.ravel()

    # isinstance (not `type(ax) == list`) so tuples, list subclasses and
    # flattened arrays are all treated as a group of axes.
    if isinstance(ax, (list, tuple, np.ndarray)):
        for a, lab in zip(ax, text):
            a.text(loc[0], loc[1], lab, ha='center', va='center',
                   transform=a.transAxes, fontsize=fontsize)
        return None

    return ax.text(loc[0], loc[1], text, ha='center', va='center',
                   transform=ax.transAxes, fontsize=fontsize)

In [4]:
def format_labels(ax, xlabel='none', ylabel='none', fontsizes=14, ticklabel_size = 14, rotation_x = 0, rotation_y=0):
    """Set axis-label text/size, tick-label size and tick-label rotation on `ax`.

    ax             = axes object to modify in place
    xlabel, ylabel = new label text; the string 'none' (the default) keeps the
                     existing label and only resizes it
    fontsizes      = font size applied to both axis labels
    ticklabel_size = label size passed to ax.tick_params
    rotation_x / rotation_y = rotation applied to every x / y tick label

    Returns None.
    """
    ax.tick_params(labelsize=ticklabel_size)

    # `!=` instead of the Python-2-only `<>` so the cell also runs on Python 3.
    if xlabel != 'none':
        ax.set_xlabel(xlabel, fontsize=fontsizes)
    else:
        ax.xaxis.get_label().set_fontsize(fontsizes)

    if ylabel != 'none':
        ax.set_ylabel(ylabel, fontsize=fontsizes)
    else:
        ax.yaxis.get_label().set_fontsize(fontsizes)

    x_ticks = ax.get_xticklabels()
    y_ticks = ax.get_yticklabels()

    for t in x_ticks:
        t.set_rotation(rotation_x)
    for t in y_ticks:
        t.set_rotation(rotation_y)
        
    
    
        
        
        

def format_axes(ax, linewidth_gridlines=1, linewidth_spines = 3, color_gridlines = 'grey', color_spines = 'black', 
                     all_four=False):
    """Restyle the gridlines and spines of `ax` in place.

    linewidth_gridlines / color_gridlines = width and colour for every x and y
                                            gridline
    linewidth_spines / color_spines       = width and colour for the spines
    all_four = when true, style the 'right' and 'top' spines as well as the
               'bottom' and 'left' ones

    Returns None.
    """
    gridlines = list(ax.get_xgridlines()) + list(ax.get_ygridlines())
    for line in gridlines:
        line.set_color(color_gridlines)
        line.set_linewidth(linewidth_gridlines)

    # One loop over whichever spines were requested.
    spine_names = ['bottom', 'left']
    if all_four:
        spine_names = spine_names + ['right', 'top']

    for loc in spine_names:
        ax.spines[loc].set_color(color_spines)
        ax.spines[loc].set_linewidth(linewidth_spines)

In [2]:
# def clean_axis(ax, xlabel='none', ylabel='none', fontsizes=14, ticklabel_size = 14):
#     ax.tick_params(labelsize =ticklabel_size)
#     if xlabel <> 'none':
#         ax.set_xlabel(xlabel, fontsize=fontsizes)
#     else:
#         ax.xaxis.get_label().set_fontsize(fontsizes)
    
#     if ylabel <> 'none':
#         ax.set_ylabel(ylabel, fontsize=fontsizes)
#     else:
#         ax.yaxis.get_label().set_fontsize(fontsizes)