In [1]:
import warnings
from os import listdir, makedirs, mkdir
from os.path import isdir

import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import patchworklib as pw
import pycircos
from matplotlib.backends.backend_pdf import PdfPages

# Suppress every Python warning for the rest of the session.
warnings.filterwarnings("ignore")

# Matplotlib defaults shared by all figures produced below.
rcParams = {
    'font.size': 20,
    'font.weight': 'normal',
    'font.family': 'sans-serif',
    'axes.unicode_minus': False,
    'axes.labelweight': 'normal',
}
plt.rcParams.update(rcParams)

# Short aliases for the two pycircos classes used by the plotting cell.
Garc, Gcircle = pycircos.Garc, pycircos.Gcircle

def Make_Counts(df_osa, df_osb, samples, osa_length=2932766, osb_length=3046682, increment=10):
    """Accumulate interval coverage along the OSA and OSB tracks.

    For every name in ``samples``, each row of the matching frame whose ``Sample``
    equals that name adds ``increment`` to the half-open slice ``[Start, End)``
    of that genome's track.

    Parameters
    ----------
    df_osa, df_osb : pandas.DataFrame
        Interval tables with ``Sample``, ``Start`` and ``End`` columns. A frame
        lacking any of these columns (for example an empty ``pd.DataFrame()``)
        contributes nothing and yields an all-zero track.
    samples : iterable
        Sample names to include. Rows belonging to other samples are ignored;
        a name listed twice is counted twice.
    osa_length, osb_length : int, optional
        Number of positions in each track. The defaults are the values that were
        previously hard-coded (presumably the reference genome lengths - confirm).
    increment : int or float, optional
        Amount added per covering interval (default 10, as before).

    Returns
    -------
    tuple of numpy.ndarray
        ``(osa_counts, osb_counts)``, float arrays of the requested lengths.
    """
    # Materialise once so a one-shot iterable can be walked for both genomes.
    samples = list(samples)
    osa_gbl_counts = _accumulate_intervals(df_osa, samples, osa_length, increment)
    osb_gbl_counts = _accumulate_intervals(df_osb, samples, osb_length, increment)
    return osa_gbl_counts, osb_gbl_counts


def _accumulate_intervals(df, samples, length, increment):
    """Return a zero track of ``length`` with ``increment`` added over each selected interval."""
    counts = np.zeros(length)
    # An empty / column-less frame simply has nothing to add.
    if not {'Sample', 'Start', 'End'}.issubset(df.columns):
        return counts

    for sample in samples:
        rows = df[df['Sample'] == sample]
        for start, end in zip(rows['Start'].tolist(), rows['End'].tolist()):
            counts[int(start):int(end)] += increment
    return counts

def Max_Clique_Interval_Graph(group):
    """Summarise the longest run of consecutively overlapping intervals in ``group``.

    Rows are scanned in their given order (callers sort by ``Start`` first). Two
    neighbouring rows "overlap" when the next row's ``Start`` is <= the current
    row's ``End``. A run of k such overlapping neighbours is reported as a clique
    of size k + 1. Only adjacent rows are compared.

    Parameters
    ----------
    group : pandas.DataFrame
        Frame with ``Start`` and ``End`` columns. It is not modified (the previous
        version added a ``Diff`` column to the caller's frame).

    Returns
    -------
    pandas.Series
        ``Max_Clique``      size of the longest run (1 when nothing overlaps),
        ``Start`` / ``End`` coordinates of the first interval of that run,
        ``Num_Assignments`` number of rows in ``group``.
    """
    starts = group['Start'].tolist()
    ends = group['End'].tolist()
    num_assign = len(group)

    if num_assign == 1:
        return pd.Series({'Max_Clique':1, 'Start':starts[0],
                          'End': ends[0], 'Num_Assignments': 1})

    # Gap between each interval's End and the next interval's Start, computed
    # without writing into ``group``. The last entry is NaN (no successor).
    difference = (group['Start'].shift(-1) - group['End']).tolist()

    max_clique = -1
    max_start, max_end = 0, 0
    clique = -1
    start, end = 0, 0
    in_run = False

    for i in range(len(difference)):
        if difference[i] <= 0:
            if in_run:
                clique += 1
            else:
                # A new run starts here: this interval plus its successor.
                clique = 2
                start = starts[i]
                end = ends[i]
                in_run = True
        elif difference[i] > 0:
            # A positive gap closes the current run (if any).
            if max_clique < clique:
                max_clique = clique
                max_start = start
                max_end = end
            in_run = False
            clique = -1
        # NaN (the trailing entry) satisfies neither comparison and is skipped.

    # A run that reaches the last interval is still open after the loop.
    if max_clique < clique:
        max_clique = clique
        max_start = start
        max_end = end

    if max_clique == -1:
        max_clique = 1
        # NOTE(review): np.argmin returns the position of the first NaN, so this
        # always selects the last interval; confirm whether np.nanargmin
        # (smallest real gap) was intended. Behaviour is kept unchanged.
        i = np.argmin(difference)
        max_start = starts[i]
        max_end = ends[i]

    return pd.Series({'Max_Clique':max_clique, 'Start':max_start,
                      'End': max_end, 'Num_Assignments':num_assign})

  import pandas.util.testing as tm


In [2]:
# Per-sample interval summaries: novel_contigs[sample] = {'OSA': frame, 'OSB': frame},
# each frame indexed by contig name (one row per contig from Max_Clique_Interval_Graph).
novel_contigs = {}
novel_contig_path = '/Users/harihara/Research-Activities/Data/Hot-Spring/Missing_Contig_Coords/'

# listdir returns every directory entry, so hidden files (e.g. macOS .DS_Store)
# are dropped before anything is handed to read_csv.
samples = [name for name in listdir(novel_contig_path+'OSA/') if not name.startswith('.')]

for s in samples:
    per_genome = {}
    # The same file name is read from both the OSA/ and OSB/ sub-folders.
    for genome in ('OSA', 'OSB'):
        coords = pd.read_csv(novel_contig_path+genome+'/'+s, sep = "\t")
        per_genome[genome] = (coords.sort_values(by = ['Contig','Start'])
                                    .groupby(['Contig'])
                                    .apply(Max_Clique_Interval_Graph))
    novel_contigs[s.replace(".txt","")] = per_genome

In [3]:
# Tab-separated table of containment groups; only GroupID and Contig are used here.
grp_path = '/Users/harihara/Research-Activities/Data/Hot-Spring/contig_containment_groups_subset_filtered.txt'
df_novel_filtered = pd.read_csv(grp_path, sep = "\t")

# d maps each GroupID to the list of contig names assigned to it.
d = {
    group_id: list(members)
    for group_id, members in df_novel_filtered.groupby('GroupID')['Contig']
}

In [4]:
# Folder that receives one circos PDF per group (and the merged Circos.pdf).
out_dir = '/Users/harihara/Research-Activities/Plots/Hot_Spring_Plots/Synechococcus-Paper/Novel_Groups_Circos/'
# makedirs also creates missing parent folders, and exist_ok makes re-running the cell safe.
makedirs(out_dir, exist_ok=True)


In [7]:
def _split_contig_name(name):
    """Parse a group member name into ``(sample, genome, contig)``.

    Names with five ``_``-separated fields use the first two fields as the sample;
    names with four fields use only the first. The genome field is upper-cased and
    the last two fields form the contig id. Returns ``None`` for any other layout.
    """
    fields = name.split('_')
    if len(fields) == 5:
        return fields[0]+'_'+fields[1], fields[2].upper(), fields[3]+'_'+fields[4]
    if len(fields) == 4:
        return fields[0], fields[1].upper(), fields[2]+'_'+fields[3]
    return None


def _genome_circle(arc_id, subspecies, n_in_group, n_in_graph, counts, color):
    """Build one Gcircle holding a single labelled arc and the coverage line for ``counts``."""
    garc = Garc(arc_id=arc_id, interspace=0, linewidth=0, facecolor="#FFFFFF00", raxis_range=(0,10),
                label=("Synechococcus Sub.Sp "+subspecies+"\n"+"#Contigs (in Group):"+str(n_in_group)+
                       "\n#Contigs (in Graph):"+str(n_in_graph)), labelsize = 24,label_visible=True)
    gcircle = Gcircle(fig=pw.Brick._figure)
    gcircle.add_garc(garc)
    gcircle.set_garcs()
    # The tiny offset keeps the plotted values strictly positive, as in the original cell.
    gcircle.lineplot(arc_id, counts+0.000001, raxis_range=(800,1000), rlim = (0, 34),
                     linewidth = 4, linecolor = color)
    return gcircle


# NOTE(review): groups before this position in d are skipped. This looks like a
# resume offset left over from an interrupted run - set to 0 to regenerate every group.
FIRST_GROUP = 78

pw.param["margin"] = 0.0001

for i, g in enumerate(list(d.keys())[FIRST_GROUP:]):
    contigs = d[g]
    group_counts = {'OSA': 0, 'OSB': 0}
    # Rows are collected in lists and turned into frames once per group;
    # DataFrame.append no longer exists in pandas >= 2.0.
    rows = {'OSA': [], 'OSB': []}

    for c in contigs:
        parsed = _split_contig_name(c)
        if parsed is None:
            # Unrecognised name layout: skip it rather than silently reusing
            # the sample/genome/contig of the previous member.
            continue
        sample, genome, contig = parsed
        if genome in group_counts:
            group_counts[genome] += 1

        try:
            row = novel_contigs[sample][genome].loc[contig].copy()
        except KeyError:
            # No coordinates recorded for this sample / genome / contig.
            continue
        row['Group'] = g
        row['Sample'] = sample
        if genome in rows:
            rows[genome].append(row)

    osa_contig_count, osb_contig_count = group_counts['OSA'], group_counts['OSB']

    # Each row keeps its contig name as the index label; reset_index moves it to a column.
    df_osa = pd.DataFrame(rows['OSA'])
    df_osb = pd.DataFrame(rows['OSB'])
    if len(df_osa) > 0:
        df_osa = df_osa.reset_index()
    if len(df_osb) > 0:
        df_osb = df_osb.reset_index()

    osa_counts, osb_counts = Make_Counts(df_osa, df_osb, list(novel_contigs.keys()))

    gcircle_osa = _genome_circle('OSA', 'A', osa_contig_count, len(df_osa), osa_counts, 'green')
    gcircle_osb = _genome_circle('OSB', 'B', osb_contig_count, len(df_osb), osb_counts, 'orange')

    # Place the two circles side by side and save one PDF per group.
    circos12 = pw.cBrick(ax=gcircle_osa.ax, figsize = (16,10)) | pw.cBrick(ax=gcircle_osb.ax, figsize = (16,10))
    circos12.set_suptitle(g, size = 60)
    circos12.savefig(out_dir+g+'.pdf')
    print(i, g)
    # Free the figures so memory does not grow across groups.
    plt.close("all")
    

0 Group_613
1 Group_615
2 Group_62
3 Group_627
4 Group_629
5 Group_634
6 Group_636
7 Group_644
8 Group_652
9 Group_661
10 Group_665
11 Group_666
12 Group_668
13 Group_685
14 Group_688
15 Group_692
16 Group_695
17 Group_700
18 Group_723
19 Group_726
20 Group_728
21 Group_730
22 Group_736
23 Group_742
24 Group_746
25 Group_749
26 Group_768
27 Group_774
28 Group_780
29 Group_786
30 Group_797
31 Group_799
32 Group_802
33 Group_809
34 Group_835
35 Group_844
36 Group_851
37 Group_854
38 Group_856
39 Group_857
40 Group_861
41 Group_873
42 Group_878
43 Group_879
44 Group_886
45 Group_888
46 Group_904
47 Group_915
48 Group_920
49 Group_928
50 Group_935
51 Group_938
52 Group_943
53 Group_947
54 Group_951
55 Group_954
56 Group_955
57 Group_961
58 Group_963
59 Group_967
60 Group_970
61 Group_978
62 Group_980
63 Group_991


In [16]:
from PyPDF2 import PdfMerger

In [17]:
# Every entry of the output folder, in lexicographic order.
pdfs = sorted(listdir(out_dir))
merger = PdfMerger()

# Queue only the per-group files (names starting with 'Group') for merging.
for pdf in pdfs:
    if not pdf.startswith('Group'):
        continue
    merger.append(out_dir+'/'+pdf)
    print(pdf)

Group_1003.pdf
Group_1008.pdf
Group_1012.pdf
Group_1013.pdf
Group_1015.pdf
Group_1016.pdf
Group_1021.pdf
Group_1025.pdf
Group_1030.pdf
Group_1034.pdf
Group_1040.pdf
Group_1042.pdf
Group_1044.pdf
Group_105.pdf
Group_1051.pdf
Group_1058.pdf
Group_1066.pdf
Group_1075.pdf
Group_1079.pdf
Group_1098.pdf
Group_1106.pdf
Group_1153.pdf
Group_1156.pdf
Group_1174.pdf
Group_1178.pdf
Group_1184.pdf
Group_1189.pdf
Group_125.pdf
Group_128.pdf
Group_130.pdf
Group_175.pdf
Group_179.pdf
Group_180.pdf
Group_182.pdf
Group_187.pdf
Group_211.pdf
Group_218.pdf
Group_223.pdf
Group_23.pdf
Group_249.pdf
Group_258.pdf
Group_284.pdf
Group_286.pdf
Group_290.pdf
Group_306.pdf
Group_312.pdf
Group_321.pdf
Group_322.pdf
Group_323.pdf
Group_333.pdf
Group_335.pdf
Group_353.pdf
Group_364.pdf
Group_365.pdf
Group_382.pdf
Group_40.pdf
Group_406.pdf
Group_416.pdf
Group_419.pdf
Group_423.pdf
Group_437.pdf
Group_464.pdf
Group_476.pdf
Group_482.pdf
Group_485.pdf
Group_495.pdf
Group_498.pdf
Group_501.pdf
Group_508.pdf
Group_519.

In [18]:
try:
    merger.write(out_dir+'/Circos.pdf')
finally:
    # Always close the merger, even when writing the combined PDF fails.
    merger.close()