In [None]:
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
from matplotlib.patches import Rectangle
import mpl_toolkits.mplot3d.art3d as art3d

In [None]:
# Widen the notebook's ".container" element to 95% of the browser window so
# the large figures below fit. `display` and `HTML` are imported from the
# public IPython.display module; importing `display` from
# IPython.core.display is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))

# All alterations

## Prepare the chromosome info and data

In [None]:
# Chromosome lengths keyed by chromosome number (as a string).
# NOTE(review): keys '23' and '24' presumably stand for chrX and chrY - confirm.
hg38_chro_size = {
    '1': 248956422,
    '2': 242193529,
    '3': 198295559,
    '4': 190214555,
    '5': 181538259,
    '6': 170805979,
    '7': 159345973,
    '8': 145138636,
    '9': 138394717,
    '10': 133797422,
    '11': 135086622,
    '12': 133275309,
    '13': 114364328,
    '14': 107043718,
    '15': 101991189,
    '16': 90338345,
    '17': 83257441,
    '18': 80373285,
    '19': 58617616,
    '20': 64444167,
    '21': 46709983,
    '22': 50818468,
    '23': 156040895,
    '24': 57227415,
}

In [None]:
# Width of one genomic bin; chromosome sizes and gene start positions are
# divided by this value to obtain bin numbers.
bin_size = 1_000_000

In [None]:
# Per-chromosome layout on one concatenated genome axis measured in bins.
# Resulting columns, in order:
#   size  - chromosome length taken from hg38_chro_size
#   bin   - number of bins of width bin_size needed to cover the chromosome
#   tick  - axis position of the chromosome's middle bin (used for tick labels)
#   name  - display label, 'chr' + chromosome number
#   start - number of bins belonging to all preceding chromosomes
#   end   - cumulative bin count through this chromosome, minus one
hg38_chro = pd.DataFrame.from_dict(hg38_chro_size, orient='index', columns=['size'])
hg38_chro.index = hg38_chro.index.astype(int)
hg38_chro = hg38_chro.assign(bin=np.ceil(hg38_chro['size'] / bin_size))

# One running sum replaces the per-row "sum every earlier row" lookups.
# This relies on the rows being in ascending chromosome order, which is the
# order in which hg38_chro_size lists them.
bins_through = hg38_chro['bin'].cumsum()
bins_before = bins_through - hg38_chro['bin']

hg38_chro['tick'] = bins_before + np.ceil(hg38_chro['bin'] / 2)
hg38_chro['name'] = 'chr' + hg38_chro.index.astype(str)
hg38_chro['start'] = bins_before
hg38_chro['end'] = bins_through - 1

In [None]:
# Show the per-chromosome bin table as the cell output for a visual check.
hg38_chro

## Prepare disease data

In [None]:
def file2record(f):
    """Load a tab-separated file into a pandas DataFrame.

    Parameters
    ----------
    f : path, path-like object or open file handle
        Passed straight to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed table; the first line of the file supplies the column names.
    """
    return pd.read_csv(f, sep='\t')

In [None]:
# Walk <path>/<organ>/<subtype>/ and load the amplification and deletion gene
# tables of every subtype that has at least `min_samples` samples.
#
# Results are three parallel lists: names[k] ("<organ>-<subtype>") labels both
# amps[k] and dels[k]. A subtype is recorded only when BOTH tables were found,
# so the lists can never drift out of step with each other.
path = '../data'
amps = []
dels = []
names = []
min_samples = 50

# sorted() consumes (and thereby closes) each scandir iterator and makes the
# traversal order independent of the file system.
for organ in sorted(os.scandir(path), key=lambda entry: entry.name):
    if not organ.is_dir():
        continue
    print(organ.name)

    # "<organ>_counts.tsv" holds one row per subtype with columns 'name' and 'count'.
    counts_file = os.path.join(organ.path, '_'.join([organ.name.lower(), 'counts.tsv']))
    counts = pd.read_csv(counts_file, sep='\t')
    print(counts)

    for subtype in sorted(os.scandir(organ.path), key=lambda entry: entry.name):
        if not subtype.is_dir():
            continue
        print('\t{}'.format(subtype.name))

        sample_count = counts.loc[counts['name'] == subtype.name, 'count']
        if sample_count.empty:
            # Previously an IndexError; a subtype without a count cannot be vetted.
            print('\t\t No sample count listed: {}'.format(subtype.name))
            continue
        if sample_count.iloc[0] < min_samples:
            print('\t\t Not enough samples: {}'.format(subtype.name))
            continue

        amp_table = None
        del_table = None
        for f in sorted(os.scandir(subtype.path), key=lambda entry: entry.name):
            # Only file names made of exactly four '_'-separated parts are
            # considered in this section.
            if len(f.name.split('_')) != 4:
                continue
            # If several files match, the last one (by name) is used.
            if 'amp_genes' in f.name:
                print('\t\t{}'.format(f.name))
                amp_table = file2record(f.path)
            if 'del_genes' in f.name:
                print('\t\t{}'.format(f.name))
                del_table = file2record(f.path)

        if amp_table is None or del_table is None:
            print('\t\t Skipped, amp or del file missing: {}'.format(subtype.name))
            continue

        amps.append(amp_table)
        dels.append(del_table)
        names.append('-'.join([organ.name, subtype.name]))
                        

In [None]:
# Reorder the three parallel lists alphabetically by label.
# zip() would silently drop entries if the lists had different lengths and
# pair tables with the wrong label, so misalignment is reported instead.
if not (len(names) == len(amps) == len(dels)):
    raise ValueError(
        'names/amps/dels are misaligned: {} names, {} amps, {} dels'.format(
            len(names), len(amps), len(dels)))

# One stable permutation applied to all three lists keeps them in step.
order = sorted(range(len(names)), key=names.__getitem__)
amps = [amps[k] for k in order]
dels = [dels[k] for k in order]
names = [names[k] for k in order]

## prepare amp data

In [None]:
# Flatten the amplification tables into the parallel lists that bar3d() takes:
#   xs, ys, zs - bar anchor (genome-wide bin, row index of the table, 0)
#   xd, yd, zd - bar extent (0.8 bins wide, 0.2 rows deep, mean cnv_scaled high)
#   colors     - one 'Set2' colour per table, cycling through the palette
# `i` ends up as the number of tables and is read by the plotting cell.
xs, ys, zs = [], [], []
xd, yd, zd = [], [], []
colors = []
cm = plt.get_cmap('Set2')
num_colors = len(cm.colors)

# Bins occupied by all chromosomes before each one (indexed by chromosome number).
chrom_offset = hg38_chro['bin'].cumsum() - hg38_chro['bin']

for y, df in enumerate(amps):
    if df.shape[0] == 0:
        continue  # empty table: its row simply stays without bars

    # Genome-wide bin of every record; stored on the table as column 'bin'.
    df['bin'] = df['chr'].map(chrom_offset) + np.ceil(df['start'] / bin_size)

    # Average only the plotted column: a frame-wide mean() raises on
    # non-numeric columns in pandas >= 2.0.
    binned = df.groupby('bin')['cnv_scaled'].mean()
    size = binned.shape[0]

    xs += binned.index.astype(int).tolist()
    ys += [y] * size
    zs += [0] * size

    xd += [0.8] * size
    yd += [0.2] * size
    zd += binned.tolist()

    colors += [cm.colors[y % num_colors]] * size

i = len(amps)

## plot amps

In [None]:
# 3D bar chart of the binned amplification values: genome position on x,
# one row per subtype on y, bar height on z. Saved as bar3d_amp.pdf.
plt.rcParams['figure.dpi'] = 200
fig = plt.figure(figsize=(30, 20))
ax = fig.add_subplot(111, projection='3d')

# Shade chromosomes 1-22 on the floor (z=0); odd chromosomes are darker and
# outlined so that neighbouring chromosomes can be told apart.
for j in range(1, 23):
    if j % 2 == 1:
        band_style = dict(fc='grey', alpha=0.3, ec='black')
    else:
        band_style = dict(fc='grey', alpha=0.1)
    p = Rectangle((hg38_chro.loc[j, 'start'], -0.2),
                  hg38_chro.loc[j, 'bin'] - 1, i - 0.6, **band_style)
    ax.add_patch(p)
    art3d.pathpatch_2d_to_3d(p, z=0, zdir="z")

ax.bar3d(xs, ys, zs, xd, yd, zd, color=colors)

ax.set_xlabel('Chromosomes', labelpad=35, fontsize=15)
ax.set_ylabel('Subtypes', labelpad=150, fontsize=15)
ax.set_zlabel('Frequency', fontsize=15)

# ax.xaxis / ax.yaxis replace the w_xaxis / w_yaxis aliases, which were
# removed in Matplotlib 3.8. Building the ticks from an integer range gives
# exactly i ticks; a float-stepped arange can produce one tick too many.
ax.yaxis.set_ticks(np.arange(i) + 0.3)
ax.yaxis.set_ticklabels(names[:i], rotation=-10, fontsize=10,
                        horizontalalignment='left')

ax.xaxis.set_ticks(hg38_chro.loc[:22, 'tick'])
ax.xaxis.set_ticklabels(hg38_chro.loc[:22, 'name'], rotation=60, fontsize=10,
                        horizontalalignment='right')

# Raise the camera 40 degrees above the default elevation.
ax.view_init(elev=ax.elev + 40)
plt.savefig('bar3d_amp.pdf', bbox_inches='tight')

## prepare del data

In [None]:
# Flatten the deletion tables into the parallel lists that bar3d() takes:
#   xs, ys, zs - bar anchor (genome-wide bin, row index of the table, 0)
#   xd, yd, zd - bar extent (0.8 bins wide, 0.2 rows deep, mean cnv_scaled high)
#   colors     - one 'Set2' colour per table, cycling through the palette
# `i` ends up as the number of tables and is read by the plotting cell.
xs, ys, zs = [], [], []
xd, yd, zd = [], [], []
colors = []
cm = plt.get_cmap('Set2')
num_colors = len(cm.colors)

# Bins occupied by all chromosomes before each one (indexed by chromosome number).
chrom_offset = hg38_chro['bin'].cumsum() - hg38_chro['bin']

for y, df in enumerate(dels):
    if df.shape[0] == 0:
        continue  # empty table: its row simply stays without bars

    # Genome-wide bin of every record; stored on the table as column 'bin'.
    df['bin'] = df['chr'].map(chrom_offset) + np.ceil(df['start'] / bin_size)

    # Average only the plotted column: a frame-wide mean() raises on
    # non-numeric columns in pandas >= 2.0.
    binned = df.groupby('bin')['cnv_scaled'].mean()
    size = binned.shape[0]

    xs += binned.index.astype(int).tolist()
    ys += [y] * size
    zs += [0] * size

    xd += [0.8] * size
    yd += [0.2] * size
    zd += binned.tolist()

    colors += [cm.colors[y % num_colors]] * size

i = len(dels)

In [None]:
# Plot bar heights as magnitudes; this also turns the list into a NumPy array.
zd = np.absolute(zd)

In [None]:
# 3D bar chart of the binned deletion values: genome position on x,
# one row per subtype on y, bar height on z. Saved as bar3d_del.pdf.
plt.rcParams['figure.dpi'] = 200
fig = plt.figure(figsize=(30, 20))
ax = fig.add_subplot(111, projection='3d')

# Shade chromosomes 1-22 on the floor (z=0); odd chromosomes are darker and
# outlined so that neighbouring chromosomes can be told apart.
for j in range(1, 23):
    if j % 2 == 1:
        band_style = dict(fc='grey', alpha=0.3, ec='black')
    else:
        band_style = dict(fc='grey', alpha=0.1)
    p = Rectangle((hg38_chro.loc[j, 'start'], -0.2),
                  hg38_chro.loc[j, 'bin'] - 1, i - 0.6, **band_style)
    ax.add_patch(p)
    art3d.pathpatch_2d_to_3d(p, z=0, zdir="z")

ax.bar3d(xs, ys, zs, xd, yd, zd, color=colors)

ax.set_xlabel('Chromosomes', labelpad=35, fontsize=15)
ax.set_ylabel('Subtypes', labelpad=150, fontsize=15)
ax.set_zlabel('Frequency', fontsize=15)

# ax.xaxis / ax.yaxis replace the w_xaxis / w_yaxis aliases, which were
# removed in Matplotlib 3.8. Building the ticks from an integer range gives
# exactly i ticks; a float-stepped arange can produce one tick too many.
ax.yaxis.set_ticks(np.arange(i) + 0.3)
ax.yaxis.set_ticklabels(names[:i], rotation=-10, fontsize=10,
                        horizontalalignment='left')

ax.xaxis.set_ticks(hg38_chro.loc[:22, 'tick'])
ax.xaxis.set_ticklabels(hg38_chro.loc[:22, 'name'], rotation=60, fontsize=10,
                        horizontalalignment='right')

# Raise the camera 40 degrees above the default elevation.
ax.view_init(elev=ax.elev + 40)
plt.savefig('bar3d_del.pdf', bbox_inches='tight')

# High-level alterations

## prepare disease data

In [None]:
# Walk <path>/<organ>/<subtype>/ and load the high-level amplification and
# deletion gene tables of every subtype with at least `min_samples` samples.
#
# Results are three parallel lists: names[k] ("<organ>-<subtype>") labels both
# amps[k] and dels[k]. A subtype is recorded only when BOTH tables were found,
# so the lists can never drift out of step with each other.
amps = []
dels = []
names = []
min_samples = 50

# sorted() consumes (and thereby closes) each scandir iterator and makes the
# traversal order independent of the file system.
for organ in sorted(os.scandir(path), key=lambda entry: entry.name):
    if not organ.is_dir():
        continue
    print(organ.name)

    # "<organ>_counts.tsv" holds one row per subtype with columns 'name' and 'count'.
    counts_file = os.path.join(organ.path, '_'.join([organ.name.lower(), 'counts.tsv']))
    counts = pd.read_csv(counts_file, sep='\t')
    print(counts)

    for subtype in sorted(os.scandir(organ.path), key=lambda entry: entry.name):
        if not subtype.is_dir():
            continue
        print('\t{}'.format(subtype.name))

        sample_count = counts.loc[counts['name'] == subtype.name, 'count']
        if sample_count.empty:
            # Previously an IndexError; a subtype without a count cannot be vetted.
            print('\t\t No sample count listed: {}'.format(subtype.name))
            continue
        if sample_count.iloc[0] < min_samples:
            print('\t\t Not enough samples: {}'.format(subtype.name))
            continue

        amp_table = None
        del_table = None
        for f in sorted(os.scandir(subtype.path), key=lambda entry: entry.name):
            # If several files match, the last one (by name) is used.
            if 'high_amp_genes' in f.name:
                print('\t\t{}'.format(f.name))
                amp_table = file2record(f.path)
            if 'high_del_genes' in f.name:
                print('\t\t{}'.format(f.name))
                del_table = file2record(f.path)

        if amp_table is None or del_table is None:
            print('\t\t Skipped, amp or del file missing: {}'.format(subtype.name))
            continue

        amps.append(amp_table)
        dels.append(del_table)
        names.append('-'.join([organ.name, subtype.name]))

In [None]:
# Reorder the three parallel lists alphabetically by label.
# zip() would silently drop entries if the lists had different lengths and
# pair tables with the wrong label, so misalignment is reported instead.
if not (len(names) == len(amps) == len(dels)):
    raise ValueError(
        'names/amps/dels are misaligned: {} names, {} amps, {} dels'.format(
            len(names), len(amps), len(dels)))

# One stable permutation applied to all three lists keeps them in step.
order = sorted(range(len(names)), key=names.__getitem__)
amps = [amps[k] for k in order]
dels = [dels[k] for k in order]
names = [names[k] for k in order]

## prepare amp data

In [None]:
# Flatten the high-level amplification tables into the parallel lists that
# bar3d() takes:
#   xs, ys, zs - bar anchor (genome-wide bin, row index of the table, 0)
#   xd, yd, zd - bar extent (15 bins wide, 0.1 rows deep, mean cnv_scaled high)
#   colors     - one 'Set2' colour per table, cycling through the palette
# `i` ends up as the number of tables and is read by the plotting cell.
xs, ys, zs = [], [], []
xd, yd, zd = [], [], []
colors = []
cm = plt.get_cmap('Set2')
num_colors = len(cm.colors)

# Bins occupied by all chromosomes before each one (indexed by chromosome number).
chrom_offset = hg38_chro['bin'].cumsum() - hg38_chro['bin']

for y, df in enumerate(amps):
    if df.shape[0] == 0:
        continue  # empty table: its row simply stays without bars

    # Genome-wide bin of every record; stored on the table as column 'bin'.
    df['bin'] = df['chr'].map(chrom_offset) + np.ceil(df['start'] / bin_size)

    # Average only the plotted column: a frame-wide mean() raises on
    # non-numeric columns in pandas >= 2.0.
    binned = df.groupby('bin')['cnv_scaled'].mean()
    size = binned.shape[0]

    xs += binned.index.astype(int).tolist()
    ys += [y] * size
    zs += [0] * size

    xd += [15] * size
    yd += [0.1] * size
    zd += binned.tolist()

    colors += [cm.colors[y % num_colors]] * size

i = len(amps)

## plot

In [None]:
# 3D bar chart of the binned high-level amplification values: genome position
# on x, one row per subtype on y, bar height on z. Saved as bar3d_high_amp.pdf.
plt.rcParams['figure.dpi'] = 200
fig = plt.figure(figsize=(30, 20))
ax = fig.add_subplot(111, projection='3d')

# Shade chromosomes 1-22 on the floor (z=0); odd chromosomes are darker and
# outlined so that neighbouring chromosomes can be told apart.
for j in range(1, 23):
    if j % 2 == 1:
        band_style = dict(fc='grey', alpha=0.3, ec='black')
    else:
        band_style = dict(fc='grey', alpha=0.1)
    p = Rectangle((hg38_chro.loc[j, 'start'], -0.2),
                  hg38_chro.loc[j, 'bin'] - 1, i - 0.6, **band_style)
    ax.add_patch(p)
    art3d.pathpatch_2d_to_3d(p, z=0, zdir="z")

ax.bar3d(xs, ys, zs, xd, yd, zd, color=colors)

ax.set_xlabel('Chromosomes', labelpad=35, fontsize=15)
ax.set_ylabel('Subtypes', labelpad=150, fontsize=15)
ax.set_zlabel('Frequency', fontsize=15)

# ax.xaxis / ax.yaxis replace the w_xaxis / w_yaxis aliases, which were
# removed in Matplotlib 3.8. Building the ticks from an integer range gives
# exactly i ticks; a float-stepped arange can produce one tick too many.
ax.yaxis.set_ticks(np.arange(i) + 0.3)
ax.yaxis.set_ticklabels(names[:i], rotation=-10, fontsize=10,
                        horizontalalignment='left')

ax.xaxis.set_ticks(hg38_chro.loc[:22, 'tick'])
ax.xaxis.set_ticklabels(hg38_chro.loc[:22, 'name'], rotation=60, fontsize=10,
                        horizontalalignment='right')

# Raise the camera 40 degrees above the default elevation.
ax.view_init(elev=ax.elev + 40)
plt.savefig('bar3d_high_amp.pdf', bbox_inches='tight')

## prepare del data

In [None]:
# Flatten the high-level deletion tables into the parallel lists that
# bar3d() takes:
#   xs, ys, zs - bar anchor (genome-wide bin, row index of the table, 0)
#   xd, yd, zd - bar extent (15 bins wide, 0.1 rows deep, mean cnv_scaled high)
#   colors     - one 'Set2' colour per table, cycling through the palette
# `i` ends up as the number of tables (empty ones included) and is read by
# the plotting cell.
xs, ys, zs = [], [], []
xd, yd, zd = [], [], []
colors = []
cm = plt.get_cmap('Set2')
num_colors = len(cm.colors)

# Bins occupied by all chromosomes before each one (indexed by chromosome number).
chrom_offset = hg38_chro['bin'].cumsum() - hg38_chro['bin']

for y, df in enumerate(dels):
    if df.shape[0] == 0:
        continue  # empty table: its row simply stays without bars

    # Genome-wide bin of every record; stored on the table as column 'bin'.
    df['bin'] = df['chr'].map(chrom_offset) + np.ceil(df['start'] / bin_size)

    # Average only the plotted column: a frame-wide mean() raises on
    # non-numeric columns in pandas >= 2.0.
    binned = df.groupby('bin')['cnv_scaled'].mean()
    size = binned.shape[0]

    xs += binned.index.astype(int).tolist()
    ys += [y] * size
    zs += [0] * size

    xd += [15] * size
    yd += [0.1] * size
    zd += binned.tolist()

    colors += [cm.colors[y % num_colors]] * size

i = len(dels)

In [None]:
# Plot bar heights as magnitudes; this also turns the list into a NumPy array.
zd = np.absolute(zd)

## plot

In [None]:
# 3D bar chart of the binned high-level deletion values: genome position
# on x, one row per subtype on y, bar height on z. Saved as bar3d_high_del.pdf.
plt.rcParams['figure.dpi'] = 200
fig = plt.figure(figsize=(30, 20))
ax = fig.add_subplot(111, projection='3d')

# Shade chromosomes 1-22 on the floor (z=0); odd chromosomes are darker and
# outlined so that neighbouring chromosomes can be told apart.
for j in range(1, 23):
    if j % 2 == 1:
        band_style = dict(fc='grey', alpha=0.3, ec='black')
    else:
        band_style = dict(fc='grey', alpha=0.1)
    p = Rectangle((hg38_chro.loc[j, 'start'], -0.2),
                  hg38_chro.loc[j, 'bin'] - 1, i - 0.6, **band_style)
    ax.add_patch(p)
    art3d.pathpatch_2d_to_3d(p, z=0, zdir="z")

ax.bar3d(xs, ys, zs, xd, yd, zd, color=colors)

ax.set_xlabel('Chromosomes', labelpad=35, fontsize=15)
ax.set_ylabel('Subtypes', labelpad=150, fontsize=15)
ax.set_zlabel('Frequency', fontsize=15)

# ax.xaxis / ax.yaxis replace the w_xaxis / w_yaxis aliases, which were
# removed in Matplotlib 3.8. Building the ticks from an integer range gives
# exactly i ticks; a float-stepped arange can produce one tick too many.
ax.yaxis.set_ticks(np.arange(i) + 0.3)
ax.yaxis.set_ticklabels(names[:i], rotation=-10, fontsize=10,
                        horizontalalignment='left')

ax.xaxis.set_ticks(hg38_chro.loc[:22, 'tick'])
ax.xaxis.set_ticklabels(hg38_chro.loc[:22, 'name'], rotation=60, fontsize=10,
                        horizontalalignment='right')

# Raise the camera 40 degrees above the default elevation.
ax.view_init(elev=ax.elev + 40)
plt.savefig('bar3d_high_del.pdf', bbox_inches='tight')