In [11]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import bokeh as bk

from bokeh.layouts import column
from bokeh.models import CustomJS, ColumnDataSource, Slider
from bokeh.plotting import figure, output_file, show

from math import log, sqrt

import numpy as np

import panel as pn
import panel.widgets as pnw
pn.extension()


### Load and pre-process the files by Tatev

In [12]:
fold_changes_data = pd.read_csv("../Data/fold_changes.csv")#.iloc[1:, :]
normalized_counts_data = pd.read_csv("../Data/counts_normalized.csv")

fold_changes_columns = ['serotype', 
                        'FC_Enriched1_HDFplusRT', 
                        'FC_Enriched1_LonzaDCplusRT']

normalized_counts_columns = ['serotype', 
                             'Chimeric Count', 
                             'Count_Enriched1_HDFplusRT',
                             'Count_Enriched1_LonzaDCplusRT']

fold_changes_data.columns = fold_changes_columns
normalized_counts_data.columns = normalized_counts_columns

# correct the var types
fold_changes_data[fold_changes_columns[1:3]] = fold_changes_data[fold_changes_columns[1:3]].astype('float32')
normalized_counts_data[normalized_counts_columns[1:4]] = normalized_counts_data[normalized_counts_columns[1:4]].astype('float32')

print(fold_changes_data.head())
#print(normalized_counts_data.head())


     serotype  FC_Enriched1_HDFplusRT  FC_Enriched1_LonzaDCplusRT
0  AAV.100002                0.110000                        0.62
1  AAV.100001                0.070000                        0.75
2  AAV.100011                0.120000                        0.71
3  AAV.100005                0.180000                        0.74
4  AAV.100003              502.859985                        4.64


In [13]:
FC_threshold = 1
#fold_changes_data = fold_changes_data[fold_changes_data['FC: Enriched1_HDFplusRT'] > FC_threshold].reset_index()

fold_changes_data = fold_changes_data[(fold_changes_data[fold_changes_columns[1:3]] > 
                                       FC_threshold).any(axis=1)].reset_index()

num_serotypes = len(fold_changes_data['serotype'])

print(num_serotypes)




38


In [14]:
fold_changes_data.head(2)




Unnamed: 0,index,serotype,FC_Enriched1_HDFplusRT,FC_Enriched1_LonzaDCplusRT
0,4,AAV.100003,502.859985,4.64
1,5,AAV.100006,139.169998,3.86


### Radar for fold changes

In [15]:

df = fold_changes_data

serotype_color = dict([("Enriched1_HDFplusRT",   "#0d3362"),
                       ("Enriched1_LonzaDCplusRT", "#c64737"),])




fig_width = 900
fig_height = 900

alpha = np.pi/25.

inner_radius = 20
start_radius = 90
outer_radius = 300 - 10

# main frame of the fig
p = figure(width = fig_width, height = fig_height, title = "Fold Changes",
           x_axis_type = None, y_axis_type = None,
           x_range = (-420, 420), y_range = (-420, 420),
           min_border = 0, outline_line_color = "black",
           background_fill_color = "#f0e1d2", match_aspect = True)

# bace wedge
colors = ["#e69584" for indx in df.serotype]
p.annular_wedge(0, 0, inner_radius, outer_radius, 
                      np.pi/2. + alpha, np.pi/2. - alpha, 
                      color = "#e69584", name = "Base wedge")

# circular grids and their labels
labels = np.linspace(0, 10, 5)
radii = np.linspace(start_radius, outer_radius, 5)
p.circle(0, 0, radius = radii, fill_color = None, line_color = "white", line_width = 1)
p.text(0, radii + 8, [str(ax_label) for ax_label in labels],
          text_font_size = "14px", text_align = "center", text_baseline = "middle")





angle = (2.0*np.pi - 2.*alpha)/(len(df) + 1)
angles = np.pi/2 - alpha - angle/2 - df.index.to_series()*angle

df['log2-FC: Enriched1_HDFplusRT'] = start_radius + np.log2(df['FC_Enriched1_HDFplusRT'].replace(0., np.nan))*20.
df['start_angle Enriched1_HDFplusRT'] = -angle + angles + 4.5*angle/6.
df['end_angle Enriched1_HDFplusRT'] = -angle + angles + 6.*angle/6.

df['log2-FC: Enriched1_LonzaDCplusRT'] = start_radius + np.log2(df['FC_Enriched1_LonzaDCplusRT'].replace(0., np.nan))*20.
df['start_angle Enriched1_LonzaDCplusRT'] = -angle + angles + 3.8*angle/6.
df['end_angle Enriched1_LonzaDCplusRT'] = -angle + angles + 6.7*angle/6.

source = ColumnDataSource(df)


# small wedges
p.annular_wedge(x = 0, y = 0, 
                inner_radius = start_radius, outer_radius = "log2-FC: Enriched1_HDFplusRT",
                start_angle = "start_angle Enriched1_HDFplusRT", end_angle = "end_angle Enriched1_HDFplusRT", 
                source = source, color = "#0d3362", name = "wedges_1")


p.annular_wedge(x = 0, y = 0, 
                inner_radius = start_radius, outer_radius = "log2-FC: Enriched1_LonzaDCplusRT",
                start_angle = "start_angle Enriched1_LonzaDCplusRT", end_angle = "end_angle Enriched1_LonzaDCplusRT", 
                source = source, color="#c64737", name = "wedges_2", alpha = 0.7)




# serotype labels
xr = 3.7*radii[0]*np.cos(np.array(-angle + angles + 10.5*angle/12.))
yr = 3.7*radii[0]*np.sin(np.array(-angle + angles + 10.5*angle/12.))

label_angles = np.array(-angle/2. + angles)
label_angles[label_angles < -np.pi/2] += np.pi
p.text(xr, yr, df.serotype, angle = label_angles,
               text_font_size = "14px", text_align = "center", text_baseline = "middle")




p.rect([-350, -350], [-380, -350], width = 30, height = 13, color = list(serotype_color.values()))
p.text([-320, -320], [-380, -350], text = list(serotype_color),
       text_font_size = "14px", text_align = "left", text_baseline = "middle")

# add a tooltip
tooltips = [("Serotype", "@serotype"), 
            ("Fold change: HDFplusRT", "@FC_Enriched1_HDFplusRT"),
            ("Fold change: LonzaDCplusRT", "@FC_Enriched1_LonzaDCplusRT")]
p.add_tools(bk.models.HoverTool(tooltips = tooltips, mode="mouse", point_policy="follow_mouse", 
                                names = ["wedges_1", "wedges_2"]))
show(p)

