In [None]:
"""
    File : arrowNumberDriversCloneDiversity.ipynb
    Plot : arrows or vectors representing "evolutionary flows" over time, with respect to the average number of harboured drivers and number of clones
    Input : (related to several figure panels)
        SourceData_Fig5c.xlsx
        SourceData_ExtendedData_Fig8b.xlsx
"""

In [4]:
# Root folder that holds the per-figure source-data sub-folders. The path is
# relative, so it is resolved against the directory the notebook is run from.
path_to_all_source_data = "../Source data for figures"

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.figure_factory as ff
import plotly.graph_objs as go
import os

In [19]:
def get_flow(
    df,
    feature_1,
    feature_2
):
    """Return two feature columns together with their row-to-row increments.

    Parameters
    ----------
    df : table indexable by column name (used as ``df[name].values``).
    feature_1, feature_2 : names of the columns to extract.

    Returns
    -------
    tuple ``(x, y, u, v)`` of lists:
        ``x`` / ``y`` are the values of ``feature_1`` / ``feature_2`` in row
        order; ``u`` / ``v`` are the differences between each row and the row
        that follows it, so they are one element shorter than ``x`` / ``y``
        (and empty when there are fewer than two rows).

    NOTE(review): rows are presumably ordered in time within ``df`` -- the
    increments are taken between consecutive rows as given, without sorting.
    """
    def _increments(values):
        # zip stops at the shorter sequence, so this pairs every value with
        # its successor and leaves the final value unpaired.
        return [nxt - cur for cur, nxt in zip(values, values[1:])]

    first_values = df[feature_1].values.tolist()
    second_values = df[feature_2].values.tolist()

    return (
        first_values,
        second_values,
        _increments(first_values),
        _increments(second_values),
    )

def get_evol_traj(
    df,
    feature_1,
    feature_2
):
    """Collect, per simulation, the step start points and step displacements.

    The rows of ``df`` are split by their ``SimID`` value (in order of first
    appearance) and each subset is passed to ``get_flow``.

    Parameters
    ----------
    df : DataFrame with a ``SimID`` column plus the two feature columns.
    feature_1, feature_2 : names of the columns used as x and y coordinates.

    Returns
    -------
    tuple ``(xs, ys, us, vs, final_states)``:
        ``xs`` / ``ys`` : dicts mapping SimID -> coordinates of every row
            except the last one (the start point of each step).
        ``us`` / ``vs`` : dicts mapping SimID -> displacement from each row to
            the next, so each list has the same length as its ``xs`` / ``ys``
            counterpart.
        ``final_states`` : DataFrame with columns ``SimID``,
            ``x_plus_y_final`` and ``x_minus_y_final``, computed from the last
            row of each simulation.
    """
    start_x, start_y = {}, {}
    step_x, step_y = {}, {}
    last_row_summary = []

    for sim in df.SimID.unique():
        x_all, y_all, dx, dy = get_flow(
            df.loc[df.SimID == sim],
            feature_1,
            feature_2
        )

        # Drop the final sample: it has no outgoing step, so every remaining
        # start point lines up with exactly one displacement.
        start_x[sim] = x_all[:-1]
        start_y[sim] = y_all[:-1]
        step_x[sim] = dx
        step_y[sim] = dy

        x_end, y_end = x_all[-1], y_all[-1]
        last_row_summary.append((sim, x_end + y_end, x_end - y_end))

    final_states = pd.DataFrame(
        data=last_row_summary,
        columns=['SimID', 'x_plus_y_final', 'x_minus_y_final']
    )

    return start_x, start_y, step_x, step_y, final_states

def make_flow_plot(
    xs,ys,us,vs,
    number_tracks_show,
    feature_1,
    feature_2,
    opacity=0.05,
    scale=1,
    arrow_scale=0.05
):
    """Build a plotly figure with one arrow track per simulation.

    Parameters
    ----------
    xs, ys : dicts mapping a simulation id to the start coordinates of its steps.
    us, vs : dicts mapping the same ids to the x / y displacement of each step.
    number_tracks_show : only the first this-many ids of ``xs`` (in dict order)
        are drawn.
    feature_1, feature_2 : titles used for the x and y axes.
    opacity : opacity passed to the quiver trace holding all steps but the last.
    scale, arrow_scale : forwarded unchanged to ``ff.create_quiver``.

    Returns
    -------
    ``go.FigureWidget`` containing two traces per drawn simulation: one for
    every step except the last (drawn with ``opacity``) and one for the last
    step (no ``opacity`` argument is passed -- presumably so the most recent
    step stands out). Both traces share the name ``ID=<simulation id>``.

    Simulations with fewer than two stored steps are reported via ``print``
    and skipped.
    """
    traces = []

    for sim in list(xs)[:number_tracks_show]:
        start_x, start_y = xs[sim], ys[sim]
        step_x, step_y = us[sim], vs[sim]

        if len(start_x) < 2:
            print(f'SimID = {sim} has too few data points... Skip... ')
            continue

        label = f'ID={sim}'

        # All steps except the most recent one.
        earlier_steps = ff.create_quiver(
            start_x[:-1], start_y[:-1], step_x[:-1], step_y[:-1],
            scale=scale,
            name=label,
            opacity=opacity,
            arrow_scale=arrow_scale
        )
        # The most recent step, as its own trace.
        latest_step = ff.create_quiver(
            start_x[-1:], start_y[-1:], step_x[-1:], step_y[-1:],
            scale=scale,
            name=label,
            arrow_scale=arrow_scale
        )

        traces.extend([earlier_steps.data[0], latest_step.data[0]])

    fig = go.FigureWidget(traces)

    axis_title_font = {'size': 12}
    fig.update_layout(template='simple_white', autosize=True)
    fig.update_xaxes(title_text=feature_1, title_font=axis_title_font)
    fig.update_yaxes(title_text=feature_2, title_font=axis_title_font)

    return fig

def read_and_plot(
    sheet_names,
    excel_file=None,
    number_tracks_show=50,
    arrow_scale=0.1,
    opacity=0.2,
):
    """Read each requested sheet and display its flow plot.

    For every sheet the function prints the sheet name, loads the sheet with
    ``pd.read_excel``, derives the per-simulation trajectories with
    ``get_evol_traj`` (x = ``MacroSubcloneNumber``,
    y = ``DriverNumberSliceAverage``) and shows the figure returned by
    ``make_flow_plot``.

    Parameters
    ----------
    sheet_names : iterable of sheet names to plot, in order. A single string
        is treated as one sheet name rather than iterated character by
        character.
    excel_file : workbook to read from (anything ``pd.read_excel`` accepts as
        its first argument). When omitted, the notebook-level ``excelfile``
        variable is used, which keeps ``read_and_plot(excelfile.sheet_names)``
        working unchanged.
    number_tracks_show, arrow_scale, opacity : forwarded to ``make_flow_plot``.

    Returns
    -------
    None. The figures are displayed as a side effect.
    """
    if excel_file is None:
        # Backward-compatible default: the workbook opened in the notebook.
        excel_file = excelfile

    if isinstance(sheet_names, str):
        sheet_names = [sheet_names]

    feature_1 = 'MacroSubcloneNumber'
    feature_2 = 'DriverNumberSliceAverage'

    # Iterate over the sheets the caller asked for, not over every sheet of
    # whatever workbook happens to be bound to the global name.
    for sheet_name in sheet_names:

        print(sheet_name)

        data = pd.read_excel(
            excel_file,
            sheet_name=sheet_name,
        )

        # The fifth return value (final states table) is not needed here.
        xs, ys, us, vs, _ = get_evol_traj(
            data,
            feature_1,
            feature_2
        )

        fig = make_flow_plot(
            xs, ys, us, vs,
            number_tracks_show=number_tracks_show,
            feature_1=feature_1,
            feature_2=feature_2,
            arrow_scale=arrow_scale,
            opacity=opacity,
        )

        fig.show()

# Figure 5c

In [5]:
# Path of the Figure 5c workbook, built under the shared source-data root.
path_to_excelfile = os.path.join(
    path_to_all_source_data,
    "Source_Data_Figure_5",
    "SourceData_Fig5c.xlsx"
)

In [6]:
# Open the Figure 5c workbook. `read_and_plot` reads from this notebook-level
# `excelfile` variable, so it must be bound before that function is called.
excelfile = pd.ExcelFile(path_to_excelfile)
# Trailing expression: shows the workbook's sheet names as the cell output.
excelfile.sheet_names

['SourceDataFig5ci', 'SourceDataFig5cii']

In [20]:
# Print each sheet name of the Figure 5c workbook and show its flow plot.
read_and_plot(excelfile.sheet_names)

SourceDataFig5ci


SourceDataFig5cii


# Extended Data Figure 8b

In [21]:
# Path of the Extended Data Figure 8b workbook. This rebinds
# `path_to_excelfile`, replacing the Figure 5c path assigned earlier.
path_to_excelfile = os.path.join(
    path_to_all_source_data,
    "Source_Data_Extended_Data_Figure_8",
    "SourceData_ExtendedData_Fig8b.xlsx"
)

In [22]:
# Rebind the notebook-level `excelfile` to the Extended Data Figure 8b
# workbook; later `read_and_plot` calls read from this workbook instead of
# the Figure 5c one.
excelfile = pd.ExcelFile(path_to_excelfile)
# Trailing expression: shows the workbook's sheet names as the cell output.
excelfile.sheet_names

['SourceDataEDFig8bi', 'SourceDataEDFig8bii']

In [23]:
# Print each sheet name of the Extended Data Figure 8b workbook and show its flow plot.
read_and_plot(excelfile.sheet_names)

SourceDataEDFig8bi


SourceDataEDFig8bii
