In [1]:
import pandas as pd
import numpy as np
import dataframe_image as dfi   # you will need to pip install this library

In [2]:
# Load the growth-rate table; the first CSV column is used as the row index.
df = pd.read_csv(
    'growthrates.mts10k.mbs50.buckets.none.mutrans.data.single.5000.1.50.None.S_50.0.nueff_100.tau_100.0.T_50000_10000.s0.csv',
    index_col=0,
)

# Distinct values of the 'Variant Name' column (one entry per lineage).
pangos = set(df['Variant Name'])

In [3]:
def get_growth_rates(dataframe, num_lineages=20):
    """Rank lineages by mean growth rate and return the top ``num_lineages``.

    Rows of ``dataframe`` are grouped by the 'Variant Name' column. For each
    group the 'GrowthRate' values are averaged, and the spread is combined as
    ``mean(GrowthRateStd**2) + var(GrowthRate)`` (average within-row variance
    plus the variance of the row means).

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain the columns 'Variant Name', 'GrowthRate' and
        'GrowthRateStd'.
    num_lineages : int, default 20
        Number of top-ranked lineages to return. ``0`` returns empty arrays.

    Returns
    -------
    means : numpy.ndarray
        Mean growth rate per lineage, in descending order.
    stds : numpy.ndarray
        ``1.96 * std`` per lineage (half-width of a normal-approximation 95%
        interval), aligned with ``means``.
    lineages : numpy.ndarray
        Lineage names aligned with ``means``.

    Raises
    ------
    ValueError
        If ``num_lineages`` is negative.
    """
    if num_lineages < 0:
        raise ValueError('num_lineages must be non-negative')

    rates = []
    # Group on the frame that was passed in rather than on a module-level set
    # of names, so the function works on any frame and on a fresh kernel.
    # groupby also skips rows whose lineage name is missing.
    for pango, df_pango in dataframe.groupby('Variant Name', sort=False):
        row_means = df_pango['GrowthRate'].to_numpy(dtype=float)
        row_stds = df_pango['GrowthRateStd'].to_numpy(dtype=float)
        mean = row_means.mean()
        # np.var is E[x^2] - E[x]^2 computed in a form that cannot round to a
        # negative number, so the sqrt below never yields NaN from rounding.
        var = np.square(row_stds).mean() + row_means.var()
        rates.append((mean, np.sqrt(var), pango))

    # Sort descending and slice from the front: unlike ``rates[-n:]`` this
    # returns nothing (not everything) when n == 0.
    top = sorted(rates, reverse=True)[:num_lineages]
    means = np.array([r[0] for r in top])
    stds = np.array([1.96 * r[1] for r in top])
    lineages = np.array([r[2] for r in top])

    return means, stds, lineages

## Tables

In [4]:
def export_growth_rate_table(dataframe, start=0, finish=20, filename='growth_rates_table.png'):
    """Export a ranked table of lineage growth rates as an image.

    The top ``finish`` lineages are obtained from ``get_growth_rates`` and
    formatted as ``"mean ± interval"`` with three decimals. Rows are numbered
    from 1 by rank, and the positional slice ``start:finish`` of that table is
    passed to ``dfi.export``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Growth-rate table forwarded to ``get_growth_rates``.
    start : int, default 0
        Zero-based position of the first ranked row to include.
    finish : int, default 20
        Number of lineages to rank; also the end of the positional slice.
    filename : str, default 'growth_rates_table.png'
        Output path handed to ``dfi.export``.
    """
    # Use the frame that was passed in, not the notebook-level `df`.
    means, stds, lineages = get_growth_rates(dataframe, num_lineages=finish)
    # '\u00b1' is the plus-minus sign; written as an escape so the literal
    # cannot be corrupted by a file-encoding round trip.
    growth_rate = ['{:.3f} \u00b1 {:.3f}'.format(b, s) for b, s in zip(means, stds)]
    # Size the rank index from the rows actually returned, so a frame with
    # fewer than `finish` lineages does not trigger a shape mismatch.
    table = pd.DataFrame(
        {'Lineage': lineages, 'Growth Rate': growth_rate},
        index=np.arange(1, len(lineages) + 1),
    )
    table = table.iloc[start:finish]
    dfi.export(table, filename)

In [5]:
# Write the table of the 20 highest-ranked lineages to 'top_growth_rates.png'.
export_growth_rate_table(
    df,
    start=0,
    finish=20,
    filename='top_growth_rates.png',
)