In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
import statistics
# Notebook-wide figure defaults: white figure background and a compact 4x3 canvas.
plt.rc("figure", figsize=[4, 3], facecolor="w")

In [None]:
def _load_read_stats(path_template, ksizes):
    """Collect the per-line statistics stored in one file per k value.

    Parameters
    ----------
    path_template : str
        File name pattern containing a ``{k}`` placeholder; it is formatted
        once per entry of ``ksizes`` to obtain the file to read.
    ksizes : iterable of int
        The k values to load, in the order their rows should appear.

    Returns
    -------
    list of tuple
        One ``(k, median, mean, stdev)`` tuple per non-blank line. The three
        floats are taken in the order they appear on the line.

    Raises
    ------
    FileNotFoundError
        If the file for one of the k values does not exist.
    ValueError
        If a non-blank line does not hold exactly three numbers.
    """
    rows = []
    for k in ksizes:
        # `with` closes each file as soon as it has been read, instead of
        # leaving one open handle per k for the garbage collector.
        with open(path_template.format(k=k)) as stats_file:
            for line in stats_file:
                # Skip blank lines (e.g. a trailing newline at end of file),
                # which would otherwise break the 3-way unpack below.
                if not line.strip():
                    continue
                median, mean, stdev = map(float, line.split())
                rows.append((k, median, mean, stdev))
    return rows


# k-mer sizes for which jellyfish statistics files are read.
ksizes = [11, 15, 20, 25, 50, 100, 200, 500]
data_jellyfish = _load_read_stats("jellyfish-k{k}.reads_stats", ksizes)

# k-min-mer k values; `ksizes` is deliberately left holding this second list,
# as it did before.
# Note the file suffix here is ".read_stats", not ".reads_stats" as above.
ksizes = [2, 4, 6, 8, 10, 12, 15, 20]
data_kminmers = _load_read_stats(
    "HG002-kminmer-abundance-in-CHM13v2-k{k}.50k.read_stats", ksizes
)

In [None]:
# Attempt to merge both plots (k-mers and k-min-mers) into a single figure

def k_to_kspan(k, density=0.01):
    """Convert a k-min-mer's k value into its span, ``k / density``.

    The result is what the plot labels as the "equivalent k-mer length".

    Parameters
    ----------
    k : int or float
        The k-min-mer's k value.
    density : float, optional
        Divisor applied to ``k``. Defaults to 0.01, the value that was
        previously hard-coded here.

    Returns
    -------
    float
        ``k / density``.

    Raises
    ------
    ZeroDivisionError
        If ``density`` is 0.
    """
    return k / density

def k_to_kweight(k, l=31):
    """Convert a k-min-mer's k value into its weight, ``k * l``.

    The result is what the plot labels as the "seed weight".

    Parameters
    ----------
    k : int or float
        The k-min-mer's k value.
    l : int or float, optional
        Multiplier applied to ``k``. Defaults to 31, the value that was
        previously hard-coded here (presumably the length of each
        minimizer -- TODO confirm).

    Returns
    -------
    int or float
        ``k * l``.
    """
    return k * l

# Imports needed only by this cell, grouped here rather than mid-script.
import math
from matplotlib.ticker import FuncFormatter

# Which x-axis to plot against: "kweight" (seed weight) or "kspan"
# (equivalent k-mer length). Edit this one line to switch variants.
ktype = "kweight"

# One row per loaded statistic: (kspan, kweight, median, seed type).
# For plain k-mers both x-axis variants are simply k itself.
data = [(k, k, median, "k-mers") for (k, median, _mean, _stdev) in data_jellyfish]
data += [
    (k_to_kspan(k), k_to_kweight(k), median, "k-min-mers")
    for (k, median, _mean, _stdev) in data_kminmers
]
df = pd.DataFrame(data, columns=["kspan", "kweight", "median", "type"])

# Everything that depends on the chosen x-axis variant, decided in one place.
if ktype == "kspan":
    x_label = 'Equivalent k-mer length'
    mapquik_x = k_to_kspan(7)
    minimap2_label_x = 85
    mapquik_label_x = 2500
else:
    x_label = 'Seed weight'
    mapquik_x = k_to_kweight(7)
    minimap2_label_x = 65
    mapquik_label_x = 650


def format_tick_as_int(x, pos):
    """Tick formatter: show the tick value truncated to a plain integer."""
    return f'{int(x)}'


# Median abundance per seed type on log-log axes.
sns_plot = sns.lineplot(data=df, x=ktype, y="median", hue="type", errorbar=('se', 10))
sns_plot.set(yscale="log")
sns_plot.set(xscale="log")
sns_plot.set_ylim(0.15, 10000)
# Each axis gets its own formatter instance.
sns_plot.yaxis.set_major_formatter(FuncFormatter(format_tick_as_int))
sns_plot.xaxis.set_major_formatter(FuncFormatter(format_tick_as_int))
sns_plot.set(xticks=[10, 100, 500])
sns_plot.set(xlabel=x_label, ylabel='')

# Replace seaborn's legend with a fresh one in the upper-right corner.
sns_plot.get_legend().remove()
sns_plot.legend(loc='upper right')
sns_plot.set_title('Median number of occurrences of seeds \n from the reads in the reference genome')

# Reference lines: abundance 1, x=19 (labelled "minimap2's k") and the
# converted k=7 (labelled "mapquik's k").
sns_plot.axhline(1, color='k', linestyle='--')
sns_plot.axvline(19, color='steelblue', linestyle='--')
sns_plot.axvline(mapquik_x, color='orange', linestyle='--')
# Label positions are hand-picked per x-axis variant.
sns_plot.text(minimap2_label_x, 225, "minimap2's k",
              horizontalalignment='right', size='small', color='steelblue')
sns_plot.text(mapquik_label_x, 5, "mapquik's k",
              horizontalalignment='right', size='small', color='darkgoldenrod')

fig = sns_plot.get_figure()
if ktype == "kspan":
    # Second, zero-height Axes used only as an extra x-axis ruler below the
    # main plot. Its position and width were set empirically, not computed.
    ax2 = fig.add_axes((0.55, -0.1, 0.32, 0.0))
    ax2.yaxis.set_visible(False)  # only the x-axis of this Axes is wanted
    # Tick positions on ax2 and the k values printed at them (one label per tick).
    new_tick_locations = [math.log(x) / math.log(1.01) for x in (.1, 0.2, .5, 1.01)]
    kminmer_k_labels = [2, 5, 10, 20]
    ax2.set_xticks(new_tick_locations)
    ax2.set_xticklabels([str(k) for k in kminmer_k_labels])
    ax2.set_xlabel("k-min-mer's k value (density=0.01)")

# Save before showing so the file does not depend on the backend keeping the
# figure alive after it has been displayed.
fig.savefig("seeds_abundances_to_ref.pdf", bbox_inches='tight', dpi=300)
plt.show()
