Skip to content

Commit

Permalink
Plot fit update (#136)
Browse files Browse the repository at this point in the history
* Fix duplication of names in QC function

* Calculate random correction when plotting_fit

* Add information to plot_fit example

* Change name of fit example file

* Remove outdated web action

* Bump required sketchlib version

Co-authored-by: John Lees <lees.john6@gmail.com>
  • Loading branch information
nickjcroucher and johnlees committed Dec 21, 2020
1 parent 1d49132 commit 5a7eb12
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 50 deletions.
34 changes: 0 additions & 34 deletions .github/workflows/master_poppunk-web.yml

This file was deleted.

31 changes: 21 additions & 10 deletions PopPUNK/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def plot_scatter(X, scale, out_prefix, title, kde = True):
plt.savefig(out_prefix + ".png")
plt.close()

def plot_fit(klist, matching, fit, out_prefix, title):
def plot_fit(klist, raw_matching, raw_fit, corrected_matching, corrected_fit, out_prefix, title):
"""Draw a scatter plot (pdf) of k-mer sizes vs match probability, and the
fit used to assign core and accessory distance
Expand All @@ -81,28 +81,39 @@ def plot_fit(klist, matching, fit, out_prefix, title):
Args:
klist (list)
List of k-mer sizes
matching (list)
raw_matching (list)
Proportion of matching k-mers at each klist value
kfit (numpy.array)
Fit to klist and matching from :func:`~PopPUNK.sketchlib.fitKmerCurve`
raw_fit (numpy.array)
Fit to klist and raw_matching from :func:`~PopPUNK.sketchlib.fitKmerCurve`
corrected_matching (list)
Corrected proportion of matching k-mers at each klist value
corrected_fit (numpy.array)
Fit to klist and corrected_matching from :func:`~PopPUNK.sketchlib.fitKmerCurve`
out_prefix (str)
Prefix for output plot file (.pdf will be appended)
title (str)
The title to display above the plot
"""
k_fit = np.linspace(0, klist[-1], num = 100)
matching_fit = (1 - fit[1]) * np.power((1 - fit[0]), k_fit)
raw_matching_fit = (1 - raw_fit[1]) * np.power((1 - raw_fit[0]), k_fit)
corrected_matching_fit = (1 - corrected_fit[1]) * np.power((1 - corrected_fit[0]), k_fit)

fig, ax = plt.subplots()
ax.set_yscale("log")
ax.set_xlabel('k-mer length')
ax.set_ylabel('Proportion of matches')
ax.set_xlabel('k-mer length', fontsize = 9)
ax.set_ylabel('Proportion of matches', fontsize = 9)
ax.tick_params(axis='both', which='major', labelsize=9)
ax.tick_params(axis='both', which='minor', labelsize=9)

plt.tight_layout()
plt.plot(klist, matching, 'o')
plt.plot(k_fit, matching_fit, 'r-')
plt.plot(klist, raw_matching, 'o', label= 'Raw matching k-mer proportion')
plt.plot(k_fit, raw_matching_fit, 'b-', label= 'Fit to raw matches')
plt.plot(klist, corrected_matching, 'mx', label= 'Corrected matching k-mer proportion')
plt.plot(k_fit, corrected_matching_fit, 'm--', label= 'Fit to corrected matches')

plt.title(title)
plt.legend(loc='upper right', prop={'size': 8})

plt.title(title, fontsize = 10)
plt.savefig(out_prefix + ".pdf", bbox_inches='tight')
plt.close()

Expand Down
16 changes: 11 additions & 5 deletions PopPUNK/sketchlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -512,12 +512,18 @@ def queryDatabase(rNames, qNames, dbPrefix, queryPrefix, klist, self = True, num
for plot_idx in range(number_plot_fits):
example = sample(rNames, k=2)
raw = np.zeros(len(klist))
corrected = np.zeros(len(klist))
for kidx, kmer in enumerate(klist):
raw[kidx] = pp_sketchlib.jaccardDist(ref_db, example[0], example[1], kmer)

fit = fitKmerCurve(raw, klist, jacobian)
plot_fit(klist, raw, fit,
dbPrefix + "/fit_example_" + str(plot_idx + 1),
raw[kidx] = pp_sketchlib.jaccardDist(ref_db, example[0], example[1], kmer, False)
corrected[kidx] = pp_sketchlib.jaccardDist(ref_db, example[0], example[1], kmer, True)
raw_fit = fitKmerCurve(raw, klist, jacobian)
corrected_fit = fitKmerCurve(corrected, klist, jacobian)
plot_fit(klist,
raw,
raw_fit,
corrected,
corrected_fit,
dbPrefix + "/" + dbPrefix + "_fit_example_" + str(plot_idx + 1),
"Example fit " + str(plot_idx + 1) + " - " + example[0] + " vs. " + example[1])
else:
query_db = queryPrefix + "/" + os.path.basename(queryPrefix)
Expand Down
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ dependencies:
- hdbscan
- rapidnj
- h5py
- pp-sketchlib >=1.5.1
- pp-sketchlib >=1.6.0
- graph-tool
- requests
- flask
Expand Down

0 comments on commit 5a7eb12

Please sign in to comment.