<a href="https://colab.research.google.com/github/badonyi/acmgscaler/blob/main/acmgscaler.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# -*- coding: utf-8 -*-
import os
import sys
from io import StringIO

import pandas as pd
import rpy2
from google.colab import files, data_table
from IPython.display import clear_output, display

data_table.enable_dataframe_formatter()

#@markdown ## <b><font color='#85b0f5'>How to run this Colab?</font></b>
#@markdown #### Clicking the ▶ symbol on the left will bring up the upload button<font color='#db829e'>*</font>
#@markdown ##### <font color='#db829e'>*Make sure your file is in CSV format</font> (download an example [here](https://github.com/badonyi/acmgscaler/blob/main/example.csv))
#@markdown # ㅤ
#@markdown ### <b><font color='#85b0f5'>Settings</font></b>

print('Upload your CSV file:')
uploaded = files.upload()

if len(uploaded) == 1:
    filename = next(iter(uploaded))

    if os.path.exists(filename):
        os.remove(filename)

    df = pd.read_csv(StringIO(uploaded[filename].decode('utf-8')))

    clear_output()

    #@markdown #### Name of the column in your data containing the variant labels<font color='#db829e'>*</font>
    #@markdown ##### <font color='#db829e'>*There is no limit on the number of unclassified variants, but at least 10 P and 10 B labels are required</font>
    class_column = 'class'  # @param {type:'string'}
    #@markdown #### ㅤ
    #@markdown #### Name of the column in your data containing the variant scores
    value_column = 'score'  # @param {type:'string'}
    #@markdown #### ㅤ
    #@markdown #### Prior probability of pathogenicity<font color='#db829e'>*</font> (default 0.1)
    #@markdown ##### <font color='#db829e'>*This will not affect the likelihood ratio of pathogenicity, only the ACMG/AMP classification</font>
    prior = 0.1  # @param {type:'slider', min:0.01, max:0.99, step:0.001}

    if class_column in df.columns:
        df.rename(columns={class_column: 'class'}, inplace=True)
    else:
        print(f'Error: Class column "{class_column}" not found in the uploaded CSV. Please correct the column name and re-upload.')
        sys.exit()

    if value_column in df.columns:
        df.rename(columns={value_column: 'score'}, inplace=True)
    else:
        print(f'Error: Value column "{value_column}" not found in the uploaded CSV. Please correct the column name and re-upload.')
        sys.exit()

    # in case NAs are interpreted as floats
    df['class'] = df['class'].fillna('').astype(str)

    if 'rpy2.ipython' not in sys.modules:
        %load_ext rpy2.ipython

    %Rpush df prior
    %R devtools::install_github(repo = 'badonyi/acmgscaler', quiet = TRUE)
    %R calib <- acmgscaler::calibrate(df, prior = prior, value = 'score')
    %R df <- calib$likelihood_ratios
    %R th_df <- acmgscaler:::prettify_score_thresholds(calib$score_thresholds)
    %R utils::write.csv(df, 'result.csv', row.names = FALSE)

    if os.path.exists('result.csv'):
        try:
            files.download('result.csv')
        except Exception as e:
            print(f"Error during download: {e}")
    else:
        print('Error: Could not create result.csv')
else:
    print('Please upload exactly one CSV file.')

%R df

In [None]:
#@markdown ## <b><font color='#85b0f5'>Plot results</font></b>
%%R

# Plot the calibration: class-conditional score densities on top, the
# likelihood-ratio curve with its interval band and the evidence-strength
# thresholds below.

# Guard against running this cell before a successful calibration.
# A bare exists('df') is always TRUE because `df` also names the F-distribution
# density function in the stats package, so look specifically for a data frame
# and make sure it carries the columns read below. `th_df` and `prior` are
# needed further down as well.
if (
  !is.data.frame(get0('df', mode = 'list')) ||
  !all(c('class', 'score', 'score_lr', 'score_lr_lower', 'score_lr_upper') %in% names(df)) ||
  !exists('th_df') ||
  !exists('prior')
) {
  stop('Data has not been detected, please run the first cell and calibrate your data.')
}

cat('Prior:', prior)

# Sort by score so the curve and the interval polygon are drawn left to right.
df <- df[order(df$score), ]

# Colour per evidence level, ordered from benign very strong to pathogenic
# very strong, with 'indeterminate' in the middle (position 5).
acmg_col <- c(
  b_very_strong = '#00204d',
  b_strong = '#3f5779',
  b_moderate = '#7f8fa6',
  b_supporting = '#bfc7d2',
  indeterminate = '#ffffff',
  p_supporting = '#f3d5de',
  p_moderate = '#e7acbe',
  p_strong = '#db829e',
  p_very_strong = '#cf597e'
)

# Kernel density estimates of the scores labelled 'P' and 'B'.
pd <- suppressWarnings(density(df$score[df$class == 'P'], bw = 'bcv', n = 1024))
bd <- suppressWarnings(density(df$score[df$class == 'B'], bw = 'bcv', n = 1024))

# Shared x-range so both panels line up.
xlim <- range(c(pd$x, bd$x))

options(repr.plot.width = 11, repr.plot.height = 6)
# Two stacked panels; the lower one is twice as tall as the upper one.
layout(matrix(c(1, 2), nrow = 2), heights = c(1, 2))

# Upper panel: empty frame without axes, then the two filled densities.
par(mar = c(0, 4.5, 0, 8))
plot(
  x = pd$x,
  y = pd$y,
  type = 'n',
  xlim = xlim,
  ylim = range(c(pd$y, bd$y)) * 1.05,
  axes = FALSE,
  xlab = '',
  ylab = ''
)

# 'B' density in the b_strong colour, 'P' density in the p_strong colour.
polygon(bd, col = adjustcolor(acmg_col[2], alpha.f = 0.8), border = NA)
polygon(pd, col = adjustcolor(acmg_col[8], alpha.f = 0.8), border = NA)

# Lower panel: likelihood ratio against score on a log y-axis.
par(mar = c(5, 4.5, 0, 8))
plot(
  x = df$score,
  y = df$score_lr,
  log = 'y',
  type = 'n',
  xlim = xlim,
  ylim = c(0.001, 10000),
  xlab = 'Input score',
  ylab = 'Likelihood ratio',
  yaxt = 'n'
)

# Closed outline for the band: lower bound left-to-right, then the upper
# bound right-to-left.
x_vals <- c(df$score, rev(df$score))
y_vals <- c(df$score_lr_lower, rev(df$score_lr_upper))
polygon(
  x = x_vals,
  y = y_vals,
  col = adjustcolor('grey70', alpha.f = 0.8),
  border = NA
)

lines(
  x = df$score,
  y = df$score_lr,
  lwd = 3,
  col = 'gray10'
)

# Hand-written decade ticks so labels are shown in plain notation.
axis(
  2,
  at = c(0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000),
  labels = c('0.001', '0.01', '0.1', '1', '10', '100', '1000', '10000'),
  las = 2,
  cex.axis = 0.8
)

# NOTE(review): presumably returns one likelihood-ratio threshold per
# non-indeterminate evidence level, in the same order as acmg_col[-5] - confirm
# against the acmgscaler package.
t <- acmgscaler:::thresholds_upon_prior(prior)

# One horizontal line per threshold, coloured by evidence level
# (acmg_col[-5] drops 'indeterminate').
abline(
  h = t,
  col = acmg_col[-5],
  lty = c(1, 4, 2, 3, 3, 2, 4, 1),
  lwd = 0.5
)

# Label each threshold in the right margin.
mtext(
  th_df$`ACMG/AMP evidence strength`,
  col = acmg_col[-5],
  side = 4,
  at = t,
  las = 2,
  line = 0.5,
  cex = 0.8
)

In [None]:
#@markdown ## <b><font color='#85b0f5'>Show score intervals</font></b>
%R if (!exists('th_df')) { print('Data has not been detected, please run the first cell to calibrate your data.') } else { th_df }