# Imports

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats
from sklearn.metrics import roc_curve

from sklearn import metrics
import matplotlib.font_manager as font_manager
from numpy import nan

# Define variables

In [None]:
# Variant set to analyse; this is also used as the Excel sheet name and in
# the plot title / output file name further down.
# Supported values: 'ABCA4_NCSS', 'ABCA4_DI' or 'MYBPC3_NCSS'
variants = 'ABCA4_NCSS'

# Column headers of the delta-score dataframe. The first entry is the RNA
# outcome, the rest are the prediction programs; the NCSS sets carry four
# extra programs compared to the other set.
column_names = (
    [
        'RNA', 'SSFL', 'MES', 'NNS', 'GS', 'SpliceRover', 'DSSP',
        'SpliceAI', 'CAGI_MaPSy', 'CADD',
        'SPIDEX', 'MMSplice', 'MTSplice', 'S-CAP',
    ]
    if 'NCSS' in variants
    else [
        'RNA', 'SSFL', 'MES', 'NNS', 'GS', 'splicerover', 'DSSP',
        'spliceai', 'cagi', 'cadd',
    ]
)

# Import the scores into a dataframe

In [None]:
# Load the sheet named after the selected variant set and treat every
# missing score as 0, so the arithmetic below never sees NaN.
di = pd.read_excel('variants_scores.xlsx', sheet_name=variants).replace(np.nan, 0)

# Calculate the delta values 

In [None]:
# Alamut programs that report a wild-type and a variant score, mapped to the
# maximum value of each program (used as denominator when the wt score is 0).
delta = {
    'SSFL': 100,
    'MES': 12,
    'NNS': 1,
    'GS': 15,
    'SpliceRover': 1,
    'DSSP': 1,
}

# Source columns whose absolute value is taken over as-is, in output order.
# The last four are only read for the NCSS variant sets.
abs_score_columns = ['CAGI', 'CADD']
if 'NCSS' in variants:
    abs_score_columns += ['Spidex', 'MMSplice', 'MTSplice', 'SCAP']

# One list of scores per variant, keyed by the row index of `di`.
delta_scores = {}

for index in di.index:

    # RNA outcome: 1 when more than 20 % mutant RNA was observed, else 0
    element = [1 if di.at[index, '% Mutant RNA'] > 20 else 0]

    # Delta score per alamut program: relative change with respect to the
    # wild-type score, or variant score / program maximum when wt is 0.
    for name, max_value in delta.items():
        wt_score = di.at[index, name + '_wt']
        var_score = float(di.at[index, name + '_var'])
        if wt_score == 0:
            score = var_score / max_value
        else:
            score = (var_score - float(wt_score)) / float(wt_score)
        element.append(np.absolute(score))

    # SpliceAI is taken over unchanged (no absolute value)
    element.append(di.at[index, 'SpliceAI'])

    # CAGI, CADD and, for NCSS sets, SPIDEX, MMSplice, MTSplice and S-CAP
    # enter as absolute values
    element.extend(np.absolute(di.at[index, column]) for column in abs_score_columns)

    delta_scores[index] = element

# The dict yields one column per variant; transpose to one row per variant
# and attach the program names as column headers.
delta_df = pd.DataFrame(delta_scores).transpose()
delta_df.columns = column_names

print(delta_df.head())

# ROC curves

In [None]:
# Inputs for the ROC analysis

# 1) Binary class label per variant: 1 when the RNA column exceeds 0.2, else 0
label = np.array(
    [1 if delta_df.at[row, 'RNA'] > 0.2 else 0 for row in delta_df.index]
)

# 2) One score array per prediction program (every column except 'RNA'),
#    in the same order as column_names[1:]
probabilities = [
    np.array(delta_df[program].tolist()) for program in column_names[1:]
]

# 3) Line colours, assigned to the programs by position
colors = [
    'olive', 'green', 'lawngreen', 'cyan', 'blue', 'purple', 'red',
    'hotpink', 'orange', 'gold', 'grey', 'black', 'brown',
]

In [None]:
# AUC per prediction program, keyed by program name
aucs = {}

# Draw one ROC curve per program on a single square figure
plt.figure(figsize=(10, 10))
for idx, (program, prob) in enumerate(zip(column_names[1:], probabilities)):
    fper, tper, thresholds = roc_curve(label, prob, pos_label=1)
    auc = metrics.roc_auc_score(label, prob)
    aucs[program] = auc
    # colours are matched to programs by position in `colors`
    plt.plot(fper, tper, color=colors[idx], label=program)

font_prop = font_manager.FontProperties(size=18)

# Dashed diagonal from (0, 0) to (1, 1) as visual reference
plt.plot([0, 1], [0, 1], color='darkblue', linestyle='--')
plt.xlabel('False Positive Rate', size=18)
plt.ylabel('True Positive Rate', size=18)
plt.title(f'ROC Curve {variants} variants', size=20)
plt.tick_params(labelsize=18)
plt.legend(prop=font_prop)

# Write the figure to an SVG named after the variant set, then display it
plt.savefig(f'ROC_{variants}.svg', format='svg', dpi=1200)
plt.show()


In [None]:
# Print the dictionary of AUC values (one entry per prediction program)
print(aucs)