In [None]:
# importing relevant modules and packages 
import pandas as pd
import pandasql
from pandasql import sqldf
import numpy as np

import matplotlib.pyplot as plt
import matplotlib.lines as mlines
import matplotlib.transforms as mtransforms
import seaborn as sns
import matplotlib.cbook as cbook

from matplotlib import rcParams

from pandasql import sqldf
def pysqldf(q):
    """Run the SQL query string ``q`` through pandasql's ``sqldf``, resolving
    table names against this module's global namespace."""
    return sqldf(q, globals())

In [None]:
# Importing multipletests from statsmodels
from statsmodels.stats.multitest import multipletests

In [None]:
#importing correlation functions
from scipy import stats
from scipy.stats import kendalltau, pearsonr, spearmanr

In [None]:
# Protease column labels of the raw data, listed in the order in which the
# columns should appear downstream. The strings are used verbatim as column
# labels, so they must match the spreadsheet headers exactly.
CTS_type = [
    "CTSA_pH 4.5",
    "CTSB_pH 4.5_pH 5.5",
    "CTSD_pH 3.4_pH 4.5",
    "CTSE_pH 3.4_pH 4.5",
    "CTSF_pH 4.5",
    "CTSK_pH 4.5",
    "CTSL_pH 4.5_pH 5.5",
    "CTSO_pH 5.5",
    "CTSS_pH 4.5_pH 5.5",
    "CTSV_pH 3.4_pH 4.5",
    "CTSX_pH 3.4_pH 4.5",
    "AEP_pH 4.5_pH 5.5",
]

In [None]:
#ASyn

In [None]:
# Read the 'ASyn' worksheet of the supplementary workbook and use the
# P1-site position column as the row index.
df_asyn = (
    pd.read_excel('Supplementary Data 1.xlsx', sheet_name='ASyn')
    .set_index('position of P1 site in protein')
)

In [None]:
# Display the loaded ASyn table (rows are indexed by 'position of P1 site in protein')
df_asyn

In [None]:
# Keep only the protease columns listed in CTS_type, in that order.
# NOTE: reindex does not raise for a label that is missing from df_asyn; the
# column is created and filled with NaN, so a header typo goes unnoticed here.
df_asyn_rearr = df_asyn.reindex(CTS_type, axis='columns')
df_asyn_rearr.head(2)

In [None]:
# Replace each raw column header with a short protease code.
# The result of rename() is bound back to the name instead of using
# inplace=True: the cell stays a plain "input -> new frame" step and no
# frame is mutated behind the reader's back.
df_asyn_rearr = df_asyn_rearr.rename(
    columns={
        'CTSA_pH 4.5': 'A',
        'CTSB_pH 4.5_pH 5.5': 'B',
        'CTSD_pH 3.4_pH 4.5': 'D',
        'CTSE_pH 3.4_pH 4.5': 'E',
        'CTSF_pH 4.5': 'F',
        'CTSK_pH 4.5': 'K',
        'CTSL_pH 4.5_pH 5.5': 'L',
        'CTSO_pH 5.5': 'O',
        'CTSS_pH 4.5_pH 5.5': 'S',
        'CTSV_pH 3.4_pH 4.5': 'V',
        'CTSX_pH 3.4_pH 4.5': 'X',
        'AEP_pH 4.5_pH 5.5': 'AEP',
    }
)
df_asyn_rearr.head(5)



In [None]:
# ASyn: number of cleavages per protease, i.e. how many rows of each column
# hold the value 1.
asyn_individual_cathepsin = (
    (df_asyn_rearr == 1)
    .astype(int)
    .sum(axis=0)
)
# Grand total of cleavages across all proteases.
total = asyn_individual_cathepsin.sum()

# Same counts, ordered from the fewest to the most cleavages.
asyn_individual_cathepsin_order = asyn_individual_cathepsin.sort_values()
asyn_individual_cathepsin_order

In [None]:
# Exclude proteases with zero cleavages (if any).
# Counts and labels are filtered with the SAME predicate so the two lists stay
# aligned element by element. The labels were previously filtered by a
# hard-coded set of names, which only matched the counts when exactly those
# proteases had zero cleavages.
Cleavages_asyn_order_nonzero = [
    count for count in asyn_individual_cathepsin_order if count != 0
]
Cathepsins_order_nonzero = [
    name for name, count in asyn_individual_cathepsin_order.items() if count != 0
]
Cleavages_asyn_order_nonzero, Cathepsins_order_nonzero

In [None]:
# Custom two-colour gradient (mintcream -> mediumaquamarine) for the pie chart;
# one evenly spaced shade is sampled per non-zero protease count.
from matplotlib.colors import LinearSegmentedColormap

colors_heatmap = [[0, 'mintcream'],
                  [1, 'mediumaquamarine']]
cmap_heatmap = LinearSegmentedColormap.from_list('', colors_heatmap)

# Calling the colormap on positions in [0, 1] yields one RGBA row per position.
colors = cmap_heatmap(np.linspace(0, 1, len(Cleavages_asyn_order_nonzero)))

In [None]:
#Pie chart showing number and % of individual protease cleavages 
def make_autopct(x):
    """Build an ``autopct`` callback for ``plt.pie`` that labels each wedge
    with its absolute count and its percentage, e.g. ``'12(34%)'``.

    Parameters
    ----------
    x : iterable of numbers
        The wedge sizes that are passed to ``plt.pie``. Their sum is the
        total used to turn a percentage back into a count.

    Returns
    -------
    callable
        Function taking the wedge percentage ``pct`` and returning the
        formatted label string.
    """
    # Derive the total from the argument rather than from a notebook-level
    # variable, so the formatter always agrees with the data being plotted.
    total_count = sum(x)

    def my_autopct(pct):
        # pct is this wedge's share in percent; recover the count by rounding.
        val = int(round(pct * total_count / 100.0))
        return '{v:d}({p:.0f}%)'.format(p=pct, v=val)

    return my_autopct

# Draw the pie: one wedge per protease with a non-zero cleavage count, labelled
# with the protease code outside and "count(percent%)" inside.
patches, labels, pct_texts = plt.pie(
    Cleavages_asyn_order_nonzero,
    labels=Cathepsins_order_nonzero,
    colors=colors,
    autopct=make_autopct(Cleavages_asyn_order_nonzero),
    pctdistance=0.8,
    labeldistance=1.05,
    startangle=180,
    radius=8,
    shadow=False,
    wedgeprops={'linewidth': 1.0, 'edgecolor': 'grey'},
    textprops={'weight': 'bold', 'fontsize': 50},
)

plt.show()