In [1]:
#### Load library
import scanpy as sc
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import matplotlib.image as mpimg
import matplotlib as mpl
from matplotlib.pyplot import rc_context
import SpatialproxseqClasses as SPC
import Proxseqnetwork as WPPI

#*****
mpl.rcdefaults()
# Set font to be arial
mpl.rc('font', **{'sans-serif':'Arial', 'size':12})
mpl.rcParams['mathtext.rm'] = 'sans' 
mpl.rcParams['axes.titlesize'] = 12
# Set default tick size
mpl.rcParams['xtick.major.size'] = 5.5
mpl.rcParams['ytick.major.size'] = 5.5
mpl.rcParams['xtick.minor.size'] = 2.5
mpl.rcParams['ytick.minor.size'] = 2.5
# Default legend settings
mpl.rcParams['legend.fancybox'] = False
mpl.rcParams['legend.edgecolor'] = 'k'

#to store text as text, not as path
new_rc_params = {'text.usetex': False,
                 "svg.fonttype": 'none'}
mpl.rcParams.update(new_rc_params)
sc.settings.verbosity = 3


In [2]:
#read PLA data and transform the data
data_A1 = pd.read_csv('A1-PLA_count_matrix.txt.gz', sep="\t",index_col=0)
# Update row names (index)
data_A1.index = data_A1.index.str.replace(r"^VCAM1_B:(.*)$", r"CD40_A:\1", regex=True)
data_A1.index = data_A1.index.str.replace(r"^CD29_B:(.*)$", r"CD279_A:\1", regex=True)
data_A1.index = data_A1.index.str.replace(r"^LFA1_B:(.*)$", r"CD11a_A:\1", regex=True)
valid_condition_A1 = ~data_A1.index.str.contains(r'((:.*_A)|(_B:))', regex=True)

# Apply the filter to the dataframe
pla_A1 = data_A1[valid_condition_A1]

#remover suffix 
pla_A1.index = pla_A1.index.str.replace(r'(_A|_B)', '', regex=True)
pla_A1 = pla_A1.loc[~pla_A1.index.str.contains('HLADPRQ'),:]

  valid_condition_A1 = ~data_A1.index.str.contains(r'((:.*_A)|(_B:))', regex=True)


In [3]:
pla_obj_A1 = SPC.sproxseqObject(pla_A1)
pla_obj_A1.compute_protein_abundance()
pla_obj_A1.compute_protein_pair_counts()


In [4]:
pla_obj_A1.compute_fisher_pvalues()

In [5]:
pla_obj_A1.fisher_value.to_csv('A1_fisher_test.csv')

In [6]:
#read PLA data and transform the data
data_B1 = pd.read_csv('B1-PLA_count_matrix.txt.gz', sep="\t",index_col=0)
# Update row names (index)
data_B1.index = data_B1.index.str.replace(r"^VCAM1_B:(.*)$", r"CD40_A:\1", regex=True)
data_B1.index = data_B1.index.str.replace(r"^CD29_B:(.*)$", r"CD279_A:\1", regex=True)
data_B1.index = data_B1.index.str.replace(r"^LFA1_B:(.*)$", r"CD11a_A:\1", regex=True)
valid_condition_B1 = ~data_B1.index.str.contains(r'((:.*_A)|(_B:))', regex=True)

# Apply the filter to the dataframe
pla_B1 = data_B1[valid_condition_B1]

#remover suffix 
pla_B1.index = pla_B1.index.str.replace(r'(_A|_B)', '', regex=True)
pla_B1 = pla_B1.loc[~pla_B1.index.str.contains('HLADPRQ'),:]

  valid_condition_B1 = ~data_B1.index.str.contains(r'((:.*_A)|(_B:))', regex=True)


In [7]:
pla_obj_B1 = SPC.sproxseqObject(pla_B1)
pla_obj_B1.compute_protein_abundance()
pla_obj_B1.compute_protein_pair_counts()


In [8]:
pla_obj_B1.compute_fisher_pvalues()

In [9]:
pla_obj_B1.fisher_value.to_csv('B1_fisher_test.csv')

In [10]:
#read PLA data and transform the data
data_D1 = pd.read_csv('D1-PLA_count_matrix.txt.gz', sep="\t",index_col=0)
# Update row names (index)
data_D1.index = data_D1.index.str.replace(r"^VCAM1_B:(.*)$", r"CD40_A:\1", regex=True)
data_D1.index = data_D1.index.str.replace(r"^CD29_B:(.*)$", r"CD279_A:\1", regex=True)
data_D1.index = data_D1.index.str.replace(r"^LFA1_B:(.*)$", r"CD11a_A:\1", regex=True)
valid_condition_D1 = ~data_D1.index.str.contains(r'((:.*_A)|(_B:))', regex=True)

# Apply the filter to the dataframe
pla_D1 = data_D1[valid_condition_D1]

#remover suffix 
pla_D1.index = pla_D1.index.str.replace(r'(_A|_B)', '', regex=True)
pla_D1 = pla_D1.loc[~pla_D1.index.str.contains('HLADPRQ'),:]

  valid_condition_D1 = ~data_D1.index.str.contains(r'((:.*_A)|(_B:))', regex=True)


In [11]:

pla_obj_D1 = SPC.sproxseqObject(pla_D1)
pla_obj_D1.compute_protein_abundance()
pla_obj_D1.compute_protein_pair_counts()

In [12]:
pla_obj_D1.compute_fisher_pvalues()

In [13]:
pla_obj_D1.fisher_value.to_csv('D1_fisher_test.csv')