In [1]:
import pandas as pd
import numpy as np

#Pyrepseq imports
import pyrepseq as prs

#Plotting imports
import matplotlib.pyplot as plt
from matplotlib.pyplot import cm
import matplotlib.style 
import matplotlib as mpl 

In [2]:
# Global matplotlib defaults for every figure in this notebook.

# Three font-size tiers referenced by the rc settings below.
SMALL_SIZE, MEDIUM_SIZE, BIGGER_SIZE = 12, 14, 20

mpl.rcParams.update({
    # Figure geometry and legend appearance
    'figure.figsize': [4.0, 4.0],
    'legend.frameon': False,
    'figure.dpi': 500,
    # Text sizes
    'font.size': MEDIUM_SIZE,            # default text size
    'axes.titlesize': BIGGER_SIZE,       # axes title
    'axes.labelsize': MEDIUM_SIZE,       # x and y axis labels
    'xtick.labelsize': SMALL_SIZE,       # x tick labels
    'ytick.labelsize': SMALL_SIZE,       # y tick labels
    'legend.fontsize': MEDIUM_SIZE,      # legend entries
    'figure.titlesize': BIGGER_SIZE,     # figure-level title
})

## Data imports

### Specific sequences - VDJdb

In [3]:
# Load the VDJdb table (the file name suggests paired-chain, human, preprocessed
# data) and keep only rows in which every alpha- and beta-chain field is present.
# reset_index(drop=True) renumbers the surviving rows 0..n-1 directly; routing
# through a temporary "index" column would discard a real CSV column of that
# name and leave a stray "level_0" column behind.
vdjdb = (
    pd.read_csv('../../data/vdjdb/vdjdb_paired_human_preprocessed.csv', sep=',')
    .dropna(subset=["CDR3B", "TRBV", "TRBJ", "CDR3A", "TRAV", "TRAJ"])
    .reset_index(drop=True)
)

# Composite per-chain features: the CDR3 column concatenated with the V column.
# NOTE(review): assumes these columns hold strings, so `+` concatenates — confirm.
vdjdb["Alpha"] = vdjdb["CDR3A"] + vdjdb["TRAV"]
vdjdb["Beta"] = vdjdb["CDR3B"] + vdjdb["TRBV"]

### Background sequences - Tanno

In [5]:
# Load the combined Tanno table used as the background set and keep only rows
# in which every alpha- and beta-chain field is present.
# reset_index(drop=True) renumbers the surviving rows 0..n-1 directly; routing
# through a temporary "index" column would discard a real CSV column of that
# name and leave a stray "level_0" column behind.
tanno = (
    pd.read_csv('../../data/tanno/preprocessed/combined.csv', sep=',')
    .dropna(subset=["CDR3B", "TRBV", "TRBJ", "CDR3A", "TRAV", "TRAJ"])
    .reset_index(drop=True)
)

# Composite per-chain features: the CDR3 column concatenated with the V column.
# NOTE(review): assumes these columns hold strings, so `+` concatenates — confirm.
tanno["Alpha"] = tanno["CDR3A"] + tanno["TRAV"]
tanno["Beta"] = tanno["CDR3B"] + tanno["TRBV"]

## Relevance scores

In [6]:
# Feature relevance of the "Alpha" and "Beta" columns: VDJdb is passed as the
# first (specific) table, grouped by its "Epitope" column, and the Tanno table
# is passed as the background.
# NOTE(review): unpair_background=True presumably treats the background alpha
# and beta chains as unpaired — confirm against the pyrepseq documentation.
rel_dict = prs.tcr_info.feature_relevance_dict(
    vdjdb,
    tanno,
    features=["Alpha", "Beta"],
    spc_group_column="Epitope",
    unpair_background=True,
)

In [7]:
# Show the relevance dictionary as the cell output: the rendered result has a
# 'features' list plus one numeric entry per feature and per '+'-joined pair.
rel_dict

{'features': ['Alpha', 'Beta'],
 'Alpha': 9.433670039561719,
 'Alpha+Beta': 28.108089474886107,
 'Beta': 14.016909520997206,
 'Beta+Alpha': 28.108089474886107}