# Demo

This is a demonstration of the basic Python usage of the `anarci-toolz` package.

Please refer to the README for guidance on using the command-line interface (CLI) tool.

In [1]:
import pandas as pd
from anarci_toolz.pipeline import run_anarci_toolz

Fetch data from [Thera-SAbDab](https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/therasabdab/search/), an open-source database of therapeutic antibodies.

In [2]:
# Address of the CSV export that this demo downloads.
TARGET_URL = "https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/static/downloads/TheraSAbDab_SeqStruc_OnlineDownload.csv"

# Read the remote CSV straight into a DataFrame (needs network access).
df = pd.read_csv(filepath_or_buffer=TARGET_URL)

In [3]:
# Do some light transformation to prepare data
#
# NOTE: this cell rebinds `df`. Re-run the download cell first if you want to
# draw a sample from the full table again (e.g. after changing `n_sample`).
n_sample = 100
random_seed = 42  # fixed seed: the same rows are drawn on every run for a given CSV

# Keep only the rows whose Format is "Whole mAb".
df = df[df['Format'] == "Whole mAb"]

# Cap the requested size at the number of available rows so that `sample`
# cannot raise ValueError when fewer than `n_sample` rows pass the filter.
df = df.sample(n=min(n_sample, len(df)), random_state=random_seed)
df = df[['Therapeutic', 'Format', 'HeavySequence', 'LightSequence']]

# Note: here we designate the amino acid sequence column as "sequence_aa"
# Reshape wide -> long: each sampled row yields one row for its HeavySequence
# value and one for its LightSequence value; the source column name is kept
# in "chain_type_input".
df_long = pd.melt(
    df,
    id_vars=['Therapeutic', 'Format'],
    value_vars=['HeavySequence', 'LightSequence'],
    var_name='chain_type_input',
    value_name='sequence_aa',
)

In [4]:
# Preview the first five rows of the long-format input table.
df_long.head(n=5)

Unnamed: 0,Therapeutic,Format,chain_type_input,sequence_aa
0,Vantictumab,Whole mAb,HeavySequence,EVQLVESGGGLVQPGGSLRLSCAASGFTFSHYTLSWVRQAPGKGLE...
1,Sabatolimab,Whole mAb,HeavySequence,QVQLVQSGAEVKKPGSSVKVSCKASGYTFTSYNMHWVRQAPGQGLE...
2,Briquilimab,Whole mAb,HeavySequence,QVQLVQSGAEVKKPGASVKVSCKASGYTFTSYNMHWVRQAPGQGLE...
3,Narlumosbart,Whole mAb,HeavySequence,EVQLLESGGGLVQPGGSLRLSCAASGFTFSSYAMSWVRQAPGKGLE...
4,Varlilumab,Whole mAb,HeavySequence,QVQLVESGGGVVQPGRSLRLSCAASGFTFSSYDMHWVRQAPGKGLE...


In [5]:
# Run anarci-toolz on the long-format table.
# `seq_aa_header` names the column of `df_long` that holds the amino acid sequences.
df_result = run_anarci_toolz(
    df=df_long,
    seq_aa_header="sequence_aa",
    scheme="imgt",
    allowed_species=["human"],
)

[32m2025-03-11 20:06:11.671[0m | [1mINFO    [0m | [36manarci_toolz.abnumber_tool[0m:[36mrun_parallel_abnumber[0m:[36m62[0m - [1mStarting AbNumber processing ...[0m
Processing sequences: 100%|██████████| 200/200 [00:00<00:00, 43620.24seq/s]
[32m2025-03-11 20:06:13.880[0m | [32m[1mSUCCESS [0m | [36manarci_toolz.abnumber_tool[0m:[36mparallel_get_region_seqs[0m:[36m138[0m - [32m[1mAbNumber parallel processing complete[0m
[32m2025-03-11 20:06:13.931[0m | [1mINFO    [0m | [36manarci_toolz.anarci_tool[0m:[36mrun_parallel_anarci[0m:[36m91[0m - [1mStarting ANARCI processing ...[0m
Processing sequences: 100%|██████████| 200/200 [00:00<00:00, 66932.16seq/s]
[32m2025-03-11 20:06:15.828[0m | [32m[1mSUCCESS [0m | [36manarci_toolz.anarci_tool[0m:[36mrun_parallel_anarci[0m:[36m104[0m - [32m[1mANARCI parallel processing complete[0m
[32m2025-03-11 20:06:15.858[0m | [32m[1mSUCCESS [0m | [36manarci_toolz.pipeline[0m:[36mrun_anarci_toolz[0m:[36m

The result is a comprehensively annotated ANARCI table produced by `anarci-toolz`:

In [6]:
# Preview the first five rows of the annotated result.
df_result.head(n=5)

Unnamed: 0,Therapeutic,Format,chain_type_input,sequence_aa,scheme,passed_abnumber,sequence_alignment_aa,species,chain_type,v_gene,...,fr1_aa,fr2_aa,fr3_aa,fr4_aa,passed_anarci,variable_region_start_index,variable_region_end_index,e_value,bitscore,bias
0,Vantictumab,Whole mAb,HeavySequence,EVQLVESGGGLVQPGGSLRLSCAASGFTFSHYTLSWVRQAPGKGLE...,imgt,True,EVQLVESGGGLVQPGGSLRLSCAASGFTFSHYTLSWVRQAPGKGLE...,alpaca,H,IGHV3-23*04,...,EVQLVESGGGLVQPGGSLRLSCAAS,LSWVRQAPGKGLEWVSV,YYADSVKGRFTISSDNSKNTLYLQMNSLRAEDTAVYYC,WGQGTLVTVSS,True,0,118,5.5999999999999994e-64,205.3,2.1
1,Sabatolimab,Whole mAb,HeavySequence,QVQLVQSGAEVKKPGSSVKVSCKASGYTFTSYNMHWVRQAPGQGLE...,imgt,True,QVQLVQSGAEVKKPGSSVKVSCKASGYTFTSYNMHWVRQAPGQGLE...,human,H,IGHV1-46*01,...,QVQLVQSGAEVKKPGSSVKVSCKAS,MHWVRQAPGQGLEWMGD,SYNQKFKGRVTITADKSTSTVYMELSSLRSEDTAVYYC,WGQGTTVTVSS,True,0,118,6.8e-60,192.0,2.4
2,Briquilimab,Whole mAb,HeavySequence,QVQLVQSGAEVKKPGASVKVSCKASGYTFTSYNMHWVRQAPGQGLE...,imgt,True,QVQLVQSGAEVKKPGASVKVSCKASGYTFTSYNMHWVRQAPGQGLE...,human,H,IGHV1-3*01,...,QVQLVQSGAEVKKPGASVKVSCKAS,MHWVRQAPGQGLEWMGV,SYNQKFKGRVTITADKSTSTAYMELSSLRSEDTAVYYC,WGQGTLVTVSS,True,0,117,5.6e-59,189.0,2.9
3,Narlumosbart,Whole mAb,HeavySequence,EVQLLESGGGLVQPGGSLRLSCAASGFTFSSYAMSWVRQAPGKGLE...,imgt,True,EVQLLESGGGLVQPGGSLRLSCAASGFTFSSYAMSWVRQAPGKGLE...,alpaca,H,IGHV3-23*01,...,EVQLLESGGGLVQPGGSLRLSCAAS,MSWVRQAPGKGLEWVSG,YYADSVKGRFTISRDNSKNTLYLQMNSLRAEDTAVYYC,WGQGTLVTVSS,True,0,122,5.4e-67,215.1,2.1
4,Varlilumab,Whole mAb,HeavySequence,QVQLVESGGGVVQPGRSLRLSCAASGFTFSSYDMHWVRQAPGKGLE...,imgt,True,QVQLVESGGGVVQPGRSLRLSCAASGFTFSSYDMHWVRQAPGKGLE...,alpaca,H,IGHV3-33*01,...,QVQLVESGGGVVQPGRSLRLSCAAS,MHWVRQAPGKGLEWVAV,YYADSVKGRFTISRDNSKNTLYLQMNSLRAEDTAVYYC,WGQGTLVTVSS,True,0,119,2.3999999999999997e-65,209.7,2.2


In [7]:
# Display residue view
# Same call as before, with `display_residue_view=True` added.
# NOTE: this rebinds `df_result`, replacing the earlier result.
df_result = run_anarci_toolz(
    df=df_long,
    seq_aa_header="sequence_aa",
    scheme="imgt",
    allowed_species=["human"],
    display_residue_view=True,
)

[32m2025-03-11 20:06:21.067[0m | [1mINFO    [0m | [36manarci_toolz.abnumber_tool[0m:[36mrun_parallel_abnumber[0m:[36m62[0m - [1mStarting AbNumber processing ...[0m
Processing sequences: 100%|██████████| 200/200 [00:00<00:00, 63129.20seq/s]
[32m2025-03-11 20:06:23.404[0m | [32m[1mSUCCESS [0m | [36manarci_toolz.abnumber_tool[0m:[36mparallel_get_region_seqs[0m:[36m138[0m - [32m[1mAbNumber parallel processing complete[0m
  df["seq_id"] = seq_id
  df["seq_id"] = seq_id
  df["seq_id"] = seq_id
  df["seq_id"] = seq_id
  df["seq_id"] = seq_id
  df["seq_id"] = seq_id
  df["seq_id"] = seq_id
  df["seq_id"] = seq_id
  df["seq_id"] = seq_id
  df["seq_id"] = seq_id
  df["seq_id"] = seq_id
  df["seq_id"] = seq_id
  df["seq_id"] = seq_id
  df["seq_id"] = seq_id
  df["seq_id"] = seq_id
  df["seq_id"] = seq_id
  df["seq_id"] = seq_id
  df["seq_id"] = seq_id
  df["seq_id"] = seq_id
  df["seq_id"] = seq_id
  df["seq_id"] = seq_id
  df["seq_id"] = seq_id
  df["seq_id"] = seq_id
 

In [8]:
# Preview the first five rows of the residue-view result.
df_result.head(n=5)

Unnamed: 0,Therapeutic,Format,chain_type_input,sequence_aa,scheme,passed_abnumber,sequence_alignment_aa,species,chain_type,v_gene,...,imgt_pos_125,imgt_pos_126,imgt_pos_127,imgt_pos_128,passed_anarci,variable_region_start_index,variable_region_end_index,e_value,bitscore,bias
0,Vantictumab,Whole mAb,HeavySequence,EVQLVESGGGLVQPGGSLRLSCAASGFTFSHYTLSWVRQAPGKGLE...,imgt,True,EVQLVESGGGLVQPGGSLRLSCAASGFTFSHYTLSWVRQAPGKGLE...,alpaca,H,IGHV3-23*04,...,T,V,S,S,True,0,118,5.5999999999999994e-64,205.3,2.1
1,Sabatolimab,Whole mAb,HeavySequence,QVQLVQSGAEVKKPGSSVKVSCKASGYTFTSYNMHWVRQAPGQGLE...,imgt,True,QVQLVQSGAEVKKPGSSVKVSCKASGYTFTSYNMHWVRQAPGQGLE...,human,H,IGHV1-46*01,...,T,V,S,S,True,0,118,6.8e-60,192.0,2.4
2,Briquilimab,Whole mAb,HeavySequence,QVQLVQSGAEVKKPGASVKVSCKASGYTFTSYNMHWVRQAPGQGLE...,imgt,True,QVQLVQSGAEVKKPGASVKVSCKASGYTFTSYNMHWVRQAPGQGLE...,human,H,IGHV1-3*01,...,T,V,S,S,True,0,117,5.6e-59,189.0,2.9
3,Narlumosbart,Whole mAb,HeavySequence,EVQLLESGGGLVQPGGSLRLSCAASGFTFSSYAMSWVRQAPGKGLE...,imgt,True,EVQLLESGGGLVQPGGSLRLSCAASGFTFSSYAMSWVRQAPGKGLE...,alpaca,H,IGHV3-23*01,...,T,V,S,S,True,0,122,5.4e-67,215.1,2.1
4,Varlilumab,Whole mAb,HeavySequence,QVQLVESGGGVVQPGRSLRLSCAASGFTFSSYDMHWVRQAPGKGLE...,imgt,True,QVQLVESGGGVVQPGRSLRLSCAASGFTFSSYDMHWVRQAPGKGLE...,alpaca,H,IGHV3-33*01,...,T,V,S,S,True,0,119,2.3999999999999997e-65,209.7,2.2
