# Demo

This is a demonstration of the basic Python usage of the `anarci-toolz` package.

Please refer to the README for guidance on using the command-line interface (CLI) tool.

In [1]:
import pandas as pd
from anarci_toolz.pipeline import run_anarci_toolz

Fetch data from [Thera-SAbDab](https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/therasabdab/search/), an open-source database of therapeutic antibodies.

In [2]:
# Location of the CSV export that this demo reads its input from.
TARGET_URL = "https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/static/downloads/TheraSAbDab_SeqStruc_OnlineDownload.csv"

# Read the remote CSV straight into a DataFrame (requires network access).
df = pd.read_csv(filepath_or_buffer=TARGET_URL)

In [3]:
# Do some light transformation to prepare data
n_sample = 100
random_state = 42  # fixed seed so the same rows are sampled on every run

# Keep only the rows whose Format is "Whole mAb".
df = df[df['Format'] == "Whole mAb"]

# Cap the sample size at the number of available rows: DataFrame.sample raises
# ValueError when asked for more rows than exist (sampling without replacement).
df = df.sample(n=min(n_sample, len(df)), random_state=random_state)
df = df[['Therapeutic', 'Format', 'HeavySequence', 'LightSequence']]

# Note: here we designate the amino acid sequence column as "sequence_aa"
# Reshape wide -> long: each HeavySequence / LightSequence value becomes its own
# row, with the originating column name recorded in "chain_type_input".
df_long = pd.melt(
    df,
    id_vars=['Therapeutic', 'Format'],
    value_vars=['HeavySequence', 'LightSequence'],
    var_name='chain_type_input',
    value_name='sequence_aa',
)

In [4]:
# Preview the first rows of the long-format frame that is passed to anarci-toolz
df_long.head()

Unnamed: 0,Therapeutic,Format,chain_type_input,sequence_aa
0,Orticumab,Whole mAb,HeavySequence,EVQLLESGGGLVQPGGSLRLSCAASGFTFSNAWMSWVRQAPGKGLE...
1,Enokizumab,Whole mAb,HeavySequence,QVQLVQSGAEVKKPGSSVKVSCKASGGTFSYYWIEWVRQAPGQGLE...
2,Bimekizumab,Whole mAb,HeavySequence,EVQLVESGGGLVQPGGSLRLSCAASGFTFSDYNMAWVRQAPGKGLE...
3,Micvotabart,Whole mAb,HeavySequence,EVQLLESGGGLVQPGGSLRLSCAASGFTFSSFSMSWVRQAPGKGLE...
4,Ozuriftamab,Whole mAb,HeavySequence,EVQLVQSGAEVKKPGESLRISCKGSGYTFTEYTMHWVRQAPGQGLE...


In [5]:
# Run anarci-toolz on the long-format frame; the amino acid sequences are read
# from the "sequence_aa" column and the "imgt" scheme is requested.
df_result = run_anarci_toolz(
    df=df_long,
    seq_aa_header="sequence_aa",
    scheme="imgt",
    allowed_species=["human"],
)

[32m2025-02-21 14:35:40.973[0m | [1mINFO    [0m | [36manarci_toolz.abnumber_tool[0m:[36mrun_parallel_abnumber[0m:[36m62[0m - [1mStarting AbNumber processing ...[0m
Processing sequences: 100%|██████████| 200/200 [00:00<00:00, 55093.97seq/s]
[32m2025-02-21 14:35:41.661[0m | [32m[1mSUCCESS [0m | [36manarci_toolz.abnumber_tool[0m:[36mparallel_get_region_seqs[0m:[36m138[0m - [32m[1mAbNumber parallel processing complete[0m
[32m2025-02-21 14:35:41.729[0m | [1mINFO    [0m | [36manarci_toolz.anarci_tool[0m:[36mrun_parallel_anarci[0m:[36m91[0m - [1mStarting ANARCI processing ...[0m
Processing sequences: 100%|██████████| 200/200 [00:00<00:00, 66900.14seq/s]
[32m2025-02-21 14:35:42.262[0m | [32m[1mSUCCESS [0m | [36manarci_toolz.anarci_tool[0m:[36mrun_parallel_anarci[0m:[36m104[0m - [32m[1mANARCI parallel processing complete[0m
[32m2025-02-21 14:35:42.299[0m | [32m[1mSUCCESS [0m | [36manarci_toolz.pipeline[0m:[36mrun_anarci_toolz[0m:[36m

The result is a comprehensively annotated ANARCI output produced by `anarci-toolz`:

In [6]:
# Preview the first rows of the annotated result
df_result.head()

Unnamed: 0,Therapeutic,Format,chain_type_input,sequence_aa,scheme,passed_abnumber,sequence_alignment_aa,species,chain_type,v_gene,...,fr1_aa,fr2_aa,fr3_aa,fr4_aa,passed_anarci,variable_region_start_index,variable_region_end_index,e_value,bitscore,bias
0,Orticumab,Whole mAb,HeavySequence,EVQLLESGGGLVQPGGSLRLSCAASGFTFSNAWMSWVRQAPGKGLE...,imgt,True,EVQLLESGGGLVQPGGSLRLSCAASGFTFSNAWMSWVRQAPGKGLE...,human,H,IGHV3-23*01,...,EVQLLESGGGLVQPGGSLRLSCAAS,MSWVRQAPGKGLEWVSS,YYADSVKGRSTISRDNSKNTLYLQMNSLRAEDTAVYYC,WGQGTLVTVSS,True,0,121,9.5e-61,194.7,0.7
1,Enokizumab,Whole mAb,HeavySequence,QVQLVQSGAEVKKPGSSVKVSCKASGGTFSYYWIEWVRQAPGQGLE...,imgt,True,QVQLVQSGAEVKKPGSSVKVSCKASGGTFSYYWIEWVRQAPGQGLE...,human,H,IGHV1-69*01,...,QVQLVQSGAEVKKPGSSVKVSCKAS,IEWVRQAPGQGLEWMGE,NPNEKFKGRVTITADESTSTAYMELSSLRSEDTAVYYC,WGQGTLVTVSS,True,0,122,2.3e-56,180.6,1.6
2,Bimekizumab,Whole mAb,HeavySequence,EVQLVESGGGLVQPGGSLRLSCAASGFTFSDYNMAWVRQAPGKGLE...,imgt,True,EVQLVESGGGLVQPGGSLRLSCAASGFTFSDYNMAWVRQAPGKGLE...,human,H,IGHV3-7*01,...,EVQLVESGGGLVQPGGSLRLSCAAS,MAWVRQAPGKGLEWVAT,YYRDSVKGRFTISRDNAKNSLYLQMNSLRAEDTAVYYC,WGQGTLVTVSS,True,0,125,2.7e-58,186.8,0.3
3,Micvotabart,Whole mAb,HeavySequence,EVQLLESGGGLVQPGGSLRLSCAASGFTFSSFSMSWVRQAPGKGLE...,imgt,True,EVQLLESGGGLVQPGGSLRLSCAASGFTFSSFSMSWVRQAPGKGLE...,human,H,IGHV3-23*01,...,EVQLLESGGGLVQPGGSLRLSCAAS,MSWVRQAPGKGLEWVSS,YYADSVKGRFTISRDNSKNTLYLQMNSLRAEDTAVYYC,WGQGTLVTVSS,True,0,116,1.4e-61,197.4,0.5
4,Ozuriftamab,Whole mAb,HeavySequence,EVQLVQSGAEVKKPGESLRISCKGSGYTFTEYTMHWVRQAPGQGLE...,imgt,True,EVQLVQSGAEVKKPGESLRISCKGSGYTFTEYTMHWVRQAPGQGLE...,human,H,IGHV5-10-1*01,...,EVQLVQSGAEVKKPGESLRISCKGS,MHWVRQAPGQGLEWMGG,GYNQKFKGRVTISADKSISTAYLQWSSLKASDTAMYYC,WGQGTLVTVSS,True,0,122,2.3999999999999998e-57,183.7,2.8


In [7]:
# Display residue view: same call as before, with display_residue_view enabled.
# Note that this rebinds df_result, replacing the earlier result.
df_result = run_anarci_toolz(
    df=df_long,
    seq_aa_header="sequence_aa",
    scheme="imgt",
    allowed_species=["human"],
    display_residue_view=True,
)

[32m2025-02-21 14:36:36.938[0m | [1mINFO    [0m | [36manarci_toolz.abnumber_tool[0m:[36mrun_parallel_abnumber[0m:[36m62[0m - [1mStarting AbNumber processing ...[0m
Processing sequences: 100%|██████████| 200/200 [00:00<00:00, 80404.56seq/s]
[32m2025-02-21 14:36:37.626[0m | [32m[1mSUCCESS [0m | [36manarci_toolz.abnumber_tool[0m:[36mparallel_get_region_seqs[0m:[36m138[0m - [32m[1mAbNumber parallel processing complete[0m
[32m2025-02-21 14:36:38.364[0m | [1mINFO    [0m | [36manarci_toolz.anarci_tool[0m:[36mrun_parallel_anarci[0m:[36m91[0m - [1mStarting ANARCI processing ...[0m
Processing sequences: 100%|██████████| 200/200 [00:00<00:00, 54531.68seq/s]
[32m2025-02-21 14:36:38.931[0m | [32m[1mSUCCESS [0m | [36manarci_toolz.anarci_tool[0m:[36mrun_parallel_anarci[0m:[36m104[0m - [32m[1mANARCI parallel processing complete[0m
[32m2025-02-21 14:36:38.979[0m | [32m[1mSUCCESS [0m | [36manarci_toolz.pipeline[0m:[36mrun_anarci_toolz[0m:[36m

In [8]:
# Preview the first rows of the result produced with display_residue_view=True
df_result.head()

Unnamed: 0,Therapeutic,Format,chain_type_input,sequence_aa,scheme,passed_abnumber,sequence_alignment_aa,species,chain_type,v_gene,...,imgt_pos_125,imgt_pos_126,imgt_pos_127,imgt_pos_128,passed_anarci,variable_region_start_index,variable_region_end_index,e_value,bitscore,bias
0,Orticumab,Whole mAb,HeavySequence,EVQLLESGGGLVQPGGSLRLSCAASGFTFSNAWMSWVRQAPGKGLE...,imgt,True,EVQLLESGGGLVQPGGSLRLSCAASGFTFSNAWMSWVRQAPGKGLE...,human,H,IGHV3-23*01,...,T,V,S,S,True,0,121,9.5e-61,194.7,0.7
1,Enokizumab,Whole mAb,HeavySequence,QVQLVQSGAEVKKPGSSVKVSCKASGGTFSYYWIEWVRQAPGQGLE...,imgt,True,QVQLVQSGAEVKKPGSSVKVSCKASGGTFSYYWIEWVRQAPGQGLE...,human,H,IGHV1-69*01,...,T,V,S,S,True,0,122,2.3e-56,180.6,1.6
2,Bimekizumab,Whole mAb,HeavySequence,EVQLVESGGGLVQPGGSLRLSCAASGFTFSDYNMAWVRQAPGKGLE...,imgt,True,EVQLVESGGGLVQPGGSLRLSCAASGFTFSDYNMAWVRQAPGKGLE...,human,H,IGHV3-7*01,...,T,V,S,S,True,0,125,2.7e-58,186.8,0.3
3,Micvotabart,Whole mAb,HeavySequence,EVQLLESGGGLVQPGGSLRLSCAASGFTFSSFSMSWVRQAPGKGLE...,imgt,True,EVQLLESGGGLVQPGGSLRLSCAASGFTFSSFSMSWVRQAPGKGLE...,human,H,IGHV3-23*01,...,T,V,S,S,True,0,116,1.4e-61,197.4,0.5
4,Ozuriftamab,Whole mAb,HeavySequence,EVQLVQSGAEVKKPGESLRISCKGSGYTFTEYTMHWVRQAPGQGLE...,imgt,True,EVQLVQSGAEVKKPGESLRISCKGSGYTFTEYTMHWVRQAPGQGLE...,human,H,IGHV5-10-1*01,...,T,V,S,S,True,0,122,2.3999999999999998e-57,183.7,2.8
