In [1]:
from anarci_toolz.pipeline import run_anarci_toolz
import pandas as pd
import boto3

# --- NI006 antibody: sample label and input chains ---------------------------
# SAMPLE_NAME is reused further down to build the output CSV file names.
SAMPLE_NAME = "NI006"

# Heavy-chain amino-acid sequence (one-letter codes).
heavy = "QVQLQESGPGLVKPSETLSLTCSVSGGSIISRSSYWGWIRQPPGKGLEWIGGIYHSGNTYDNPSLKSRLTMSVDTSKNQFSLNLRSVTAADTAVYYCARIVPGGDAFDIWGQGTMVTVSSASTKGPSVFPLAPSSKSTSGGTAALGCLVKDYFPEPVTVSWNSGALTSGVHTFPAVLQSSGLYSLSSVVTVPSSSLGTQTYICNVNHKPSNTKVDKKVEPKSCDKTHTCPPCPAPELLGGPSVFLFPPKPKDTLMISRTPEVTCVVVDVSHEDPEVKFNWYVDGVEVHNAKTKPREEQYNSTYRVVSVLTVLHQDWLNGKEYKCKVSNKALPAPIEKTISKAKGQPREPQVYTLPPSRDELTKNQVSLTCLVKGFYPSDIAVEWESNGQPENNYKTTPPVLDSDGSFFLYSKLTVDKSRWQQGNVFSCSVMHEALHNHYTQKSLSLSPGK"

# Light-chain amino-acid sequence (one-letter codes).
light = "DIQMTQSPSSLSASVGDRVTIACRASQSVGTYLNWYQQKRGKAPKLLIFAASSLQSGVPSRFSGSGSGTDFTLTISSLQPEDFATYYCQQSYSSPPTFGQGTKVEIKRTVAAPSVFIFPPSDEQLKSGTASVVCLLNNFYPREAKVQWKVDNALQSGNSQESVTEQDSKDSTYSLSSTLTLSKADYEKHKVYACEVTHQGLSSPVTKSFNRGEC"


In [2]:

# Assemble the input table: one row per chain, heavy first, with a label column
# recording which chain each sequence was supplied as.
data_ls = [heavy, light]
chain_labels = ["HeavySequence", "LightSequence"]

df_long = pd.DataFrame(
    {
        "sequence_aa": data_ls,
        "chain_type_input": chain_labels,
    }
)

# Options for the Kabat numbering run; `seq_aa_header` names the column of
# df_long that holds the amino-acid sequences.
kabat_options = {
    "scheme": "kabat",
    "allowed_species": ["human"],
    "seq_aa_header": "sequence_aa",
    "retain_indices": True,
}

df_result_kabat = run_anarci_toolz(df=df_long, **kabat_options)

top-level pandera module will be **removed in a future version of pandera**.
If you're using pandera to validate pandas objects, we highly recommend updating
your import:

```
# old import
import pandera as pa

# new import
import pandera.pandas as pa
```

If you're using pandera to validate objects from other compatible libraries
like pyspark or polars, see the supported libraries section of the documentation
for more information on how to import pandera:

https://pandera.readthedocs.io/en/stable/supported_libraries.html


```
```

[32m2025-07-01 22:51:47.893[0m | [1mINFO    [0m | [36manarci_toolz.abnumber_tool[0m:[36mrun_parallel_abnumber[0m:[36m62[0m - [1mStarting AbNumber processing ...[0m
Processing sequences: 100%|██████████| 2/2 [00:00<00:00, 1559.51seq/s]
[32m2025-07-01 22:51:47.993[0m | [32m[1mSUCCESS [0m | [36manarci_toolz.abnumber_tool[0m:[36mparallel_get_region_seqs[0m:[36m138[0m - [32m[1mAbNumber parallel processing complete[0m
[32m2025-07-01 22:

In [3]:
# Print the aligned variable-region sequence of each chain from the Kabat run,
# one sequence per line.
for aligned_seq in df_result_kabat["sequence_alignment_aa"]:
    print(aligned_seq)

QVQLQESGPGLVKPSETLSLTCSVSGGSIISRSSYWGWIRQPPGKGLEWIGGIYHSGNTYDNPSLKSRLTMSVDTSKNQFSLNLRSVTAADTAVYYCARIVPGGDAFDIWGQGTMVTVSS
DIQMTQSPSSLSASVGDRVTIACRASQSVGTYLNWYQQKRGKAPKLLIFAASSLQSGVPSRFSGSGSGTDFTLTISSLQPEDFATYYCQQSYSSPPTFGQGTKVEIK


In [4]:
# Same input table as the Kabat run, numbered with the IMGT scheme instead.
imgt_options = {
    "scheme": "imgt",
    "allowed_species": ["human"],
    "seq_aa_header": "sequence_aa",
    "retain_indices": True,
}

df_result_imgt = run_anarci_toolz(df=df_long, **imgt_options)

[32m2025-07-01 22:51:52.575[0m | [1mINFO    [0m | [36manarci_toolz.abnumber_tool[0m:[36mrun_parallel_abnumber[0m:[36m62[0m - [1mStarting AbNumber processing ...[0m
Processing sequences: 100%|██████████| 2/2 [00:00<00:00, 2083.09seq/s]
[32m2025-07-01 22:51:52.669[0m | [32m[1mSUCCESS [0m | [36manarci_toolz.abnumber_tool[0m:[36mparallel_get_region_seqs[0m:[36m138[0m - [32m[1mAbNumber parallel processing complete[0m
[32m2025-07-01 22:51:52.693[0m | [1mINFO    [0m | [36manarci_toolz.anarci_tool[0m:[36mrun_parallel_anarci[0m:[36m91[0m - [1mStarting ANARCI processing ...[0m
Processing sequences: 100%|██████████| 2/2 [00:00<00:00, 2423.75seq/s]
[32m2025-07-01 22:51:52.770[0m | [32m[1mSUCCESS [0m | [36manarci_toolz.anarci_tool[0m:[36mrun_parallel_anarci[0m:[36m104[0m - [32m[1mANARCI parallel processing complete[0m
[32m2025-07-01 22:51:52.782[0m | [32m[1mSUCCESS [0m | [36manarci_toolz.pipeline[0m:[36mrun_anarci_toolz[0m:[36m86[0m - 

In [7]:
# Output CSV names, one per numbering scheme, prefixed with the sample name.
filename_kabat, filename_imgt = (
    f"{SAMPLE_NAME}_result_anarci_kabat.csv",
    f"{SAMPLE_NAME}_result_anarci_imgt.csv",
)

In [8]:
# Write each result table to its CSV in the working directory (Kabat first,
# then IMGT); the DataFrame index is not written.
for result_frame, csv_name in (
    (df_result_kabat, filename_kabat),
    (df_result_imgt, filename_imgt),
):
    result_frame.to_csv(csv_name, index=False)

In [9]:
# Copy both result CSVs to S3. The local files are left in place; each object
# key is the local file name under the dev/ab-discovery/anarci/ prefix.
s3 = boto3.client("s3")
bucket_name = "bbio-data-platform"

for csv_name in (filename_kabat, filename_imgt):
    object_key = f'dev/ab-discovery/anarci/{csv_name}'
    s3.upload_file(Filename=csv_name, Bucket=bucket_name, Key=object_key)
    print(f"Uploaded {csv_name} to s3://{bucket_name}/{object_key}")

Uploaded NI006_result_anarci_kabat.csv to s3://bbio-data-platform/dev/ab-discovery/anarci/NI006_result_anarci_kabat.csv
Uploaded NI006_result_anarci_imgt.csv to s3://bbio-data-platform/dev/ab-discovery/anarci/NI006_result_anarci_imgt.csv
