# OakVar - Python Script Integration Example: adding rsID to Polygenic Risk Score Catalog data

## Setup

In [1]:
import oakvar as ov
import pandas as pd

### Load dbSNP annotation module and define a helper function

In [2]:
# Load the dbSNP annotator module. The loader function is named differently
# depending on the installed OakVar release, so try the newer name first and
# fall back to the older one.
try:
    dbsnp_m = ov.get_annotator("dbsnp")  # next release
except Exception:
    # `Exception` (not a bare `except`) so that Ctrl-C / SystemExit raised
    # while the module is loading are not silently swallowed.
    dbsnp_m = ov.get_live_annotator("dbsnp")  # current release

In [3]:
def get_rsid(row):
    """Look up the dbSNP rsID for one harmonized PGS variant.

    Parameters
    ----------
    row : mapping-like (e.g. one row of the PGS DataFrame as a pandas Series)
        Must provide ``"hm_chr"``, ``"hm_pos"``, ``"other_allele"`` and
        ``"effect_allele"``.

    Returns
    -------
    The ``"rsid"`` entry of the annotator result, or ``None`` when the
    coordinates are missing or the annotator returns nothing for the variant.
    """
    chrom = row["hm_chr"]
    pos = row["hm_pos"]

    # A variant without harmonized coordinates cannot be queried.
    if pd.isna(chrom) or pd.isna(pos):
        return None

    # pandas may parse the chromosome column as numbers (1, 2, ... or 1.0 when
    # the column contains NaN); plain string concatenation would then raise
    # TypeError, so normalize to a "chrN" string explicitly.
    if isinstance(chrom, float) and chrom.is_integer():
        chrom = int(chrom)
    chrom = str(chrom)
    if not chrom.startswith("chr"):
        chrom = "chr" + chrom

    # A position column containing NaN is read as float (e.g. 817341.0);
    # pass an integer position to the annotator.
    if isinstance(pos, float) and pos.is_integer():
        pos = int(pos)

    data = dbsnp_m.annotate(
        {
            "chrom": chrom,
            "pos": pos,
            "ref_base": row["other_allele"],
            "alt_base": row["effect_allele"],
        }
    )
    # Return a consistent missing value rather than whatever falsy object
    # (None, empty dict, ...) the annotator produced.
    if not data:
        return None
    return data["rsid"]

### Load PGS data

In [4]:
# The column names are on row 19 (0-based) of the tab-separated scoring file;
# the lines above it are skipped by pandas.
df = pd.read_csv(
    "PGS002724_hmPOS_GRCh38.txt",
    sep="\t",
    header=19,
    low_memory=False,
)

#### Drop the rows without a harmonized position (`hm_pos`), which cannot be queried in dbSNP

In [5]:
# Keep only the variants that have a harmonized position.
df = df.dropna(subset=["hm_pos"])

## Add rsID

> Testing with the first 1000 rows

In [6]:
# Independent copy of the first 1000 rows, so adding a column below does not
# touch `df`.
df_head = df.iloc[:1000].copy()

In [7]:
# Annotate row by row; `get_rsid` receives each row as a Series.
df_head["hm_rsID"] = df_head.apply(get_rsid, axis=1)

In [8]:
# Display the annotated subset (the `hm_rsID` column now holds the looked-up values).
df_head

Unnamed: 0,chr_name,chr_position,effect_allele,other_allele,effect_weight,hm_source,hm_rsID,hm_chr,hm_pos,hm_inferOtherAllele
0,1,752721,G,A,50.200914,liftover,rs3131972,1,817341.0,
1,1,754182,G,A,141.073654,liftover,rs3131969,1,818802.0,
2,1,760912,T,C,180.556537,liftover,rs1048488,1,825532.0,
3,1,768448,A,G,-74.643825,liftover,rs12562034,1,833068.0,
4,1,779322,G,A,-137.024959,liftover,rs4040617,1,843942.0,
...,...,...,...,...,...,...,...,...,...,...
995,1,3394250,T,C,22.337444,liftover,rs10909948,1,3477686.0,
996,1,3395039,A,G,-67.771429,liftover,rs10797395,1,3478475.0,
997,1,3400294,T,G,-21.663690,liftover,rs2493314,1,3483730.0,
998,1,3404227,T,C,232.051664,liftover,rs2821041,1,3487663.0,
