# Install pyliftover

In [None]:
pip install pyliftover

# Load GWAS Example File

In [2]:
import pandas as pd
file_path = "GWAS/GWAS_Example.tsv"
df = pd.read_csv(file_path, sep="\t")
df

Unnamed: 0,SNP,Chr,bp_38,effect_allele,other_allele,EAF,beta,p_value
0,rs70937161,1,1624323,A,AT,0.5479,0.01827,1.664000e-08
1,rs12735861,1,1627515,T,C,0.5565,0.01854,4.827000e-09
2,rs3765907,1,1641261,T,C,0.2013,-0.02492,7.714000e-10
3,rs61774957,1,1657081,T,C,0.2435,-0.02008,3.570000e-08
4,rs71511310,1,1657093,A,C,0.3030,-0.02178,7.632000e-09
...,...,...,...,...,...,...,...,...
43303,rs56373884,22,43960588,A,G,0.1555,-0.02319,1.349000e-08
43304,rs2294921,22,43965962,T,C,0.1574,-0.02277,2.072000e-08
43305,rs3761472,22,43972242,G,A,0.1598,-0.02287,1.011000e-08
43306,rs61473277,22,43975526,G,A,0.1422,-0.02321,4.580000e-08


#  Liftover Script (GRCh37 ↔ GRCh38)

In [19]:
"""
GWAS Liftover (GRCh37 ↔ GRCh38)
--------------------------------
- Converts SNP coordinates between GRCh37 and GRCh38
- Uses UCSC chain files + pyliftover
- Works for TSV or CSV input
- User only sets: TARGET_BUILD, INPUT file, and CHR/BP column names
"""
import pandas as pd
from pyliftover import LiftOver

# ======================================================
# CONFIGURATION (EDIT ONLY THESE)
# ======================================================

INPUT = "GWAS/GWAS_Example.tsv"          # Input file
OUTPUT = "GWAS/GWAS_Example_Lifted_37.tsv"  # Output file

# Chain files (your files)
CHAIN_37_TO_38 = "chains/hg19ToHg38.over.chain.gz"
CHAIN_38_TO_37 = "chains/hg38ToHg19.over.chain.gz"

# Column names in your GWAS file
CHR_COL = "Chr"
BP_COL  = "bp_38"     

# TARGET genome build
#   TARGET_BUILD = 37  → Convert TO GRCh37
#   TARGET_BUILD = 38  → Convert TO GRCh38
TARGET_BUILD = 37
# ======================================================
# ------------------------------------------------------
# Load appropriate chain file based on TARGET_BUILD
# ------------------------------------------------------
if TARGET_BUILD == 37:
    print("Converting → GRCh37 (using hg38ToHg19 chain)")
    lo = LiftOver(CHAIN_38_TO_37)
    FROM_BUILD = "38"
    TO_BUILD = "37"
elif TARGET_BUILD == 38:
    print("Converting → GRCh38 (using hg19ToHg38 chain)")
    lo = LiftOver(CHAIN_37_TO_38)
    FROM_BUILD = "37"
    TO_BUILD = "38"
else:
    raise ValueError("TARGET_BUILD must be 37 or 38")
# ------------------------------------------------------
# Liftover helper
# ------------------------------------------------------
def lift(chr_val, pos_val):
    """Convert 1-based SNP position. Returns (chr, pos, status)."""

    if pd.isna(chr_val) or pd.isna(pos_val):
        return None, None, "BAD_INPUT"

    try:
        chr_clean = str(chr_val).replace("chr", "").strip()
        pos_clean = int(pos_val)
    except:
        return None, None, "BAD_PARSE"

    # pyliftover uses 0-based
    result = lo.convert_coordinate("chr" + chr_clean, pos_clean - 1)

    if not result:
        return None, None, "UNMAPPED"

    new_chr = result[0][0].replace("chr", "")
    new_pos = result[0][1] + 1   # back to 1-based

    return new_chr, int(new_pos), "OK"


# ------------------------------------------------------
# Load GWAS file
# ------------------------------------------------------
print(f"Loading file: {INPUT}")

sep = "\t" if INPUT.endswith(".tsv") else ","
df = pd.read_csv(INPUT, sep=sep)

if CHR_COL not in df.columns or BP_COL not in df.columns:
    raise ValueError(f" Missing columns: {CHR_COL}, {BP_COL}\n"
                     f"Available columns: {list(df.columns)}")


# ------------------------------------------------------
# Apply liftover
# ------------------------------------------------------
print(f"Running liftover: GRCh{FROM_BUILD} → GRCh{TO_BUILD}...")

new_chr_list = []
new_bp_list = []
status_list = []

for chrom, pos in zip(df[CHR_COL], df[BP_COL]):
    new_chr, new_bp, status = lift(chrom, pos)
    new_chr_list.append(new_chr)
    new_bp_list.append(new_bp)
    status_list.append(status)

df[f"Chr_{TO_BUILD}"] = new_chr_list
df[f"bp_{TO_BUILD}"] = new_bp_list
df["LO_STATUS"] = status_list


# ------------------------------------------------------
# Save output
# ------------------------------------------------------
df.to_csv(OUTPUT, sep="\t", index=False)
print(f"\n Liftover complete! Saved → {OUTPUT}")

Converting → GRCh37 (using hg38ToHg19 chain)
Loading file: GWAS/GWAS_Example.tsv
Running liftover: GRCh38 → GRCh37...

 Liftover complete! Saved → GWAS/GWAS_Example_Lifted_37.tsv


In [20]:
import pandas as pd
file_path = "GWAS/GWAS_Example_Lifted_37.tsv"
df2 = pd.read_csv(file_path, sep="\t")
df2

Unnamed: 0,SNP,Chr,bp_38,effect_allele,other_allele,EAF,beta,p_value,Chr_37,bp_37,LO_STATUS
0,rs70937161,1,1624323,A,AT,0.5479,0.01827,1.664000e-08,1.0,1559703.0,OK
1,rs12735861,1,1627515,T,C,0.5565,0.01854,4.827000e-09,1.0,1562895.0,OK
2,rs3765907,1,1641261,T,C,0.2013,-0.02492,7.714000e-10,1.0,1576622.0,OK
3,rs61774957,1,1657081,T,C,0.2435,-0.02008,3.570000e-08,1.0,1588531.0,OK
4,rs71511310,1,1657093,A,C,0.3030,-0.02178,7.632000e-09,1.0,1588543.0,OK
...,...,...,...,...,...,...,...,...,...,...,...
43303,rs56373884,22,43960588,A,G,0.1555,-0.02319,1.349000e-08,22.0,44356468.0,OK
43304,rs2294921,22,43965962,T,C,0.1574,-0.02277,2.072000e-08,22.0,44361842.0,OK
43305,rs3761472,22,43972242,G,A,0.1598,-0.02287,1.011000e-08,22.0,44368122.0,OK
43306,rs61473277,22,43975526,G,A,0.1422,-0.02321,4.580000e-08,22.0,44371406.0,OK
