# Aadhaar Friction Index (AFI)

In [None]:
import pandas as pd
import numpy as np
from pathlib import Path

# Show every column when a DataFrame is displayed (None removes the column cap).
pd.options.display.max_columns = None

In [2]:
# Directory holding the processed signal tables, relative to the notebook.
SIGNAL_PATH = Path("../datasets/processed/signals")

# Read the friction-signal table that the index is built from.
signals_file = SIGNAL_PATH / "friction_signals.csv"
base_df = pd.read_csv(signals_file)

# Report the row count, then summarise the four signal columns as loaded.
print("Rows:", len(base_df))
base_df.loc[:, ["UIS", "RIS", "BSS", "TSD"]].describe()

Rows: 11394


Unnamed: 0,UIS,RIS,BSS,TSD
count,11394.0,11394.0,11394.0,11394.0
mean,0.002101,0.126689,0.515981,2.2060250000000002e-17
std,0.01421,0.231641,0.246605,0.953404
min,0.0,0.0,0.0,-1.212062
25%,0.000225,0.008986,0.338456,-0.3334802
50%,0.000354,0.014762,0.522398,-0.3074994
75%,0.000583,0.07155,0.69695,-0.2861346
max,1.0,1.0,1.0,3.326111


In [3]:
# Keep an untouched copy of each signal under "<name>_raw" so the later
# rescaling steps can overwrite the original column names.
for signal in ("UIS", "RIS", "BSS", "TSD"):
    base_df[f"{signal}_raw"] = base_df[signal]

In [4]:
# Min-max rescale UIS, RIS and BSS from their "_raw" copies into [0, 1].
# Each column is scaled independently using its own observed min and max.
for raw_col, norm_col in [
    ("UIS_raw", "UIS"),
    ("RIS_raw", "RIS"),
    ("BSS_raw", "BSS"),
]:
    min_val = base_df[raw_col].min()
    max_val = base_df[raw_col].max()
    if max_val > min_val:
        base_df[norm_col] = (base_df[raw_col] - min_val) / (max_val - min_val)
    else:
        # No spread in the column: scaling would divide by zero, so fall back
        # to a constant. Use a float so the column dtype matches the scaled
        # branch instead of silently becoming an integer column.
        base_df[norm_col] = 0.0


In [5]:
# Min-max rescale TSD from its raw copy into [0, 1].
tsd_min = base_df["TSD_raw"].min()
tsd_max = base_df["TSD_raw"].max()
tsd_range = tsd_max - tsd_min

if tsd_range > 0:
    base_df["TSD"] = (base_df["TSD_raw"] - tsd_min) / tsd_range
else:
    # Same guard as the UIS/RIS/BSS scaling: a constant column would divide
    # by zero and turn every value into NaN, so use a constant 0.0 instead.
    base_df["TSD"] = 0.0

In [6]:
# Summarise the four signals again now that they have been rescaled.
signal_cols = ["UIS", "RIS", "BSS", "TSD"]
base_df[signal_cols].describe()

Unnamed: 0,UIS,RIS,BSS,TSD
count,11394.0,11394.0,11394.0,11394.0
mean,0.002101,0.126689,0.515981,0.267082
std,0.01421,0.231641,0.246605,0.210085
min,0.0,0.0,0.0,0.0
25%,0.000225,0.008986,0.338456,0.193598
50%,0.000354,0.014762,0.522398,0.199323
75%,0.000583,0.07155,0.69695,0.204031
max,1.0,1.0,1.0,1.0


In [7]:
# Contribution of each signal to the composite index.
weights = {
    "UIS": 0.30,  # Update intensity
    "RIS": 0.25,  # Repeat interaction pressure
    "BSS": 0.25,  # Biometric stress
    "TSD": 0.20,  # Temporal deviation
}

# Weighted sum of the signal columns, accumulated in the dict's insertion
# order (UIS, RIS, BSS, TSD).
base_df["AFI_raw"] = sum(
    weight * base_df[signal] for signal, weight in weights.items()
)

In [8]:
# Rescale the weighted score so the lowest row maps to 0 and the highest to 100.
afi_min = base_df["AFI_raw"].min()
afi_max = base_df["AFI_raw"].max()
afi_range = afi_max - afi_min

if afi_range > 0:
    base_df["AFI"] = 100 * (base_df["AFI_raw"] - afi_min) / afi_range
else:
    # Every row has the same raw score: dividing by a zero range would make
    # the whole AFI column NaN, so report a constant 0.0 instead.
    base_df["AFI"] = 0.0

In [9]:
# Ten rows with the highest AFI, showing only the identifying columns.
top_cols = ["state", "district", "period", "AFI"]
base_df.sort_values(by="AFI", ascending=False)[top_cols].head(10)

Unnamed: 0,state,district,period,AFI
3400,Himachal Pradesh,Sirmaur,2025-01,100.0
3198,Haryana,Panchkula,2025-01,80.315663
7937,Punjab,Shaheed Bhagat Singh Nagar,2025-01,78.457539
7764,Punjab,Jalandhar,2025-01,78.129664
7885,Punjab,Rupnagar,2025-01,76.267156
4986,Madhya Pradesh,Anuppur,2025-01,75.505133
2410,Delhi,North East,2025-01,75.250153
3413,Himachal Pradesh,Solan,2025-01,75.123204
96,Andhra Pradesh,Ananthapur,2025-01,74.993422
3388,Himachal Pradesh,Shimla,2025-01,74.400972


In [10]:
# Output directory for index files; created (with parents) if it is missing.
OUTPUT_PATH = Path("../datasets/processed/index")
OUTPUT_PATH.mkdir(parents=True, exist_ok=True)

# Write the full table (signals, raw copies and AFI columns) without the index.
index_file = OUTPUT_PATH / "aadhaar_friction_index.csv"
base_df.to_csv(index_file, index=False)

# NOTE(review): the message names "data/processed/index/" while the file is
# written under "../datasets/processed/index" — confirm which is intended.
print("Aadhaar Friction Index saved to data/processed/index/")

Aadhaar Friction Index saved to data/processed/index/


In [11]:
# Keep just the identifying columns plus the final AFI score, as an
# independent copy of base_df's data.
afi_columns = ["state", "district", "period", "AFI"]
afi_df = base_df.loc[:, afi_columns].copy()

# Make sure the output directory exists before writing.
OUTPUT_PATH = Path("../datasets/processed/index")
OUTPUT_PATH.mkdir(parents=True, exist_ok=True)

# Write the slim table without the DataFrame index.
afi_only_file = OUTPUT_PATH / "aadhaar_friction_index_only.csv"
afi_df.to_csv(afi_only_file, index=False)

print("AFI-only CSV saved successfully.")


AFI-only CSV saved successfully.


In [12]:
# Rank rows within each period, highest AFI first. Dense ranking gives tied
# scores the same rank and leaves no gaps after a tie.
afi_by_period = afi_df.groupby("period")["AFI"]
afi_df["AFI_rank"] = afi_by_period.rank(method="dense", ascending=False)

# Write the ranked table without the DataFrame index.
ranked_file = OUTPUT_PATH / "aadhaar_friction_index_ranked.csv"
afi_df.to_csv(ranked_file, index=False)
