In [1]:
!git clone https://github.com/abhishek130904/UIDAI-Hackathon.git

Cloning into 'UIDAI-Hackathon'...
remote: Enumerating objects: 62, done.[K
remote: Counting objects: 100% (30/30), done.[K
remote: Compressing objects: 100% (28/28), done.[K
remote: Total 62 (delta 2), reused 28 (delta 2), pack-reused 32 (from 1)[K
Receiving objects: 100% (62/62), 77.33 MiB | 28.26 MiB/s, done.
Resolving deltas: 100% (15/15), done.
Updating files: 100% (38/38), done.


In [2]:
import numpy as np

In [3]:
import pandas as pd
import glob
import os

In [4]:
path = '/content/UIDAI-Hackathon/Dataset UIDAI/api_data_aadhar_biometric'

# Find all biometric CSV files in that folder
all_files = glob.glob(os.path.join(path, "*.csv"))

# Read each file and append to a list
# 'low_memory=False' helps avoid DtypeWarnings with large Aadhaar datasets
df_list = [pd.read_csv(f, low_memory=False) for f in all_files]

# Concatenate all files into one single DataFrame
biometric_master_df = pd.concat(df_list, axis=0, ignore_index=True)

# Save the combined file to your Drive or local Colab storage
biometric_master_df.to_csv('master_biometric_data.csv', index=False)

print(f"Merged {len(all_files)} files.")
print(f"Total records for analysis: {len(biometric_master_df)}")

Merged 4 files.
Total records for analysis: 1861108


In [5]:
path = '/content/UIDAI-Hackathon/Dataset UIDAI/api_data_aadhar_demographic'

# Find all biometric CSV files in that folder
all_files = glob.glob(os.path.join(path, "*.csv"))

# Read each file and append to a list
# 'low_memory=False' helps avoid DtypeWarnings with large Aadhaar datasets
df_list = [pd.read_csv(f, low_memory=False) for f in all_files]

# Concatenate all files into one single DataFrame
demographic_master_df = pd.concat(df_list, axis=0, ignore_index=True)

# Save the combined file to your Drive or local Colab storage
demographic_master_df.to_csv('master_demographic_data.csv', index=False)

print(f"Merged {len(all_files)} files.")
print(f"Total records for analysis: {len(demographic_master_df)}")

Merged 5 files.
Total records for analysis: 2071700


In [6]:
path = '/content/UIDAI-Hackathon/Dataset UIDAI/api_data_aadhar_enrolment'

# Find all biometric CSV files in that folder
all_files = glob.glob(os.path.join(path, "*.csv"))

# Read each file and append to a list
# 'low_memory=False' helps avoid DtypeWarnings with large Aadhaar datasets
df_list = [pd.read_csv(f, low_memory=False) for f in all_files]

# Concatenate all files into one single DataFrame
aadhar_enrolment_master_df = pd.concat(df_list, axis=0, ignore_index=True)

# Save the combined file to your Drive or local Colab storage
aadhar_enrolment_master_df.to_csv('master_aadhar_enrolment_data.csv', index=False)

print(f"Merged {len(all_files)} files.")
print(f"Total records for analysis: {len(aadhar_enrolment_master_df)}")

Merged 3 files.
Total records for analysis: 1006029


In [7]:
biometric_master_df.columns


Index(['date', 'state', 'district', 'pincode', 'bio_age_5_17', 'bio_age_17_'], dtype='object')

In [8]:
demographic_master_df.columns


Index(['date', 'state', 'district', 'pincode', 'demo_age_5_17',
       'demo_age_17_'],
      dtype='object')

In [9]:
aadhar_enrolment_master_df.columns


Index(['date', 'state', 'district', 'pincode', 'age_0_5', 'age_5_17',
       'age_18_greater'],
      dtype='object')

In [10]:
biometric_master_df.head()



Unnamed: 0,date,state,district,pincode,bio_age_5_17,bio_age_17_
0,07-11-2025,Haryana,Yamuna Nagar,135002,4,6
1,07-11-2025,Haryana,Yamunanagar,135001,1,2
2,07-11-2025,Himachal Pradesh,Bilaspur,174004,2,2
3,07-11-2025,Himachal Pradesh,Bilaspur,174005,1,0
4,07-11-2025,Himachal Pradesh,Bilaspur,174013,3,1


In [11]:
demographic_master_df.head()


Unnamed: 0,date,state,district,pincode,demo_age_5_17,demo_age_17_
0,18-11-2025,Karnataka,Hasan,573118,0,2
1,18-11-2025,Karnataka,Hasan,573124,1,3
2,18-11-2025,Karnataka,Hasan,573150,0,2
3,18-11-2025,Karnataka,Hassan,573113,0,1
4,18-11-2025,Karnataka,Hassan,573120,1,4


In [12]:
aadhar_enrolment_master_df.head()

Unnamed: 0,date,state,district,pincode,age_0_5,age_5_17,age_18_greater
0,02-03-2025,Meghalaya,East Khasi Hills,793121,11,61,37
1,09-03-2025,Karnataka,Bengaluru Urban,560043,14,33,39
2,09-03-2025,Uttar Pradesh,Kanpur Nagar,208001,29,82,12
3,09-03-2025,Uttar Pradesh,Aligarh,202133,62,29,15
4,09-03-2025,Karnataka,Bengaluru Urban,560016,14,16,21


In [13]:
for df in [biometric_master_df, demographic_master_df, aadhar_enrolment_master_df]:
    df['date'] = pd.to_datetime(df['date'], dayfirst=True, errors='coerce')


In [14]:
def normalize_district(x):
    x = str(x).lower().strip()
    x = x.replace(" ", "")
    return x

for df in [biometric_master_df, demographic_master_df, aadhar_enrolment_master_df]:
    df['district_norm'] = df['district'].apply(normalize_district)


In [15]:
num_cols_enroll = ['age_0_5','age_5_17','age_18_greater']
num_cols_bio = ['bio_age_5_17','bio_age_17_']
num_cols_demo = ['demo_age_5_17','demo_age_17_']

aadhar_enrolment_master_df[num_cols_enroll] = aadhar_enrolment_master_df[num_cols_enroll].apply(pd.to_numeric, errors='coerce')
biometric_master_df[num_cols_bio] = biometric_master_df[num_cols_bio].apply(pd.to_numeric, errors='coerce')
demographic_master_df[num_cols_demo] = demographic_master_df[num_cols_demo].apply(pd.to_numeric, errors='coerce')


In [16]:
biometric_master_df.head()

Unnamed: 0,date,state,district,pincode,bio_age_5_17,bio_age_17_,district_norm
0,2025-11-07,Haryana,Yamuna Nagar,135002,4,6,yamunanagar
1,2025-11-07,Haryana,Yamunanagar,135001,1,2,yamunanagar
2,2025-11-07,Himachal Pradesh,Bilaspur,174004,2,2,bilaspur
3,2025-11-07,Himachal Pradesh,Bilaspur,174005,1,0,bilaspur
4,2025-11-07,Himachal Pradesh,Bilaspur,174013,3,1,bilaspur


In [17]:
enroll_district = aadhar_enrolment_master_df.groupby(
    ['state','district_norm']
)[num_cols_enroll].sum().reset_index()

bio_district = biometric_master_df.groupby(
    ['state','district_norm']
)[num_cols_bio].sum().reset_index()

demo_district = demographic_master_df.groupby(
    ['state','district_norm']
)[num_cols_demo].sum().reset_index()


In [18]:
district_master = enroll_district.merge(
    bio_district, on=['state','district_norm'], how='left'
)

district_master = district_master.merge(
    demo_district, on=['state','district_norm'], how='left'
)

district_master.fillna(0, inplace=True)


In [19]:
aadhar_enrolment_master_df

Unnamed: 0,date,state,district,pincode,age_0_5,age_5_17,age_18_greater,district_norm
0,2025-03-02,Meghalaya,East Khasi Hills,793121,11,61,37,eastkhasihills
1,2025-03-09,Karnataka,Bengaluru Urban,560043,14,33,39,bengaluruurban
2,2025-03-09,Uttar Pradesh,Kanpur Nagar,208001,29,82,12,kanpurnagar
3,2025-03-09,Uttar Pradesh,Aligarh,202133,62,29,15,aligarh
4,2025-03-09,Karnataka,Bengaluru Urban,560016,14,16,21,bengaluruurban
...,...,...,...,...,...,...,...,...
1006024,2025-12-31,Telangana,Hyderabad,500045,4,5,1,hyderabad
1006025,2025-12-31,Telangana,Hyderabad,500057,0,2,0,hyderabad
1006026,2025-12-31,Telangana,Hyderabad,500061,4,2,0,hyderabad
1006027,2025-12-31,Telangana,Hyderabad,500062,1,4,0,hyderabad


In [20]:
district_master.head()

Unnamed: 0,state,district_norm,age_0_5,age_5_17,age_18_greater,bio_age_5_17,bio_age_17_,demo_age_5_17,demo_age_17_
0,100000,100000,0,1,217,0.0,0.0,0.0,2.0
1,Andaman & Nicobar Islands,andamans,70,5,0,343.0,1672.0,7.0,743.0
2,Andaman & Nicobar Islands,nicobars,1,0,0,1.0,1.0,0.0,4.0
3,Andaman & Nicobar Islands,southandaman,38,0,0,61.0,306.0,6.0,299.0
4,Andaman and Nicobar Islands,nicobar,64,11,0,992.0,819.0,58.0,729.0


In [21]:
district_master.columns


Index(['state', 'district_norm', 'age_0_5', 'age_5_17', 'age_18_greater',
       'bio_age_5_17', 'bio_age_17_', 'demo_age_5_17', 'demo_age_17_'],
      dtype='object')

In [22]:
district_master = district_master[
    district_master['state'].apply(lambda x: not str(x).isdigit())
]


In [23]:
def normalize_text(x):
    return str(x).lower().strip()

district_master['state'] = district_master['state'].apply(normalize_text)


In [24]:
district_master['total_enrolments'] = (
    district_master['age_0_5'] +
    district_master['age_5_17'] +
    district_master['age_18_greater']
)


In [25]:
district_master['child_share'] = (
    (district_master['age_0_5'] + district_master['age_5_17']) /
    (district_master['total_enrolments'] + 1)
)

district_master['adult_share'] = (
    district_master['age_18_greater'] /
    (district_master['total_enrolments'] + 1)
)
district_master

Unnamed: 0,state,district_norm,age_0_5,age_5_17,age_18_greater,bio_age_5_17,bio_age_17_,demo_age_5_17,demo_age_17_,total_enrolments,child_share,adult_share
1,andaman & nicobar islands,andamans,70,5,0,343.0,1672.0,7.0,743.0,75,0.986842,0.000000
2,andaman & nicobar islands,nicobars,1,0,0,1.0,1.0,0.0,4.0,1,0.500000,0.000000
3,andaman & nicobar islands,southandaman,38,0,0,61.0,306.0,6.0,299.0,38,0.974359,0.000000
4,andaman and nicobar islands,nicobar,64,11,0,992.0,819.0,58.0,729.0,75,0.986842,0.000000
5,andaman and nicobar islands,northandmiddleandaman,128,4,0,3780.0,2603.0,112.0,1897.0,132,0.992481,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...
1030,west bengal,westmidnapore,1647,540,3,4390.0,14588.0,455.0,30225.0,2190,0.998174,0.001369
1031,west bengal,hooghly,6,1,0,5.0,14.0,0.0,49.0,7,0.875000,0.000000
1032,westbengal,hooghly,3,3,0,2.0,14.0,0.0,87.0,6,0.857143,0.000000
1033,andhra pradesh,chittoor,4,0,0,17.0,16.0,5.0,52.0,4,0.800000,0.000000


In [26]:
district_master['biometric_stress'] = (
    district_master['bio_age_17_'] /
    (district_master['age_18_greater'] + 1)
)


In [27]:
district_master['demo_update_ratio'] = (
    district_master['demo_age_17_'] /
    (district_master['age_18_greater'] + 1)
)


In [28]:
district_master['mobility_index'] = (
    district_master['demo_age_17_'] +
    district_master['age_18_greater']
)

district_master['mobility_index_norm'] = (
    district_master['mobility_index'] /
    district_master['mobility_index'].max()
)


In [29]:
district_master[
    ['state','district_norm','total_enrolments',
     'child_share','biometric_stress',
     'demo_update_ratio','mobility_index_norm']
].head(15)


Unnamed: 0,state,district_norm,total_enrolments,child_share,biometric_stress,demo_update_ratio,mobility_index_norm
1,andaman & nicobar islands,andamans,75,0.986842,1672.0,743.0,0.001803
2,andaman & nicobar islands,nicobars,1,0.5,1.0,4.0,1e-05
3,andaman & nicobar islands,southandaman,38,0.974359,306.0,299.0,0.000725
4,andaman and nicobar islands,nicobar,75,0.986842,819.0,729.0,0.001769
5,andaman and nicobar islands,northandmiddleandaman,132,0.992481,2603.0,1897.0,0.004603
6,andaman and nicobar islands,southandaman,190,0.994764,3491.0,2957.0,0.007175
7,andhra pradesh,adilabad,1419,0.998592,19915.0,13347.5,0.064776
8,andhra pradesh,allurisitharamaraju,1255,0.972134,290.542857,322.914286,0.027506
9,andhra pradesh,anakapalli,543,0.983456,353.444444,1298.666667,0.028379
10,andhra pradesh,anantapur,4415,0.999774,57731.0,59789.0,0.145073


In [30]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()

score_features = [
    'biometric_stress',
    'demo_update_ratio',
    'mobility_index_norm',
    'total_enrolments'
]

district_master_scaled = district_master.copy()

district_master_scaled[score_features] = scaler.fit_transform(
    district_master[score_features]
)


In [31]:
district_master_scaled['risk_score'] = (
    0.4 * district_master_scaled['biometric_stress'] +
    0.3 * district_master_scaled['demo_update_ratio'] +
    0.2 * district_master_scaled['mobility_index_norm'] +
    0.1 * district_master_scaled['total_enrolments']
)


In [32]:
district_master_scaled['risk_zone'] = pd.cut(
    district_master_scaled['risk_score'],
    bins=[-1, 0.33, 0.66, 1.0],
    labels=['Low','Medium','High']
)


In [33]:
district_master_scaled[['state','district_norm','risk_score','risk_zone']].head(15)


Unnamed: 0,state,district_norm,risk_score,risk_zone
1,andaman & nicobar islands,andamans,0.014636,Low
2,andaman & nicobar islands,nicobars,2.8e-05,Low
3,andaman & nicobar islands,southandaman,0.003629,Low
4,andaman and nicobar islands,nicobar,0.009264,Low
5,andaman and nicobar islands,northandmiddleandaman,0.026895,Low
6,andaman and nicobar islands,southandaman,0.038372,Low
7,andhra pradesh,adilabad,0.206779,Low
8,andhra pradesh,allurisitharamaraju,0.011795,Low
9,andhra pradesh,anakapalli,0.015626,Low
10,andhra pradesh,anantapur,0.697434,High


In [34]:
def policy_recommendation(row):
    if row['biometric_stress'] > district_master['biometric_stress'].quantile(0.9):
        return "Upgrade biometric devices & enable iris authentication"

    elif row['demo_update_ratio'] > district_master['demo_update_ratio'].quantile(0.9):
        return "Open temporary enrolment/update centers for migrants"

    elif row['child_share'] > 0.95:
        return "Conduct school-based Aadhaar enrolment drives"

    elif row['total_enrolments'] > district_master['total_enrolments'].quantile(0.9):
        return "Increase staff capacity and appointment slot   s"

    else:
        return "Normal operations"


In [35]:
district_master_scaled['policy_action'] = district_master.apply(
    policy_recommendation, axis=1
)


In [36]:
district_master_scaled[
    ['state','district_norm','risk_zone','policy_action']
].head(20)


Unnamed: 0,state,district_norm,risk_zone,policy_action
1,andaman & nicobar islands,andamans,Low,Conduct school-based Aadhaar enrolment drives
2,andaman & nicobar islands,nicobars,Low,Normal operations
3,andaman & nicobar islands,southandaman,Low,Conduct school-based Aadhaar enrolment drives
4,andaman and nicobar islands,nicobar,Low,Conduct school-based Aadhaar enrolment drives
5,andaman and nicobar islands,northandmiddleandaman,Low,Conduct school-based Aadhaar enrolment drives
6,andaman and nicobar islands,southandaman,Low,Conduct school-based Aadhaar enrolment drives
7,andhra pradesh,adilabad,Low,Upgrade biometric devices & enable iris authen...
8,andhra pradesh,allurisitharamaraju,Low,Conduct school-based Aadhaar enrolment drives
9,andhra pradesh,anakapalli,Low,Conduct school-based Aadhaar enrolment drives
10,andhra pradesh,anantapur,High,Upgrade biometric devices & enable iris authen...


In [37]:
daily_ts = aadhar_enrolment_master_df.groupby(
    ['state','district_norm','date']
)['age_18_greater'].sum().reset_index()


In [38]:
daily_ts['mean'] = daily_ts.groupby(
    ['state','district_norm']
)['age_18_greater'].transform('mean')

daily_ts['std'] = daily_ts.groupby(
    ['state','district_norm']
)['age_18_greater'].transform('std')

daily_ts['z_score'] = (
    (daily_ts['age_18_greater'] - daily_ts['mean']) /
    (daily_ts['std'] + 1)
)


In [39]:
anomalies = daily_ts[abs(daily_ts['z_score']) > 3]
anomalies.head(15)


Unnamed: 0,state,district_norm,date,age_18_greater,mean,std,z_score
13,100000,100000,2025-12-15,161,10.333333,34.589497,4.233459
876,Andhra Pradesh,chittoor,2025-12-15,8,0.558442,1.292626,3.245867
1739,Andhra Pradesh,kurnool,2025-12-15,6,0.311688,0.862367,3.054345
2458,Andhra Pradesh,prakasam,2025-07-01,38,0.802632,4.40006,6.888325
2717,Andhra Pradesh,sripottisriramulunellore,2025-10-21,25,4.04,5.488169,3.230495
2879,Andhra Pradesh,tirupati,2025-11-07,22,3.04,4.774482,3.283412
2893,Andhra Pradesh,tirupati,2025-12-15,21,3.04,4.774482,3.110236
3273,Andhra Pradesh,y.s.r,2025-12-15,7,0.445946,1.136468,3.067706
3434,Arunachal Pradesh,eastkameng,2025-11-19,13,0.233333,1.681068,4.761784
3590,Arunachal Pradesh,kurungkumey,2025-11-19,12,0.363636,2.088932,3.767116
