# Part 1. Download Data

Website: https://www.poshantracker.in/statistics

In [None]:
import warnings
warnings.filterwarnings("ignore")

import os
import time
from datetime import datetime

import json
from tqdm import tqdm

import requests

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select, WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

In [None]:
# Root folder for every file this notebook writes (metadata JSON and downloads).
data_dir = '../data'
# Create it up front; exist_ok=True makes re-running this cell harmless.
os.makedirs(data_dir, exist_ok=True)

## Part 1. Download metadata

In [None]:
# 1. Set up Selenium WebDriver for Chrome.
# Starts a Chrome session and opens the statistics page; the following cells
# read the state, district and month dropdowns from this page.
driver = webdriver.Chrome()
driver.get("https://www.poshantracker.in/statistics")

In [None]:
# 2. Create a reusable explicit wait with a 10-second timeout.
# Nothing is waited for in this cell; the following cells call wait.until(...)
# on this object to block until each dropdown is present.
wait = WebDriverWait(driver, 10)

In [None]:
# 3. Collect every state listed in the "selectstate" dropdown.
state_select_elem = wait.until(
    EC.presence_of_element_located((By.NAME, "selectstate"))
)
state_select = Select(state_select_elem)

# Read label and value of each <option>, then keep only entries whose value
# is non-empty (presumably this drops the placeholder entry -- verify on the page).
state_options = [
    {'name': opt.text.strip(), 'value': opt.get_attribute('value')}
    for opt in state_select.options
]
states = [entry for entry in state_options if entry['value']]

# Persist the list so the download part can run without the browser.
states_path = f'{data_dir}/states.json'
with open(states_path, 'w') as out:
    json.dump(states, out, indent=4)

In [None]:
# 4. Collect the districts of every state.
districts = []

for state in tqdm(states):
    # Pick the state in the state dropdown.
    state_select.select_by_value(state['value'])
    # Fixed pause before reading the district dropdown (presumably it is
    # repopulated asynchronously after the selection -- an explicit
    # WebDriverWait condition would be more reliable than a sleep).
    time.sleep(1.5)

    district_select_elem = wait.until(
        EC.presence_of_element_located((By.NAME, "selectdistrict"))
    )
    district_select = Select(district_select_elem)

    # Read label and value of each <option>, then keep only entries whose
    # value is non-empty.
    district_options = [
        {'name': opt.text.strip(), 'value': opt.get_attribute('value')}
        for opt in district_select.options
    ]
    state_districts = [entry for entry in district_options if entry['value']]

    # Each record carries the full state entry next to its district list.
    districts.append({'state': state, 'districts': state_districts})

districts_path = f'{data_dir}/districts.json'
with open(districts_path, 'w') as out:
    json.dump(districts, out, indent=4)

In [None]:
# 5. Collect every month listed in the month dropdown.
month_select_elem = wait.until(
    EC.presence_of_element_located(
        (By.CSS_SELECTOR, 'select[id="sel1 monthSelection"]')
    )
)
month_select = Select(month_select_elem)

# Read label and value of each <option>, then keep only entries whose value
# is non-empty. The download cells later split each value on "_" into a
# month number and a year.
month_options = [
    {'name': opt.text.strip(), 'value': opt.get_attribute('value')}
    for opt in month_select.options
]
months = [entry for entry in month_options if entry['value']]

months_path = f'{data_dir}/months.json'
with open(months_path, 'w') as out:
    json.dump(months, out, indent=4)

In [None]:
# The dropdown metadata has been saved to disk, so the browser session is no
# longer needed; the download part below uses plain HTTP requests instead.
driver.quit()

## Part 2. Download actual data

In [None]:
# Hour-resolution timestamp tag ("v" + YYYYMMDDHH). It is appended as the
# query string of every download URL below (presumably as a cache-buster).
version = 'v' + datetime.now().strftime("%Y%m%d%H")

In [None]:
def fetch_and_write(url, filepath, timeout=30):
    """Download a JSON document and save it pretty-printed to disk.

    Args:
        url: Address to fetch with an HTTP GET.
        filepath: Destination file; missing parent directories are created.
        timeout: Seconds passed to ``requests.get`` so a stalled connection
            raises instead of hanging the whole run.

    Returns:
        The ``requests`` response on success, or ``None`` when the server
        answered with an HTTP error status (the error is printed and
        nothing is written).

    Raises:
        requests.exceptions.RequestException: On connection problems or
            timeouts (not caught here).
        ValueError: If the response body is not valid JSON; in that case
            ``filepath`` is left untouched.
    """
    # Short pause before every request (presumably to go easy on the server).
    time.sleep(0.1)

    response = requests.get(url, timeout=timeout)
    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError as err:
        print(err)
        return None

    # Decode before opening the output file: opening with 'w' truncates it,
    # so decoding afterwards would leave an empty file on a non-JSON body.
    payload = response.json()

    # Make sure that the directory exists. A bare file name has no directory
    # part, and os.makedirs('') would raise.
    directory = os.path.dirname(filepath)
    if directory:
        os.makedirs(directory, exist_ok=True)

    with open(filepath, 'w') as f:
        json.dump(payload, f, indent=4)
    return response

### Section 1. Overall

In [None]:
# Reload the month list written by the metadata part, so the downloads can run
# without the browser. The path is built from data_dir (instead of a second
# hard-coded '../data') so both parts always agree on the location.
with open(f'{data_dir}/months.json') as f:
    months = json.load(f)

In [None]:
# National-level files published for each month, as
# (file name on the CDN, sub-folder of data_dir it is saved under).
# Listed in the order they are downloaded.
overall_datasets = [
    ("PT_Dashboard.json", "dashboard"),
    ("monthWiseActiveBeneficiaries.json", "beneficiaries"),
    ("keyServices_v2.json", "services_v2"),
    ("aspirational_district.json", "aspirational_district"),
    ("PT_Dashboard_growthmonitoring.json", "growth_monitoring"),
    ("PT_Dashboard_homevisit.json", "home_visits"),
    ("AnganwadiCenter_InternalDashboard.json", "anganwadi"),
    ("awcInfrastructure_InternalDashboard.json", "awc_infrastructure"),
    ("keyServices_v3.json", "services_v3"),
]

for month in tqdm(months):
    # Month values have the form "<month>_<year>".
    (month_num, year) = month['value'].split('_')

    for remote_name, local_dir in overall_datasets:
        url = f"https://cdn.poshantracker.in/pt_dashboard/{year}/{month_num}/{remote_name}?{version}"
        filepath = f"{data_dir}/{local_dir}/{year}_{month_num}.json"
        # HTTP errors (e.g. a file not published for that month) are printed
        # by fetch_and_write and the loop simply moves on.
        fetch_and_write(url, filepath)

### Section 2. State level

In [None]:
# Reload the state list written by the metadata part. The path is built from
# data_dir (instead of a second hard-coded '../data') so both parts always
# agree on the location.
with open(f'{data_dir}/states.json') as f:
    states = json.load(f)

In [None]:
# State-level files published for each month, as
# (file-name suffix on the CDN, sub-folder of data_dir it is saved under).
# On the CDN each file sits in a folder named after the state value and its
# name is prefixed with "<state value>_". Listed in download order.
state_datasets = [
    ("PT_Dashboard.json", "dashboard"),
    ("monthWiseActiveBeneficiaries.json", "beneficiaries"),
    ("keyServices_v2.json", "services_v2"),
    ("aspirational_district.json", "aspirational_district"),
    ("PT_Dashboard_growthmonitoring.json", "growth_monitoring"),
    ("PT_Dashboard_homevisit.json", "home_visits"),
    ("AnganwadiCenter_InternalDashboard.json", "anganwadi"),
    ("awcInfrastructure_InternalDashboard.json", "awc_infrastructure"),
    ("keyServices_v3.json", "services_v3"),
]

for month in tqdm(months):
    # Month values have the form "<month>_<year>".
    (month_num, year) = month['value'].split('_')

    for state in states:
        state_value = state['value']

        for remote_name, local_dir in state_datasets:
            url = f"https://cdn.poshantracker.in/pt_dashboard/{year}/{month_num}/{state_value}/{state_value}_{remote_name}?{version}"
            filepath = f"{data_dir}/{local_dir}/{state_value}/{year}_{month_num}.json"
            # HTTP errors are printed by fetch_and_write and the loop moves on.
            fetch_and_write(url, filepath)

### Section 3. District level