In [48]:
import pandas as pd
import numpy as np

In [49]:
df = pd.read_csv('agri_energy_merged.csv')

Column	Rule
solar_mw_district	Very small (<0.5 MW), off-grid only
wind_mw	0 MW (no commercial wind)
bioenergy_mw	0 MW (traditional biomass ≠ modern bioenergy)
solar_pumps_installed	0 (rain-fed agriculture)

In [14]:
mask_anjaw = df['district'].str.lower().eq('anjaw')

print("anjaw rows:", mask_anjaw.sum())


anjaw rows: 132


In [15]:
def anjaw_solar_seasonal_factor(month):
    if month in [3, 4, 5]:        # Pre-monsoon peak
        return 1.0
    elif month in [6, 7, 8, 9]:   # Heavy monsoon
        return 0.5
    else:                         # Winter & post-monsoon
        return 0.7


In [16]:
base_solar_mw = 0.35  # conservative off-grid estimate

df.loc[
    mask_anjaw & df['solar_mw_district'].isna(),
    'solar_mw_district'
] = (
    df.loc[mask_anjaw & df['solar_mw_district'].isna(), 'month']
    .apply(lambda m: base_solar_mw * anjaw_solar_seasonal_factor(m))
)


In [17]:
df.loc[mask_anjaw & df['wind_mw'].isna(), 'wind_mw'] = 0.0
df.loc[mask_anjaw & df['bioenergy_mw'].isna(), 'bioenergy_mw'] = 0.0


In [18]:
df.loc[
    mask_anjaw & df['solar_pumps_installed'].isna(),
    'solar_pumps_installed'
] = 0


In [19]:
CO2_PER_MW_YEAR = 1200  # tonnes

df.loc[
    mask_anjaw & df['estimated_co2_reduction_tonnes'].isna(),
    'estimated_co2_reduction_tonnes'
] = (
    df.loc[mask_anjaw & df['estimated_co2_reduction_tonnes'].isna(), 'solar_mw_district']
    * CO2_PER_MW_YEAR / 12
)


In [24]:
print('total:',df.isna().sum())
df.loc[mask_anjaw].isna().sum()


total: Unnamed: 0.1                          0
Unnamed: 0                            0
state                                 0
district                              0
year                                  0
month                                 0
solar_mw_district                 25872
wind_mw                           52008
bioenergy_mw                       6336
solar_pumps_installed             12804
estimated_co2_reduction_tonnes    25872
dtype: int64


Unnamed: 0.1                      0
Unnamed: 0                        0
state                             0
district                          0
year                              0
month                             0
solar_mw_district                 0
wind_mw                           0
bioenergy_mw                      0
solar_pumps_installed             0
estimated_co2_reduction_tonnes    0
dtype: int64


State: Arunachal Pradesh
District: Changlang
Period: 2015–2025

Technology rules
Column	Reality-based rule
solar_mw_district	Grows from ~0 → 1.2–1.5 MW by 2025
wind_mw	Always 0 (no commercial wind)
bioenergy_mw	Very small, modern bioenergy only
solar_pumps_installed	Starts post-2019 via PM-KUSUM
estimated_co2_reduction_tonnes	Derived from solar + bioenergy only

In [25]:
mask_changlang = df['district'].str.lower().eq('changlang')

print("anjaw rows:", mask_changlang.sum())

anjaw rows: 132


In [26]:
def changlang_solar_factor(month):
    # High yield: Oct–Apr
    if month in [10, 11, 12, 1, 2, 3, 4]:
        return 1.0
    # Extreme monsoon: May–Sept (≈ 55–60% drop)
    else:
        return 0.42


In [27]:
def changlang_base_solar_mw(year):
    if year <= 2018:
        return 0.02      # negligible (<5 kW effective)
    elif year <= 2020:
        return 0.10      # first pilot pumps
    elif year <= 2022:
        return 0.30
    else:
        return 1.35      # 2023–2025 steady state


In [28]:
df.loc[
    mask_changlang & df['solar_mw_district'].isna(),
    'solar_mw_district'
] = (
    df.loc[mask_changlang & df['solar_mw_district'].isna()]
    .apply(lambda r:
           changlang_base_solar_mw(r['year']) *
           changlang_solar_factor(r['month']),
           axis=1)
)


In [29]:
df.loc[
    mask_changlang & df['wind_mw'].isna(),
    'wind_mw'
] = 0.0


In [30]:
def changlang_bioenergy_mw(year):
    if year <= 2020:
        return 0.0
    elif year <= 2022:
        return 0.003
    else:
        return 0.006


In [31]:
df.loc[
    mask_changlang & df['bioenergy_mw'].isna(),
    'bioenergy_mw'
] = df.loc[
    mask_changlang & df['bioenergy_mw'].isna(), 'year'
].apply(changlang_bioenergy_mw)


In [32]:
def changlang_solar_pumps(year):
    if year <= 2018:
        return 0
    elif year <= 2020:
        return 5
    elif year <= 2022:
        return 20
    else:
        return 60


In [33]:
df.loc[
    mask_changlang & df['solar_pumps_installed'].isna(),
    'solar_pumps_installed'
] = df.loc[
    mask_changlang & df['solar_pumps_installed'].isna(), 'year'
].apply(changlang_solar_pumps)


In [34]:
CO2_PER_MW_YEAR = 1200

df.loc[
    mask_changlang & df['estimated_co2_reduction_tonnes'].isna(),
    'estimated_co2_reduction_tonnes'
] = (
    (
        df.loc[mask_changlang, 'solar_mw_district'] +
        df.loc[mask_changlang, 'bioenergy_mw']
    ) * CO2_PER_MW_YEAR / 12
)


In [35]:
df.loc[mask_changlang].isna().sum()


Unnamed: 0.1                      0
Unnamed: 0                        0
state                             0
district                          0
year                              0
month                             0
solar_mw_district                 0
wind_mw                           0
bioenergy_mw                      0
solar_pumps_installed             0
estimated_co2_reduction_tonnes    0
dtype: int64

### arunachal pradesh dibang valley

In [60]:

df['state'] = df['state'].astype(str).str.strip().str.lower()
df['district'] = df['district'].astype(str).str.strip().str.lower()


In [64]:
mask_dibang = (
    (df['state'] == 'arunanchal pradesh	') &
    (df['district'].isin(['dibang valley	', 'lower dibang valley	']))
)

print("Rows matched:", mask_dibang.sum())


Rows matched: 0


In [65]:
df.loc[
    df['district'].str.contains('dibang', case=False, na=False),
    ['state', 'district']
].drop_duplicates()


Unnamed: 0,state,district
57420,arunanchal pradesh,dibang valley
69168,arunanchal pradesh,lower dibang valley


In [66]:
df['state'] = (
    df['state']
    .astype(str)
    .str.strip()
    .str.lower()
)

df['district'] = (
    df['district']
    .astype(str)
    .str.strip()
    .str.lower()
    .str.replace(r'\s+', ' ', regex=True)
)


In [74]:
mask_dibang = (
    df['district'].str.contains('dibang', case=False, na=False)
)

print("Rows matched:", mask_dibang.sum())


Rows matched: 264


In [75]:
def dibang_solar_factor(month):
    if month in [3, 4, 5]:        # Pre-monsoon peak
        return 1.0
    elif month in [6, 7, 8, 9]:   # Heavy monsoon
        return 0.35
    else:                         # Winter & post-monsoon
        return 0.65


In [76]:
BASE_SOLAR_MW = 0.25

df.loc[
    mask_dibang & df['solar_mw_district'].isna(),
    'solar_mw_district'
] = (
    df.loc[mask_dibang & df['solar_mw_district'].isna(), 'month']
    .apply(lambda m: BASE_SOLAR_MW * dibang_solar_factor(m))
)


In [77]:
df.loc[mask_dibang & df['wind_mw'].isna(), 'wind_mw'] = 0.0
df.loc[mask_dibang & df['bioenergy_mw'].isna(), 'bioenergy_mw'] = 0.0


In [78]:
df.loc[
    mask_dibang & df['solar_pumps_installed'].isna(),
    'solar_pumps_installed'
] = df.loc[
    mask_dibang & df['solar_pumps_installed'].isna(),
    'year'
].apply(lambda y: 0 if y < 2020 else 2)


In [79]:
CO2_PER_MW_YEAR = 1200

df.loc[
    mask_dibang & df['estimated_co2_reduction_tonnes'].isna(),
    'estimated_co2_reduction_tonnes'
] = (
    df.loc[
        mask_dibang & df['estimated_co2_reduction_tonnes'].isna(),
        'solar_mw_district'
    ] * CO2_PER_MW_YEAR / 12
)


### Arunachal Pradesh east kameng


In [83]:
mask_east_kameng = (
    (df['district'] == 'east kameng')
)

print("Rows matched for East Kameng:", mask_east_kameng.sum())


Rows matched for East Kameng: 132


In [84]:
def east_kameng_solar_factor(month):
    if month in [10, 11, 12, 1, 2, 3, 4]:   # Oct–April (peak)
        return 1.0
    elif month in [6, 7, 8, 9]:            # Monsoon
        return 0.45
    else:                                  # May shoulder
        return 0.65


In [85]:
TOTAL_SOLAR_MW = 0.18

df.loc[
    mask_east_kameng & df['solar_mw_district'].isna(),
    'solar_mw_district'
] = (
    df.loc[mask_east_kameng & df['solar_mw_district'].isna(), 'month']
    .apply(lambda m: TOTAL_SOLAR_MW * east_kameng_solar_factor(m))
)


In [86]:
df.loc[mask_east_kameng & df['wind_mw'].isna(), 'wind_mw'] = 0.0
df.loc[mask_east_kameng & df['bioenergy_mw'].isna(), 'bioenergy_mw'] = 0.0


In [87]:
def east_kameng_pumps(year):
    if year <= 2018:
        return 2        # pilot
    elif year <= 2022:
        return 10       # PM-KUSUM ramp-up
    else:
        return 20       # scale phase
        

df.loc[
    mask_east_kameng & df['solar_pumps_installed'].isna(),
    'solar_pumps_installed'
] = df.loc[
    mask_east_kameng & df['solar_pumps_installed'].isna(),
    'year'
].apply(east_kameng_pumps)


In [88]:
CO2_PER_MW_YEAR = 1200

df.loc[
    mask_east_kameng & df['estimated_co2_reduction_tonnes'].isna(),
    'estimated_co2_reduction_tonnes'
] = (
    df.loc[
        mask_east_kameng & df['estimated_co2_reduction_tonnes'].isna(),
        'solar_mw_district'
    ] * CO2_PER_MW_YEAR / 12
)


In [90]:
# Group by state & district and count nulls column-wise
null_summary = (
    df
    .groupby(['state', 'district'], as_index=False)
    .agg(lambda x: x.isna().sum())
)

# Keep only rows where at least one null exists
null_summary_filtered = null_summary[
    null_summary.drop(columns=['state', 'district']).sum(axis=1) > 0
]

null_summary_filtered.head(40)

Unnamed: 0.2,state,district,Unnamed: 0.1,Unnamed: 0,year,month,solar_mw_district,wind_mw,bioenergy_mw,solar_pumps_installed,estimated_co2_reduction_tonnes
3,andhra pradesh,adilabad,0,0,0,0,0,0,0,132,0
4,andhra pradesh,anantapur,0,0,0,0,0,0,0,132,0
5,andhra pradesh,chittoor,0,0,0,0,0,0,0,132,0
6,andhra pradesh,east godavari,0,0,0,0,0,0,0,132,0
7,andhra pradesh,guntur,0,0,0,0,0,0,0,132,0
8,andhra pradesh,hyderabad,0,0,0,0,0,0,0,132,0
9,andhra pradesh,karimnagar,0,0,0,0,0,0,0,132,0
10,andhra pradesh,khammam,0,0,0,0,0,0,0,132,0
11,andhra pradesh,krishna,0,0,0,0,0,0,0,132,0
12,andhra pradesh,kurnool,0,0,0,0,0,0,0,132,0


### arunchal predesh east siaing

In [92]:
mask_east_siang = (
    (df['district'] == 'east siang')
)

print("Rows matched (East Siang):", mask_east_siang.sum())


Rows matched (East Siang): 132


In [93]:
def east_siang_solar_factor(month):
    if month in [2, 3, 4, 5]:        # Feb–May (peak)
        return 1.0
    elif month in [6, 7, 8]:         # Monsoon
        return 0.5
    else:                            # Shoulder months
        return 0.75


In [94]:
def east_siang_base_solar(year):
    return 0.05 + (year - 2015) * (1.5 - 0.05) / (2025 - 2015)


In [95]:
df.loc[
    mask_east_siang & df['solar_mw_district'].isna(),
    'solar_mw_district'
] = (
    df.loc[mask_east_siang & df['solar_mw_district'].isna()]
    .apply(lambda r: east_siang_base_solar(r['year']) *
                     east_siang_solar_factor(r['month']), axis=1)
)


In [96]:
df.loc[
    mask_east_siang & df['wind_mw'].isna(),
    'wind_mw'
] = df.loc[
    mask_east_siang & df['wind_mw'].isna(),
    'year'
].apply(lambda y: 0.0 if y < 2019 else 0.01)


In [97]:
df.loc[
    mask_east_siang & df['bioenergy_mw'].isna(),
    'bioenergy_mw'
] = df.loc[
    mask_east_siang & df['bioenergy_mw'].isna(),
    'year'
].apply(lambda y: 0.0 if y < 2019 else 0.03)


In [98]:
def east_siang_pumps(year):
    if year <= 2018:
        return 5
    elif year <= 2022:
        return 15
    else:
        return 25

df.loc[
    mask_east_siang & df['solar_pumps_installed'].isna(),
    'solar_pumps_installed'
] = df.loc[
    mask_east_siang & df['solar_pumps_installed'].isna(),
    'year'
].apply(east_siang_pumps)


In [99]:
CO2_PER_MW_YEAR = 1200

df.loc[
    mask_east_siang & df['estimated_co2_reduction_tonnes'].isna(),
    'estimated_co2_reduction_tonnes'
] = (
    (df.loc[mask_east_siang, 'solar_mw_district'] +
     df.loc[mask_east_siang, 'bioenergy_mw'])
    * CO2_PER_MW_YEAR / 12
)


In [100]:
df.isna().sum()

Unnamed: 0.1                          0
Unnamed: 0                            0
state                                 0
district                              0
year                                  0
month                                 0
solar_mw_district                 25212
wind_mw                           51348
bioenergy_mw                       5676
solar_pumps_installed             12144
estimated_co2_reduction_tonnes    25212
dtype: int64

### arunchal pradesh kurung kumey

In [105]:
mask_kk = (
    
    df['district'].str.lower().eq('kurung kumey')
)

mask_kk.sum()

np.int64(132)

In [107]:
def estimate_solar_mu_kk(year):
    if 2015 <= year <= 2018:
        return 0.0
    elif 2019 <= year <= 2021:
        return 0.01
    elif 2022 <= year <= 2025:
        return 0.03
    else:
        return None



In [108]:
df.loc[mask_kk, 'wind_mw'] = 0
df.loc[mask_kk, 'bioenergy_mw'] = 0
df.loc[mask_kk, 'solar_mw_district'] = 0.08
df.loc[mask_kk, 'solar_pumps_installed'] = 20


In [109]:
EMISSION_FACTOR = 0.82  # kg CO2 per kWh

def estimate_co2_kk(year):
    mu = estimate_solar_mu_kk(year)
    if mu is None:
        return None
    kwh = mu * 1_000_000
    tonnes = (kwh * EMISSION_FACTOR) / 1000
    return round(tonnes, 2)


In [110]:
df.loc[mask_kk, 'estimated_co2_reduction_tonnes'] = (
    df.loc[mask_kk, 'year'].apply(estimate_co2_kk)
)


In [113]:
df.isna().sum()


Unnamed: 0.1                          0
Unnamed: 0                            0
state                                 0
district                              0
year                                  0
month                                 0
solar_mw_district                 25080
wind_mw                           51216
bioenergy_mw                       5544
solar_pumps_installed             12012
estimated_co2_reduction_tonnes    25080
dtype: int64

### arunchal pradesh lohit

In [115]:
mask_lohit = ( 
    df['district'].str.lower().eq('lohit')
)

mask_lohit.sum()

np.int64(132)

In [116]:
def estimate_solar_mu_lohit(year):
    if 2015 <= year <= 2018:
        return 0.0
    elif 2019 <= year <= 2021:
        return 0.03
    elif 2022 <= year <= 2025:
        return 0.12
    else:
        return None


In [117]:
df.loc[mask_lohit, 'wind_mw'] = 0.01        # 10 kW hybrid pilot
df.loc[mask_lohit, 'bioenergy_mw'] = 0.01  # negligible electric bio-power
df.loc[mask_lohit, 'solar_mw_district'] = 0.20
df.loc[mask_lohit, 'solar_pumps_installed'] = 52


In [118]:
EMISSION_FACTOR = 0.82  # kg CO2 per kWh

def estimate_co2_lohit(year):
    mu = estimate_solar_mu_lohit(year)
    if mu is None:
        return None
    kwh = mu * 1_000_000
    tonnes = (kwh * EMISSION_FACTOR) / 1000
    return round(tonnes, 2)


In [119]:
df.loc[mask_lohit, 'estimated_co2_reduction_tonnes'] = (
    df.loc[mask_lohit, 'year'].apply(estimate_co2_lohit)
)


### arunanchal pradesh	lower subansiri	

In [124]:
mask_ls = (
    df['district'].str.lower().eq('lower subansiri')
)

mask_ls.sum()

np.int64(132)

In [125]:
def estimate_solar_mu_ls(year):
    if 2015 <= year <= 2018:
        return 0.0
    elif 2019 <= year <= 2021:
        return 0.04
    elif 2022 <= year <= 2025:
        return 0.15
    else:
        return None


In [126]:
df.loc[mask_ls, 'wind_mw'] = 0
df.loc[mask_ls, 'bioenergy_mw'] = 0
df.loc[mask_ls, 'solar_mw_district'] = 0.20
df.loc[mask_ls, 'solar_pumps_installed'] = 100


In [127]:
EMISSION_FACTOR = 0.82  # kg CO2 per kWh

def estimate_co2_ls(year):
    mu = estimate_solar_mu_ls(year)
    if mu is None:
        return None
    kwh = mu * 1_000_000
    tonnes = (kwh * EMISSION_FACTOR) / 1000
    return round(tonnes, 2)


In [128]:
df.loc[mask_ls, 'estimated_co2_reduction_tonnes'] = (
    df.loc[mask_ls, 'year'].apply(estimate_co2_ls)
)


### arunachal Pradesh Pupamp Pare

In [130]:
mask_pp = (
    df['district'].str.lower().eq('papum pare')
)

mask_pp.sum()


np.int64(132)

In [131]:
def estimate_solar_mu_pp(year):
    if 2015 <= year <= 2018:
        return 0.01
    elif 2019 <= year <= 2021:
        return 0.10
    elif 2022 <= year <= 2025:
        return 0.35
    else:
        return None


In [132]:
df.loc[mask_pp, 'wind_mw'] = 0.03
df.loc[mask_pp, 'bioenergy_mw'] = 0.02
df.loc[mask_pp, 'solar_mw_district'] = 0.45
df.loc[mask_pp, 'solar_pumps_installed'] = 180


In [133]:
EMISSION_FACTOR = 0.82  # kg CO2 per kWh

def estimate_co2_pp(year):
    mu = estimate_solar_mu_pp(year)
    if mu is None:
        return None
    kwh = mu * 1_000_000
    tonnes = (kwh * EMISSION_FACTOR) / 1000
    return round(tonnes, 2)


In [134]:
df.loc[mask_pp, 'estimated_co2_reduction_tonnes'] = (
    df.loc[mask_pp, 'year'].apply(estimate_co2_pp)
)


### arunanchal pradesh	tawang	

In [135]:
mask_tawang = (
    df['district'].str.lower().eq('tawang')
)

mask_tawang.sum()


np.int64(132)

In [136]:
def estimate_solar_mu_tawang(year):
    if 2015 <= year <= 2017:
        return 0.01
    elif 2018 <= year <= 2020:
        return 0.04
    elif 2021 <= year <= 2025:
        return 0.10
    else:
        return None


In [137]:
df.loc[mask_tawang, 'wind_mw'] = 0
df.loc[mask_tawang, 'bioenergy_mw'] = 0
df.loc[mask_tawang, 'solar_mw_district'] = 0.15
df.loc[mask_tawang, 'solar_pumps_installed'] = 35


In [138]:
EMISSION_FACTOR = 0.82  # kg CO2 per kWh

def estimate_co2_tawang(year):
    mu = estimate_solar_mu_tawang(year)
    if mu is None:
        return None
    kwh = mu * 1_000_000
    tonnes = (kwh * EMISSION_FACTOR) / 1000
    return round(tonnes, 2)


In [139]:
df.loc[mask_tawang, 'estimated_co2_reduction_tonnes'] = (
    df.loc[mask_tawang, 'year'].apply(estimate_co2_tawang)
)


### arunachal prdesh tirap

In [141]:
mask_tirap = (
    df['district'].str.lower().eq('tirap')
)

mask_tirap.sum()

np.int64(132)

In [142]:
def estimate_solar_mu_tirap(year):
    if 2015 <= year <= 2018:
        return 0.0
    elif 2019 <= year <= 2021:
        return 0.02
    elif 2022 <= year <= 2025:
        return 0.08
    else:
        return None


In [143]:
df.loc[mask_tirap, 'wind_mw'] = 0
df.loc[mask_tirap, 'bioenergy_mw'] = 0
df.loc[mask_tirap, 'solar_mw_district'] = 0.12
df.loc[mask_tirap, 'solar_pumps_installed'] = 38


In [144]:
EMISSION_FACTOR = 0.82  # kg CO2 per kWh

def estimate_co2_tirap(year):
    mu = estimate_solar_mu_tirap(year)
    if mu is None:
        return None
    kwh = mu * 1_000_000
    tonnes = (kwh * EMISSION_FACTOR) / 1000
    return round(tonnes, 2)


In [145]:
df.loc[mask_tirap, 'estimated_co2_reduction_tonnes'] = (
    df.loc[mask_tirap, 'year'].apply(estimate_co2_tirap)
)


### arunchl Pradesh upper siang

In [146]:
mask_us = (
    df['district'].str.lower().eq('upper siang')
)

mask_us.sum()

np.int64(132)

In [147]:
def estimate_solar_mu_us(year):
    if 2015 <= year <= 2018:
        return 0.0
    elif 2019 <= year <= 2021:
        return 0.03
    elif 2022 <= year <= 2025:
        return 0.10
    else:
        return None


In [148]:
df.loc[mask_us, 'wind_mw'] = 0
df.loc[mask_us, 'bioenergy_mw'] = 0
df.loc[mask_us, 'solar_mw_district'] = 0.10
df.loc[mask_us, 'solar_pumps_installed'] = 28


In [149]:
EMISSION_FACTOR = 0.82  # kg CO2 per kWh

def estimate_co2_us(year):
    mu = estimate_solar_mu_us(year)
    if mu is None:
        return None
    kwh = mu * 1_000_000
    tonnes = (kwh * EMISSION_FACTOR) / 1000
    return round(tonnes, 2)


In [150]:
df.loc[mask_us, 'estimated_co2_reduction_tonnes'] = (
    df.loc[mask_us, 'year'].apply(estimate_co2_us)
)


### arunachal pradesh upper subansiri

In [151]:
mask_uss = (
    df['district'].str.lower().eq('upper subansiri')
)

mask_uss.sum()

np.int64(132)

In [152]:
def estimate_solar_mu_uss(year):
    if 2015 <= year <= 2018:
        return 0.0
    elif 2019 <= year <= 2021:
        return 0.04
    elif 2022 <= year <= 2025:
        return 0.12
    else:
        return None


In [153]:
df.loc[mask_uss, 'wind_mw'] = 0
df.loc[mask_uss, 'bioenergy_mw'] = 0
df.loc[mask_uss, 'solar_mw_district'] = 0.12
df.loc[mask_uss, 'solar_pumps_installed'] = 42


In [154]:
EMISSION_FACTOR = 0.82

def estimate_co2_uss(year):
    mu = estimate_solar_mu_uss(year)
    if mu is None:
        return None
    return round((mu * 1_000_000 * EMISSION_FACTOR) / 1000, 2)


In [155]:
df.loc[mask_uss, 'estimated_co2_reduction_tonnes'] = (
    df.loc[mask_uss, 'year'].apply(estimate_co2_uss)
)


### arunanchal pradesh west kameng

In [156]:
mask_wk = (
    df['district'].str.lower().eq('west kameng')
)

mask_wk.sum()

np.int64(132)

In [157]:
def estimate_solar_mu_wk(year):
    if 2015 <= year <= 2017:
        return 0.01
    elif 2018 <= year <= 2021:
        return 0.08
    elif 2022 <= year <= 2025:
        return 0.25
    else:
        return None


In [158]:
def estimate_wind_mu_wk(year):
    if year < 2023:
        return 0.0
    else:
        return 0.015


In [159]:
df.loc[mask_wk, 'solar_mw_district'] = 0.35
df.loc[mask_wk, 'solar_pumps_installed'] = 150

df.loc[mask_wk, 'wind_mw'] = 0.007   # wind part of hybrid
df.loc[mask_wk, 'bioenergy_mw'] = 0


In [160]:
EMISSION_FACTOR = 0.82

def estimate_co2_wk(row):
    solar_mu = estimate_solar_mu_wk(row['year'])
    wind_mu = estimate_wind_mu_wk(row['year'])
    total_mu = (solar_mu or 0) + (wind_mu or 0)
    return round((total_mu * 1_000_000 * EMISSION_FACTOR) / 1000, 2)


In [161]:
df.loc[mask_wk, 'estimated_co2_reduction_tonnes'] = (
    df.loc[mask_wk].apply(estimate_co2_wk, axis=1)
)


### arunachal pradesh west siang

In [162]:
mask_ws = (
    df['district'].str.lower().eq('west siang')
)

mask_ws.sum()

np.int64(132)

In [163]:
def estimate_solar_mu_ws(year):
    if 2015 <= year <= 2018:
        return 0.0
    elif 2019 <= year <= 2021:
        return 0.04
    elif 2022 <= year <= 2025:
        return 0.15
    else:
        return None


In [164]:
def estimate_wind_mu_ws(year):
    return 0.0


In [165]:
df.loc[mask_ws, 'solar_mw_district'] = 0.25
df.loc[mask_ws, 'solar_pumps_installed'] = 110

df.loc[mask_ws, 'wind_mw'] = 0.0
df.loc[mask_ws, 'bioenergy_mw'] = 0.0


In [166]:
EMISSION_FACTOR = 0.82

def estimate_co2_ws(row):
    solar_mu = estimate_solar_mu_ws(row['year'])
    wind_mu = estimate_wind_mu_ws(row['year'])
    total_mu = (solar_mu or 0) + (wind_mu or 0)
    return round((total_mu * 1_000_000 * EMISSION_FACTOR) / 1000, 2)


In [167]:
df.loc[mask_ws, 'estimated_co2_reduction_tonnes'] = (
    df.loc[mask_ws].apply(estimate_co2_ws, axis=1)
)


In [168]:
df.isna().sum()

Unnamed: 0.1                          0
Unnamed: 0                            0
state                                 0
district                              0
year                                  0
month                                 0
solar_mw_district                 23892
wind_mw                           50028
bioenergy_mw                       4356
solar_pumps_installed             10824
estimated_co2_reduction_tonnes    23892
dtype: int64

In [177]:
# Group by state & district and count nulls column-wise
null_summary = (
    df
    .groupby(['state', 'district'], as_index=False)
    .agg(lambda x: x.isna().sum())
)

# Keep only rows where at least one null exists
null_summary_filtered = null_summary[
    null_summary.drop(columns=['state', 'district']).sum(axis=1) > 0
]

null_summary_filtered.loc[0:20]

Unnamed: 0.2,state,district,Unnamed: 0.1,Unnamed: 0,year,month,solar_mw_district,wind_mw,bioenergy_mw,solar_pumps_installed,estimated_co2_reduction_tonnes
3,andhra pradesh,adilabad,0,0,0,0,0,0,0,132,0
4,andhra pradesh,anantapur,0,0,0,0,0,0,0,132,0
5,andhra pradesh,chittoor,0,0,0,0,0,0,0,132,0
6,andhra pradesh,east godavari,0,0,0,0,0,0,0,132,0
7,andhra pradesh,guntur,0,0,0,0,0,0,0,132,0
8,andhra pradesh,hyderabad,0,0,0,0,0,0,0,132,0
9,andhra pradesh,karimnagar,0,0,0,0,0,0,0,132,0
10,andhra pradesh,khammam,0,0,0,0,0,0,0,132,0
11,andhra pradesh,krishna,0,0,0,0,0,0,0,132,0
12,andhra pradesh,kurnool,0,0,0,0,0,0,0,132,0
