In [1]:
import pandas as pd
import numpy as np

# Emission hotspot analysis: load per-reading gas measurements, compute a
# weighted emission score per reading, average per mine, and flag the mines
# whose average score falls in the top decile.
df = pd.read_csv("coal_dataset_10k_5years.csv")

gas_columns = ['CO2_ppm', 'CH4_ppm', 'SO2_ppm', 'NOx_ppm']
meta_columns = ['State', 'District', 'Mine_Name', 'Latitude', 'Longitude']

# Rows missing any gas reading or any mine-identifying field cannot be
# scored or grouped, so they are discarded up front.
df = df.dropna(subset=gas_columns + meta_columns)

# Relative contribution of each gas to the composite score.
# NOTE(review): the weights are applied to raw ppm values, so the gas with the
# largest magnitude (CO2 in the rendered output) dominates the score. Confirm
# whether the columns should be normalised before weighting.
weights = {
    'CO2_ppm': 0.5,
    'CH4_ppm': 0.3,
    'SO2_ppm': 0.1,
    'NOx_ppm': 0.1
}

# Weighted sum driven by `gas_columns`, so the column list lives in one place.
# A missing weight raises KeyError instead of silently dropping a gas.
df['Emission_Score'] = sum(weights[col] * df[col] for col in gas_columns)

# One row per mine: mean of every gas column and of the composite score.
score_columns = gas_columns + ['Emission_Score']
mine_avg = (
    df.groupby(meta_columns)[score_columns]
    .mean()
    .reset_index()
)

# Mines at or above this quantile of the per-mine score are labelled hotspots.
HOTSPOT_QUANTILE = 0.9
threshold = mine_avg['Emission_Score'].quantile(HOTSPOT_QUANTILE)
mine_avg['Hotspot'] = np.where(mine_avg['Emission_Score'] >= threshold, 'Yes', 'No')

output_data = {
    # float() unwraps the NumPy scalar so the payload holds a plain Python
    # float, matching the values produced by .to_dict() below.
    "hotspot_threshold": float(round(threshold, 2)),
    # Mean of the per-mine means (each mine counts once, regardless of how
    # many readings it contributed).
    "average_gas_levels": mine_avg[gas_columns].mean().round(2).to_dict(),
    "hotspot_data": mine_avg.to_dict(orient='records')
}

# Persist the per-mine table (including the Hotspot flag) for downstream use.
mine_avg.to_csv("hotspot_data.csv", index=False)

output_data


{'hotspot_threshold': np.float64(217.94),
 'average_gas_levels': {'CO2_ppm': 425.1,
  'CH4_ppm': 1.95,
  'SO2_ppm': 13.99,
  'NOx_ppm': 26.04},
 'hotspot_data': [{'State': 'Chhattisgarh',
   'District': 'Korba',
   'Mine_Name': 'Dipka',
   'Latitude': 22.418,
   'Longitude': 82.721,
   'CO2_ppm': 423.9587114601761,
   'CH4_ppm': 1.9556750405094592,
   'SO2_ppm': 14.278576765152696,
   'NOx_ppm': 26.127390408500553,
   'Emission_Score': 216.60665495960617,
   'Hotspot': 'No'},
  {'State': 'Chhattisgarh',
   'District': 'Korba',
   'Mine_Name': 'Gevra',
   'Latitude': 22.331,
   'Longitude': 82.614,
   'CO2_ppm': 424.7482283507449,
   'CH4_ppm': 1.952296468524279,
   'SO2_ppm': 13.646888977704725,
   'NOx_ppm': 26.352602330125595,
   'Emission_Score': 216.95975224671278,
   'Hotspot': 'No'},
  {'State': 'Chhattisgarh',
   'District': 'Korba',
   'Mine_Name': 'Kusmunda',
   'Latitude': 22.25,
   'Longitude': 82.52,
   'CO2_ppm': 424.85901765354305,
   'CH4_ppm': 1.9312742211607117,
   'SO