In [1]:
import pandas as pd

# Per-gas weights used when combining the standardized gas values into one
# composite score. Keys are the suffixes of the `mean_<gas>` columns.
# The three weights nominally add up to 1.
GAS_WEIGHTS = {
    "ghgs": 0.6,
    "ghgs_ch4": 0.3,
    "ghgs_n2o": 0.1,
}

# Lower bounds on the weighted score for each emission tier:
#   score >= 1.0         -> High Emission
#   0.5 <= score < 1.0   -> Medium Emission
#   score < 0.5          -> Low Emission
THRESHOLDS = {
    "high": 1.0,
    "medium": 0.5,
}

# Load the results table; it is expected to contain one `mean_<gas>` column
# for every entry in `gases`.
df = pd.read_csv("Results_21Mar2022.csv")

gases = ['ghgs', 'ghgs_ch4', 'ghgs_n2o']

# Dataset-wide mean and population standard deviation (ddof=0) of each gas
# column, keyed by gas name. These are the parameters used for z-scoring.
global_params = {}

for gas in gases:
    column = df[f"mean_{gas}"]
    mu, sigma = column.mean(), column.std(ddof=0)
    global_params[gas] = {'mu': mu, 'sigma': sigma}
    print(f"{gas}: μ={mu:.2f}, σ={sigma:.2f}")

def calculate_weighted_score(row):
    """Return the weighted sum of per-gas z-scores for ``row``.

    For each gas in ``gases`` the value ``row['mean_<gas>']`` is standardized
    with that gas's entry in ``global_params`` (``mu`` and ``sigma``) and then
    multiplied by its weight from ``GAS_WEIGHTS``. The weighted z-scores are
    added together in the order of ``gases``.

    Args:
        row: Any object indexable by the ``mean_<gas>`` labels whose values
            support arithmetic (e.g. a DataFrame row).

    Returns:
        The composite score; ``0`` when ``gases`` is empty.
    """
    def _weighted_z(gas):
        # Standardize first, then weight, so the arithmetic order is fixed.
        stats = global_params[gas]
        z_value = (row[f'mean_{gas}'] - stats['mu']) / stats['sigma']
        return z_value * GAS_WEIGHTS[gas]

    return sum(_weighted_z(gas) for gas in gases)

# Vectorized scoring: calculate_weighted_score only indexes its argument by
# column label and applies arithmetic, so passing the whole frame yields one
# score per row (a Series aligned with df's index) without the per-row Python
# overhead of df.apply(..., axis=1).
df['weighted_score'] = calculate_weighted_score(df)

def classify_emission(score):
    """Map a weighted score to its emission tier label.

    The tiers are checked from highest to lowest against the lower bounds in
    ``THRESHOLDS`` (read at call time); the first bound that ``score`` meets
    or exceeds decides the label.

    Args:
        score: Numeric weighted score.

    Returns:
        ``"High Emission"``, ``"Medium Emission"`` or ``"Low Emission"``.
        A NaN score fails both comparisons and therefore falls through to
        ``"Low Emission"``.
    """
    tiers = (
        ('high', "High Emission"),
        ('medium', "Medium Emission"),
    )
    for key, label in tiers:
        if score >= THRESHOLDS[key]:
            return label
    return "Low Emission"

# Label every row with its emission tier. classify_emission reads THRESHOLDS
# at call time, so the thresholds in effect right now decide the labels.
df['emission_level'] = df['weighted_score'].apply(classify_emission)

# Alternative (disabled): data-driven cut-offs instead of fixed ones. To have
# any effect, these assignments must run BEFORE the classification above.
#   THRESHOLDS['high'] = df['weighted_score'].quantile(0.75)    # top 25% -> High Emission
#   THRESHOLDS['medium'] = df['weighted_score'].quantile(0.5)   # 50th-75th percentile -> Medium Emission

# Export the labelled table ordered from highest to lowest weighted score.
df_sorted = df.sort_values(by='weighted_score', ascending=False)
df_sorted.to_csv("Results_with_emission_levels.csv", index=False)

# Summarize how many rows fall into each tier.
print("\nEmission grade distribution:")
print(df['emission_level'].value_counts())

ghgs: μ=6.17, σ=3.09
ghgs_ch4: μ=1.14, σ=0.77
ghgs_n2o: μ=0.44, σ=0.23

Emission grade distribution:
emission_level
Low Emission       54647
High Emission      10399
Medium Emission     6954
Name: count, dtype: int64
