In [5]:
import pandas as pd
import numpy as np

# 1) 读入数据，假设文件名为 'car_data.csv'
#    na_values=['-'] 能把 CSV 里出现的 '-' 自动识别为 NaN
df = pd.read_csv("./car_data.csv", na_values=['-'])

# 2) 将相关列强制转换为数值(如果有无法解析的字符串，也会变成 NaN)
numeric_cols = [
    'Horsepower(hp)',
    'Acceleration 0 to 100 km/h (seconds)',
    'Average_energy_consumption_1 (kWh/100km)',
    'Average_energy_consumption_2_3 (L/100km)'
]
for col in numeric_cols:
    df[col] = pd.to_numeric(df[col], errors='coerce')

# 3) 如果要计算排放/价格，需要这些列不为空，这里演示直接丢弃有 NaN 的行
#    (也可以使用 fillna() 补默认值，但要保证不会带来逻辑错误)
df.dropna(
    subset=[
        'Horsepower(hp)',
        'Acceleration 0 to 100 km/h (seconds)',
        'Average_energy_consumption_1 (kWh/100km)',
        'Average_energy_consumption_2_3 (L/100km)',
        'Energy_type'
    ],
    inplace=True
)

# 
co2_intensity_elec = 0.408  # kg CO2/kWh
co2_intensity_fuel = 2.31   # kg CO2/L
price_elec = 0.3            # SGD/kWh
price_fuel = 2.6            # SGD/L

def calc_emission_and_price(row):
    etype = row['Energy_type']
    if etype == 1:
        # 电动车，用 kWh/100km
        consumption = row['Average_energy_consumption_1 (kWh/100km)']
        ghg_100km = consumption * co2_intensity_elec   # kg CO2 per 100 km
        price = price_elec
    else:
        # 混动/燃油，用 L/100km
        consumption = row['Average_energy_consumption_2_3 (L/100km)']
        ghg_100km = consumption * co2_intensity_fuel
        price = price_fuel

    ghg_10000km = ghg_100km * 100
    return ghg_10000km, price

df['GHG_per_10000km'], df['Energy_price'] = zip(
    *df.apply(calc_emission_and_price, axis=1)
)

def car_score(row, beta, gamma, sigma):
    hp = row['Horsepower(hp)']
    accel_time = row['Acceleration 0 to 100 km/h (seconds)']
    ghg_10k = row['GHG_per_10000km']
    price = row['Energy_price']

    # (horsepower * accel^{-1})^beta * (ghg)^{-gamma} * (price)^{-sigma}
    performance_factor = hp * (1.0 / accel_time)
    return (performance_factor**beta) * (ghg_10k**(-gamma)) * (price**(-sigma))

# 根据四种偏好分别计算新列
df['Score_perf'] = df.apply(lambda r: car_score(r, 0.8, 0.1, 0.1), axis=1)
df['Score_balanced'] = df.apply(lambda r: car_score(r, 0.4, 0.3, 0.3), axis=1)
df['Score_env'] = df.apply(lambda r: car_score(r, 0.2, 0.7, 0.1), axis=1)
df['Score_daily'] = df.apply(lambda r: car_score(r, 0.1, 0.1, 0.8), axis=1)

# 查看结果
print(df[[
    'Company', 'Car_type', 
    'Horsepower(hp)', 'Acceleration 0 to 100 km/h (seconds)', 
    'GHG_per_10000km', 'Energy_price', 
    'Score_perf', 'Score_balanced', 'Score_env', 'Score_daily'
]])

# 可保存回CSV
df.to_csv("car_data_scored.csv", index=False)


    Company                                  Car_type  Horsepower(hp)  \
85   Toyota  Toyota RAV4 2.5 Plug-in Hybrid AWD Specs           306.0   
91   Toyota            Prius 2.0 Plug-In Hybrid Specs           223.0   
180    Audi            A3 allstreet 40 TFSIe S tronic           204.0   
205     BMW            320e Plug-in Hybrid Steptronic           204.0   
208     BMW            330e Plug-in Hybrid Steptronic           292.0   
216     BMW            530e Plug-in Hybrid Steptronic           299.0   
220     BMW     550e Plug-in Hybrid xDrive Steptronic           489.0   

     Acceleration 0 to 100 km/h (seconds)  GHG_per_10000km  Energy_price  \
85                                    6.0            231.0           2.6   
91                                    6.7            115.5           2.6   
180                                   7.4             69.3           2.6   
205                                   7.6            346.5           2.6   
208                                