In [1]:
import pandas as pd

# Input and output locations for this step.
file_path = 'merged_with_filenames.csv'
output_path = '城市最佳景点游览原则.csv'

# Load the table, then keep only rows that have a 'File Name' and whose 评分
# value parses as a number (unparseable entries are coerced to NaN and dropped).
df = (
    pd.read_csv(file_path)
    .dropna(subset=['File Name'])
    .assign(**{'评分': lambda frame: pd.to_numeric(frame['评分'], errors='coerce')})
    .dropna(subset=['评分'])
)

# For every 'File Name' group, find the index label of its highest 评分 and
# select exactly those rows.
best_row_labels = df.groupby('File Name')['评分'].idxmax()
max_score_df = df.loc[best_row_labels]

max_score_df.to_csv(output_path, index=False)


In [2]:
# File locations: the best-rated row per 'File Name' (input), the Excel
# workbook that is joined on its '来源城市' column (input), and the merged
# table (output).
csv_file_path = '城市最佳景点游览原则.csv'
excel_file_path = 'C题最终版数据集.xlsx'
output_path = '各城市指标最终版.csv'

scenic_data = pd.read_csv(csv_file_path)
city_attributes_data = pd.read_excel(excel_file_path)

# Join the two tables wherever 'File Name' equals '来源城市'. No `how` is given,
# so this is pandas' default inner join: rows without a match on either side
# are left out of the result.
merged_data = scenic_data.merge(
    city_attributes_data,
    left_on='File Name',
    right_on='来源城市',
)

merged_data.to_csv(output_path, index=False)


In [3]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Load the merged city indicator table.
city_indicators_path = '各城市指标最终版.csv'
city_indicators_data = pd.read_csv(city_indicators_path)

# Indicator columns that take part in the grey relational analysis.
attributes = [
    'AQI', '绿化覆盖率 (%)', '废水处理率 (%)', '废气处理率 (%)', '垃圾分类处理率 (%)',
    '历史遗迹数量', '博物馆数量', '文化活动频次', '文化设施数量', '公共交通覆盖率 (%)',
    '线路密度 (km/km²)', '高速公路里程 (km)', '机场航班数量', '年平均气温 (℃)',
    '年降水量 (mm)', '适宜旅游天数', '空气湿度 (%)', '餐馆数量', '特色美食数量', '美食活动频次'
]

# Cost-type indicators: a larger raw value is worse. AQI (air quality index)
# rises with pollution, so it must not be rewarded for being large.
cost_attributes = ['AQI']

# Min-max scale every indicator column to the [0, 1] range.
scaler = MinMaxScaler()
city_indicators_data[attributes] = scaler.fit_transform(city_indicators_data[attributes])

# Flip cost-type indicators so that, for every column, 1 means "best". The
# reference sequence below is all ones, i.e. the ideal city is the one with the
# maximum scaled value on every indicator; without this flip the most polluted
# city would be treated as ideal on AQI.
city_indicators_data[cost_attributes] = 1 - city_indicators_data[cost_attributes]

# NOTE(review): temperature, precipitation and humidity are still treated as
# "larger is better", as before. They may be better modelled as
# intermediate-type indicators; confirm with the modelling assumptions.

# Ideal reference sequence: the best possible scaled value (1) for each indicator.
reference_sequence = np.ones(city_indicators_data[attributes].shape[1])

# Grey relational coefficient
def grey_relational_coefficient(sequence, reference_sequence, rho=0.5):
    """Return the grey relational coefficient of each element of ``sequence``.

    The coefficient is ``(min_diff + rho * max_diff) / (diff + rho * max_diff)``
    where ``diff = |sequence - reference_sequence|`` and ``min_diff`` /
    ``max_diff`` are the smallest / largest deviation found anywhere in
    ``sequence``.

    Grey relational analysis defines those two extremes over *all* compared
    sequences, so pass the whole 2-D matrix (one row per alternative) to get
    comparable coefficients; a single 1-D row only sees its own extremes.

    Args:
        sequence: Array-like of numeric values, 1-D or 2-D. It is broadcast
            against ``reference_sequence``.
        reference_sequence: Array-like ideal values, one per indicator.
        rho: Distinguishing coefficient (default 0.5).

    Returns:
        ``numpy.ndarray`` with the same shape as the broadcast deviation.
        NaN inputs yield NaN coefficients but are ignored when locating the
        extremes. Where the denominator is zero (the element equals the
        reference and ``rho * max_diff`` is zero) the coefficient is 1.0
        instead of NaN/inf.
    """
    diffs = np.abs(
        np.asarray(sequence, dtype=float) - np.asarray(reference_sequence, dtype=float)
    )
    # Nothing to compare (empty or all-NaN input): there are no extremes to take.
    if np.isnan(diffs).all():
        return diffs

    # NaN-aware extremes so one missing value does not poison every coefficient.
    min_diff = np.nanmin(diffs)
    max_diff = np.nanmax(diffs)

    denominator = diffs + rho * max_diff
    with np.errstate(divide='ignore', invalid='ignore'):
        coefficients = (min_diff + rho * max_diff) / denominator
    # A zero denominator means a perfect match with the reference.
    return np.where(denominator == 0, 1.0, coefficients)

# Grey relational grade
def grey_relational_grade(data, reference_sequence, rho=0.5):
    """Return the grey relational grade of every row of ``data``.

    All rows are evaluated together so that the minimum and maximum deviation
    are global across every row and column. Computing them row by row makes
    grades from different rows incomparable: any row whose deviations are all
    equal would score 1.0 regardless of how far it is from the reference.

    Args:
        data: ``pandas.DataFrame`` of numeric indicators, one row per
            alternative and one column per indicator.
        reference_sequence: Array-like ideal values, one per column of ``data``.
        rho: Distinguishing coefficient forwarded to
            ``grey_relational_coefficient`` (default 0.5).

    Returns:
        ``pandas.Series`` indexed like ``data`` holding the mean coefficient of
        each row. A row containing NaN gets a NaN grade.
    """
    matrix = data.to_numpy(dtype=float)
    # No rows or no columns: nothing to average.
    if matrix.size == 0:
        return pd.Series(np.nan, index=data.index, dtype=float)

    coefficients = grey_relational_coefficient(matrix, reference_sequence, rho)
    return pd.Series(coefficients.mean(axis=1), index=data.index)

# Score every row and attach its grey relational grade as a new column.
city_grades = grey_relational_grade(city_indicators_data[attributes], reference_sequence)
city_indicators_data['灰色关联度'] = city_grades

# Order rows from highest to lowest grade and keep the first 50.
ranked_cities = city_indicators_data.sort_values(by='灰色关联度', ascending=False)
top_50_cities = ranked_cities.head(50)

output_path = '最令外国游客向往的50个城市（灰色关联）最终版.csv'
top_50_cities.to_csv(output_path, index=False)
