In [2]:
"""
图片合并脚本
将多个文件夹中的图片按指定顺序和尺寸垂直拼接
"""

from PIL import Image
import os

def merge_images(base_path, output_folder, start_num=1, end_num=None):
    """
    Stitch the numbered images of every layer folder into one vertical strip.

    For each image number in ``start_num..end_num`` the file ``<number>.png``
    is looked up in every folder named in the layout, resized to that layout
    entry's size and pasted top-to-bottom onto a transparent RGBA canvas.
    The canvas is always saved as ``<output_folder>/<number>.png``; a missing
    or unreadable source only produces a warning and leaves its slot
    transparent.

    Parameters:
    - base_path: directory that contains the layer folders (``e-svi``, ...).
    - output_folder: directory for the merged images; created if absent.
    - start_num: first image number to process (inclusive).
    - end_num: last image number to process (inclusive). ``None`` means "use
      the largest ``<number>.png`` found in ``base_path/e-svi``" (1 when that
      folder holds no numbered PNG).

    Returns:
        None. Progress and warnings are printed to stdout.
    """
    # Vertical layout, top to bottom: (source folder, width, height).
    # A folder of None is a transparent spacer of the given height.
    layout = [
        ('e-svi', 240, 240),
        (None, 240, 30),
        ('e-category', 240, 30),
        ('e-language', 240, 30),
        ('community_householdincome', 240, 30),
        ('community_demographics', 240, 30),
        ('w-language', 240, 30),
        ('w-category', 240, 30),
        (None, 240, 30),
        ('w-svi', 240, 240),
    ]

    total_height = sum(height for _, _, height in layout)
    width = 240

    print(f"每张合并图尺寸: {width} x {total_height} px")

    os.makedirs(output_folder, exist_ok=True)

    if end_num is None:
        svi_folder = os.path.join(base_path, 'e-svi')
        # isdir rather than exists: a stray *file* called "e-svi" would pass
        # an existence check and then make os.listdir raise.
        if not os.path.isdir(svi_folder):
            print(f"警告: 找不到文件夹 {svi_folder}")
            return

        # Collect the numeric stems of all "<number>.png" files.
        nums = []
        for name in os.listdir(svi_folder):
            if not name.endswith('.png'):
                continue
            try:
                nums.append(int(name.replace('.png', '')))
            except ValueError:
                continue
        end_num = max(nums) if nums else 1

    print(f"将处理图片编号: {start_num} 到 {end_num}")

    # Count what is actually written so the summary stays correct (0, not a
    # negative number) when start_num is greater than end_num.
    processed = 0
    for img_num in range(start_num, end_num + 1):
        filename = f"{img_num}.png"

        # Fully transparent canvas; untouched slots stay transparent.
        merged = Image.new('RGBA', (width, total_height), (0, 0, 0, 0))

        current_y = 0
        success = True

        for folder_name, w, h in layout:
            if folder_name is not None:
                img_path = os.path.join(base_path, folder_name, filename)

                if os.path.exists(img_path):
                    try:
                        # The context manager closes the file handle as soon
                        # as the pixels are converted, instead of waiting for
                        # garbage collection.
                        with Image.open(img_path) as src:
                            img_resized = src.convert('RGBA').resize(
                                (w, h), Image.Resampling.LANCZOS
                            )
                        # The tile doubles as its own alpha mask.
                        merged.paste(img_resized, (0, current_y), img_resized)
                    except Exception as e:
                        # Best effort: one bad source must not abort the run.
                        print(f"  警告: 处理 {img_path} 时出错: {e}")
                        success = False
                else:
                    print(f"  警告: 找不到文件 {img_path}")
                    success = False

            # Spacers and image slots both advance the paste position.
            current_y += h

        output_path = os.path.join(output_folder, filename)
        merged.save(output_path, 'PNG')
        processed += 1

        status = "✓" if success else "⚠"
        print(f"{status} 已保存: {output_path}")

    print(f"\n完成! 共处理 {processed} 张图片")
    print(f"输出位置: {output_folder}")


if __name__ == "__main__":
    # ---- Edit the two paths below before running ----

    # Root of the thesignofthecity_nyc folder; merge_images looks for the
    # layer folders directly underneath it.
    BASE_PATH = "/Users/lihongqian/Documents/GitHub/thesignofthecity_nyc/"

    # Directory that receives the merged PNG files.
    OUTPUT_FOLDER = "/Users/lihongqian/Documents/GitHub/thesignofthecity_nyc/merged_images"

    # -------------------------------------------------

    # Start at image 1; an end of None asks merge_images to detect the last
    # image number on its own.
    merge_images(BASE_PATH, OUTPUT_FOLDER, 1, None)

每张合并图尺寸: 240 x 720 px
将处理图片编号: 1 到 1
  警告: 找不到文件 /Users/lihongqian/Documents/GitHub/thesignofthecity_nyc/e-svi/1.png
  警告: 找不到文件 /Users/lihongqian/Documents/GitHub/thesignofthecity_nyc/w-svi/1.png
⚠ 已保存: /Users/lihongqian/Documents/GitHub/thesignofthecity_nyc/merged_images/1.png

完成! 共处理 1 张图片
输出位置: /Users/lihongqian/Documents/GitHub/thesignofthecity_nyc/merged_images


In [6]:
"""
图片合并脚本
将多个文件夹中的图片按指定顺序和尺寸垂直拼接
"""

from PIL import Image
import os

def merge_images(base_path, output_folder):
    """
    Build one vertical strip per image found in ``base_path/e-svi``.

    Every ``.jpg``/``.jpeg``/``.png`` file in ``e-svi`` defines one strip.
    Its stem with any trailing ``E``/``W`` letters removed (``"1020E"`` ->
    ``"1020"``) is the strip id. For that id the function looks up
    ``<id>E`` in ``e-svi``, ``<id>W`` in ``w-svi`` and ``<id>`` in every other
    layout folder, resizes each hit to its layout size and pastes it onto a
    transparent RGBA canvas. The canvas is always saved as
    ``<output_folder>/<id>.png``; missing or unreadable sources only print a
    warning and leave their slot transparent.

    Parameters:
    - base_path: directory that contains the layer folders.
    - output_folder: directory for the merged images; created if absent.

    Returns:
        None. Progress, warnings and a final summary are printed to stdout.
    """
    # Vertical layout, top to bottom: (source folder, width, height).
    # A folder of None is a transparent spacer of the given height.
    layout = [
        ('e-svi', 240, 240),
        (None, 240, 30),
        ('e-category', 240, 30),
        ('e-language', 240, 30),
        ('community_householdincome', 240, 30),
        ('community_demographics', 240, 30),
        ('w-language', 240, 30),
        ('w-category', 240, 30),
        (None, 240, 30),
        ('w-svi', 240, 240),
    ]

    total_height = sum(height for _, _, height in layout)
    width = 240

    print(f"每张合并图尺寸: {width} x {total_height} px")

    os.makedirs(output_folder, exist_ok=True)

    svi_folder = os.path.join(base_path, 'e-svi')
    # isdir rather than exists: a stray *file* called "e-svi" would pass an
    # existence check and then make os.listdir raise.
    if not os.path.isdir(svi_folder):
        print(f"错误: 找不到文件夹 {svi_folder}")
        return

    image_files = [
        f for f in os.listdir(svi_folder)
        if f.lower().endswith(('.jpg', '.jpeg', '.png'))
    ]

    if not image_files:
        print(f"错误: {svi_folder} 中没有找到图片文件")
        return

    total = len(image_files)
    print(f"找到 {total} 张图片待处理")
    print(f"示例文件名: {image_files[0]}")

    # Extensions are probed in this order; the first existing file wins.
    extensions = ('.png', '.jpg', '.jpeg', '.PNG', '.JPG', '.JPEG')

    success_count = 0
    for idx, filename in enumerate(sorted(image_files), 1):
        base_name = os.path.splitext(filename)[0]  # e.g. "1020E"
        num_only = base_name.rstrip('EWew')        # "1020E" -> "1020"

        # Fully transparent canvas; untouched slots stay transparent.
        merged = Image.new('RGBA', (width, total_height), (0, 0, 0, 0))

        current_y = 0
        has_error = False

        for folder_name, w, h in layout:
            if folder_name is not None:
                # The street-view folders carry a side suffix in the file
                # name; every other folder uses the bare id.
                if folder_name == 'e-svi':
                    search_name = num_only + 'E'
                elif folder_name == 'w-svi':
                    search_name = num_only + 'W'
                else:
                    search_name = num_only

                candidates = (
                    os.path.join(base_path, folder_name, search_name + ext)
                    for ext in extensions
                )
                img_path = next((p for p in candidates if os.path.exists(p)), None)

                if img_path:
                    try:
                        # The context manager closes the file handle as soon
                        # as the pixels are converted; with thousands of
                        # strips, relying on garbage collection can exhaust
                        # file descriptors.
                        with Image.open(img_path) as src:
                            img_resized = src.convert('RGBA').resize(
                                (w, h), Image.Resampling.LANCZOS
                            )
                        # The tile doubles as its own alpha mask.
                        merged.paste(img_resized, (0, current_y), img_resized)
                    except Exception as e:
                        # Best effort: one bad source must not abort the run.
                        print(f"  警告: 处理 {img_path} 时出错: {e}")
                        has_error = True
                else:
                    print(f"  警告: 找不到文件 {search_name} 在 {folder_name} 文件夹中")
                    has_error = True

            # Spacers and image slots both advance the paste position.
            current_y += h

        # Always PNG so transparency survives; named by the bare id.
        output_path = os.path.join(output_folder, f"{num_only}.png")
        merged.save(output_path, 'PNG')

        if not has_error:
            success_count += 1

        # Report every 100 strips and once more at the very end.
        if idx % 100 == 0 or idx == total:
            print(f"进度: {idx}/{total}")

    print(f"\n完成! 成功处理 {success_count}/{total} 张图片")
    print(f"输出位置: {output_folder}")


if __name__ == "__main__":
    # ---- Edit the two paths below before running ----

    # Project root that holds the layer folders.
    BASE_PATH = "/Users/lihongqian/Documents/GitHub/thesignofthecity_nyc"
    # Directory that receives the merged PNG files.
    OUTPUT_FOLDER = "/Users/lihongqian/Documents/GitHub/thesignofthecity_nyc/merged_images"

    # -------------------------------------------------

    merge_images(BASE_PATH, OUTPUT_FOLDER)

每张合并图尺寸: 240 x 720 px
找到 4267 张图片待处理
示例文件名: 135E.jpg
进度: 100/4267
进度: 200/4267
进度: 300/4267


KeyboardInterrupt: 

In [4]:
ls "/Users/lihongqian/Documents/GitHub/thesignofthecity_nyc/e-category" | head -5

1.png
10.png
100.png
1000.png
1001.png
ls: stdout: Undefined error: 0


In [9]:
"""
图片合并脚本
将多个文件夹中的图片按指定顺序和尺寸垂直拼接
"""

from PIL import Image
import os

def merge_images(base_path, output_folder):
    """
    Build one vertical strip per image found in ``base_path/e-svi``.

    Every ``.jpg``/``.jpeg``/``.png`` file in ``e-svi`` defines one strip.
    Its stem with any trailing ``E``/``W`` letters removed (``"1020E"`` ->
    ``"1020"``) is the strip id. For that id the function looks up
    ``<id>E`` in ``e-svi``, ``<id>W`` in ``w-svi`` and ``<id>`` in every other
    layout folder, resizes each hit to its layout size and pastes it onto a
    transparent RGBA canvas, separated by the spacers listed in the layout.
    The canvas is always saved as ``<output_folder>/<id>.png``; missing or
    unreadable sources only print a warning and leave their slot transparent.

    Parameters:
    - base_path: directory that contains the layer folders.
    - output_folder: directory for the merged images; created if absent.

    Returns:
        None. Progress, warnings and a final summary are printed to stdout.
    """
    # Vertical layout, top to bottom: (source folder, width, height).
    # A folder of None is a transparent spacer of the given height.
    layout = [
        ('e-svi', 240, 240),
        (None, 240, 30),
        ('e-category', 240, 30),
        (None, 240, 10),
        ('e-language', 240, 30),
        (None, 240, 10),
        ('community_householdincome', 240, 30),
        (None, 240, 10),
        ('community_demographics', 240, 30),
        (None, 240, 10),
        ('w-language', 240, 30),
        (None, 240, 10),
        ('w-category', 240, 30),
        (None, 240, 30),
        ('w-svi', 240, 240),
    ]

    total_height = sum(height for _, _, height in layout)
    width = 240

    print(f"每张合并图尺寸: {width} x {total_height} px")

    os.makedirs(output_folder, exist_ok=True)

    svi_folder = os.path.join(base_path, 'e-svi')
    # isdir rather than exists: a stray *file* called "e-svi" would pass an
    # existence check and then make os.listdir raise.
    if not os.path.isdir(svi_folder):
        print(f"错误: 找不到文件夹 {svi_folder}")
        return

    image_files = [
        f for f in os.listdir(svi_folder)
        if f.lower().endswith(('.jpg', '.jpeg', '.png'))
    ]

    if not image_files:
        print(f"错误: {svi_folder} 中没有找到图片文件")
        return

    total = len(image_files)
    print(f"找到 {total} 张图片待处理")
    print(f"示例文件名: {image_files[0]}")

    # Extensions are probed in this order; the first existing file wins.
    extensions = ('.png', '.jpg', '.jpeg', '.PNG', '.JPG', '.JPEG')

    success_count = 0
    for idx, filename in enumerate(sorted(image_files), 1):
        base_name = os.path.splitext(filename)[0]  # e.g. "1020E"
        num_only = base_name.rstrip('EWew')        # "1020E" -> "1020"

        # Fully transparent canvas; untouched slots stay transparent.
        merged = Image.new('RGBA', (width, total_height), (0, 0, 0, 0))

        current_y = 0
        has_error = False

        for folder_name, w, h in layout:
            if folder_name is not None:
                # The street-view folders carry a side suffix in the file
                # name; every other folder uses the bare id.
                if folder_name == 'e-svi':
                    search_name = num_only + 'E'
                elif folder_name == 'w-svi':
                    search_name = num_only + 'W'
                else:
                    search_name = num_only

                candidates = (
                    os.path.join(base_path, folder_name, search_name + ext)
                    for ext in extensions
                )
                img_path = next((p for p in candidates if os.path.exists(p)), None)

                if img_path:
                    try:
                        # The context manager closes the file handle as soon
                        # as the pixels are converted; with thousands of
                        # strips, relying on garbage collection can exhaust
                        # file descriptors.
                        with Image.open(img_path) as src:
                            img_resized = src.convert('RGBA').resize(
                                (w, h), Image.Resampling.LANCZOS
                            )
                        # The tile doubles as its own alpha mask.
                        merged.paste(img_resized, (0, current_y), img_resized)
                    except Exception as e:
                        # Best effort: one bad source must not abort the run.
                        print(f"  警告: 处理 {img_path} 时出错: {e}")
                        has_error = True
                else:
                    print(f"  警告: 找不到文件 {search_name} 在 {folder_name} 文件夹中")
                    has_error = True

            # Spacers and image slots both advance the paste position.
            current_y += h

        # Always PNG so transparency survives; named by the bare id.
        output_path = os.path.join(output_folder, f"{num_only}.png")
        merged.save(output_path, 'PNG')

        if not has_error:
            success_count += 1

        # Report every 100 strips and once more at the very end.
        if idx % 100 == 0 or idx == total:
            print(f"进度: {idx}/{total}")

    print(f"\n完成! 成功处理 {success_count}/{total} 张图片")
    print(f"输出位置: {output_folder}")


if __name__ == "__main__":
    # ---- Edit the two paths below before running ----

    # Project root that holds the layer folders.
    BASE_PATH = "/Users/lihongqian/Documents/GitHub/thesignofthecity_nyc"
    # Directory that receives the merged PNG files.
    OUTPUT_FOLDER = "/Users/lihongqian/Documents/GitHub/thesignofthecity_nyc/merged_images"

    # -------------------------------------------------

    merge_images(BASE_PATH, OUTPUT_FOLDER)

每张合并图尺寸: 240 x 770 px
找到 4267 张图片待处理
示例文件名: 135E.jpg
进度: 100/4267
进度: 200/4267
进度: 300/4267
进度: 400/4267
进度: 500/4267
进度: 600/4267
进度: 700/4267
进度: 800/4267
进度: 900/4267
进度: 1000/4267
进度: 1100/4267
进度: 1200/4267
进度: 1300/4267
进度: 1400/4267
进度: 1500/4267
进度: 1600/4267
进度: 1700/4267
进度: 1800/4267
进度: 1900/4267
进度: 2000/4267
进度: 2100/4267
进度: 2200/4267
进度: 2300/4267
进度: 2400/4267
进度: 2500/4267
进度: 2600/4267
进度: 2700/4267
进度: 2800/4267
进度: 2900/4267
进度: 3000/4267
进度: 3100/4267
进度: 3200/4267
进度: 3300/4267
进度: 3400/4267
进度: 3500/4267
进度: 3600/4267
进度: 3700/4267
进度: 3800/4267
进度: 3900/4267
进度: 4000/4267
进度: 4100/4267
进度: 4200/4267
进度: 4267/4267

完成! 成功处理 4267/4267 张图片
输出位置: /Users/lihongqian/Documents/GitHub/thesignofthecity_nyc/merged_images


In [10]:
"""
处理 CSV 并生成 JSON
1. 过滤掉 Language 和 Category 都为空的条目
2. 使用合并后的图片路径
"""

import csv
import json
import os

def _cell(row, column):
    """Return the text of *column* in *row*, or '' when the cell is absent."""
    # DictReader fills the missing cells of a short row with None, so a plain
    # row.get(column, '') is not enough.
    return row.get(column) or ''


def _optional_float(raw):
    """Parse *raw* as a float; a missing or blank cell yields None."""
    text = (raw or '').strip()
    return float(text) if text else None


def process_csv_to_json(csv_path, output_json_path, merged_images_folder="merged_images"):
    """
    Convert the route CSV into a filtered JSON list.

    Rows whose ``Language`` and ``Category`` cells are both empty (after
    stripping whitespace) are dropped. Every remaining row becomes one dict
    that also records the path of its merged image,
    ``<merged_images_folder>/<SVI_ID>.png``.

    Parameters:
    - csv_path: input CSV; must have an ``SVI_ID`` column. UTF-8 with or
      without a byte-order mark is accepted.
    - output_json_path: where the JSON array is written (UTF-8, indent 2).
    - merged_images_folder: folder prefix used in the ``merged_image`` field.

    Returns:
        The list of entry dicts that was written to ``output_json_path``.

    Raises:
        KeyError: a row has no ``SVI_ID`` column.
        ValueError: a non-blank ``Longitude``/``Latitude`` is not a number.
    """
    results = []
    skipped = 0

    # utf-8-sig strips a leading BOM (otherwise the first header would read
    # "\ufeffSVI_ID"); newline='' is what the csv module requires so quoted
    # fields containing line breaks are parsed correctly.
    with open(csv_path, 'r', encoding='utf-8-sig', newline='') as f:
        for row in csv.DictReader(f):
            svi_id = row['SVI_ID']
            language = _cell(row, 'Language').strip()
            category = _cell(row, 'Category').strip()

            # Nothing was annotated for this location: leave it out.
            if not language and not category:
                skipped += 1
                continue

            results.append({
                "id": svi_id,
                "community": _cell(row, 'NTAName'),
                "language_full": _cell(row, 'Language-full'),
                "category": category,
                "longitude": _optional_float(row.get('Longitude')),
                "latitude": _optional_float(row.get('Latitude')),
                "full_street": _cell(row, 'full_stree'),
                "zip": _cell(row, 'l_zip'),
                "post_type": _cell(row, 'post_type'),
                "content": _cell(row, 'Content'),
                "image_name": _cell(row, 'Image Name'),
                "merged_image": f"{merged_images_folder}/{svi_id}.png",
            })

    with open(output_json_path, 'w', encoding='utf-8') as f:
        json.dump(results, f, ensure_ascii=False, indent=2)

    print(f"处理完成!")
    print(f"  - 保留条目: {len(results)}")
    print(f"  - 跳过条目 (Language和Category都为空): {skipped}")
    print(f"  - 输出文件: {output_json_path}")

    return results


if __name__ == "__main__":
    # ---- Edit the paths below before running ----

    CSV_PATH = "/Users/lihongqian/Documents/Spring2024/Website/route-data-overall_final.csv"
    OUTPUT_JSON = "/Users/lihongqian/Documents/GitHub/thesignofthecity_nyc/data_filtered.json"
    # Name of the merged-image folder, written into the JSON as a relative path.
    MERGED_FOLDER = "merged_images"

    # ---------------------------------------------

    process_csv_to_json(
        csv_path=CSV_PATH,
        output_json_path=OUTPUT_JSON,
        merged_images_folder=MERGED_FOLDER,
    )

处理完成!
  - 保留条目: 1017
  - 跳过条目 (Language和Category都为空): 3250
  - 输出文件: /Users/lihongqian/Documents/GitHub/thesignofthecity_nyc/data_filtered.json


In [11]:
"""
处理 CSV 并生成 JSON
1. 从三个 CSV 文件读取数据（overall, e, w）
2. 过滤掉 e 和 w 的 Language 和 Category 都为空的条目
3. 使用合并后的图片路径
4. 包含 e_text 和 w_text 字段
"""

import csv
import json
import os

def _load_side_csv(path):
    """Index one side's CSV (E or W) by SVI_ID."""
    side = {}
    # utf-8-sig strips a leading BOM (otherwise the first header would read
    # "\ufeffSVI_ID"); newline='' is what the csv module requires.
    with open(path, 'r', encoding='utf-8-sig', newline='') as f:
        for row in csv.DictReader(f):
            # "or ''" because DictReader pads short rows with None.
            side[row['SVI_ID']] = {
                'content': row.get('Content') or '',
                'language': (row.get('Language') or '').strip(),
                'language_full': row.get('Language-full') or '',
                'category': (row.get('Category') or '').strip(),
            }
    return side


def _join_sides(e_value, w_value):
    """Join two values with ';' when both are non-empty, else return the non-empty one."""
    if e_value and w_value:
        return f"{e_value};{w_value}"
    return e_value or w_value


def process_csv_to_json(overall_csv, e_csv, w_csv, output_json_path, merged_images_folder="merged_images"):
    """
    Merge the overall, E and W CSV files into one filtered JSON list.

    The E and W files are indexed by ``SVI_ID``. Each row of the overall file
    is kept only if its E or W record has a non-empty ``Language`` or
    ``Category``. For kept rows, ``Language-full`` and ``Category`` of the two
    sides are joined with ``;`` and the ``Content`` of each side is exported
    as ``e_text`` / ``w_text``.

    Parameters:
    - overall_csv: CSV providing ``SVI_ID`` and ``NTAName``.
    - e_csv, w_csv: per-side CSVs with ``SVI_ID``, ``Content``, ``Language``,
      ``Language-full`` and ``Category`` columns.
    - output_json_path: where the JSON array is written (UTF-8, indent 2).
    - merged_images_folder: folder prefix used in the ``merged_image`` field.

    All CSVs may be UTF-8 with or without a byte-order mark.

    Returns:
        The list of entry dicts that was written to ``output_json_path``.

    Raises:
        KeyError: a row in any CSV has no ``SVI_ID`` column.
    """
    e_data = _load_side_csv(e_csv)
    print(f"读取 E CSV: {len(e_data)} 条记录")

    w_data = _load_side_csv(w_csv)
    print(f"读取 W CSV: {len(w_data)} 条记录")

    results = []
    skipped = 0

    with open(overall_csv, 'r', encoding='utf-8-sig', newline='') as f:
        for row in csv.DictReader(f):
            svi_id = row['SVI_ID']

            # An id absent from a side file behaves like an empty record.
            e_info = e_data.get(svi_id, {})
            w_info = w_data.get(svi_id, {})

            e_has_content = bool(e_info.get('language') or e_info.get('category'))
            w_has_content = bool(w_info.get('language') or w_info.get('category'))

            # Skip only when neither side has any annotation.
            if not e_has_content and not w_has_content:
                skipped += 1
                continue

            results.append({
                "id": svi_id,
                "community": row.get('NTAName') or '',
                "language_full": _join_sides(
                    e_info.get('language_full', ''), w_info.get('language_full', '')
                ),
                "category": _join_sides(
                    e_info.get('category', ''), w_info.get('category', '')
                ),
                "e_text": e_info.get('content', ''),
                "w_text": w_info.get('content', ''),
                "merged_image": f"{merged_images_folder}/{svi_id}.png",
            })

    with open(output_json_path, 'w', encoding='utf-8') as f:
        json.dump(results, f, ensure_ascii=False, indent=2)

    print(f"\n处理完成!")
    print(f"  - 保留条目: {len(results)}")
    print(f"  - 跳过条目 (e和w的Language和Category都为空): {skipped}")
    print(f"  - 输出文件: {output_json_path}")

    return results


if __name__ == "__main__":
    # ---- Edit the paths below before running ----

    BASE_PATH = "/Users/lihongqian/Documents/Spring2024/Website/"

    # os.path.join copes with BASE_PATH having (or lacking) a trailing slash;
    # gluing the pieces with "/" produced ".../Website//data_final.json".
    OVERALL_CSV = os.path.join(BASE_PATH, "route-data-overall_final.csv")
    E_CSV = os.path.join(BASE_PATH, "route-data-e_final_community.csv")
    W_CSV = os.path.join(BASE_PATH, "route-data-w_final_community.csv")

    OUTPUT_JSON = os.path.join(BASE_PATH, "data_final.json")
    # Name of the merged-image folder, written into the JSON as a relative path.
    MERGED_FOLDER = "merged_images"

    # ---------------------------------------------

    process_csv_to_json(OVERALL_CSV, E_CSV, W_CSV, OUTPUT_JSON, MERGED_FOLDER)

读取 E CSV: 4267 条记录
读取 W CSV: 4267 条记录

处理完成!
  - 保留条目: 1597
  - 跳过条目 (e和w的Language和Category都为空): 2670
  - 输出文件: /Users/lihongqian/Documents/Spring2024/Website//data_final.json


In [12]:
"""
处理 CSV 并生成 JSON
1. 从四个 CSV 文件读取数据（overall, e, w, community_makeup）
2. 过滤掉 e 和 w 的 Language 和 Category 都为空的条目
3. 过滤掉 Median household income 为 0 的条目
4. 使用合并后的图片路径
5. 包含 e_text 和 w_text 字段
"""

import csv
import json
import os

def _load_income_csv(path):
    """Index the community CSV by SVI_ID, keeping the median household income."""
    incomes = {}
    # utf-8-sig strips a leading BOM (otherwise the first header would read
    # "\ufeffSVI_ID"); newline='' is what the csv module requires.
    with open(path, 'r', encoding='utf-8-sig', newline='') as f:
        for row in csv.DictReader(f):
            try:
                income = float(row.get('Median household income', 0))
            except (ValueError, TypeError):
                # Blank, missing or non-numeric cell: treated as income 0,
                # which the caller filters out.
                income = 0
            incomes[row['SVI_ID']] = {'median_household_income': income}
    return incomes


def _load_side_csv(path):
    """Index one side's CSV (E or W) by SVI_ID."""
    side = {}
    with open(path, 'r', encoding='utf-8-sig', newline='') as f:
        for row in csv.DictReader(f):
            # "or ''" because DictReader pads short rows with None.
            side[row['SVI_ID']] = {
                'content': row.get('Content') or '',
                'language': (row.get('Language') or '').strip(),
                'language_full': row.get('Language-full') or '',
                'category': (row.get('Category') or '').strip(),
            }
    return side


def _join_sides(e_value, w_value):
    """Join two values with ';' when both are non-empty, else return the non-empty one."""
    if e_value and w_value:
        return f"{e_value};{w_value}"
    return e_value or w_value


def process_csv_to_json(overall_csv, e_csv, w_csv, community_csv, output_json_path, merged_images_folder="merged_images"):
    """
    Merge the overall, E, W and community CSV files into one filtered JSON list.

    The E, W and community files are indexed by ``SVI_ID``. A row of the
    overall file is dropped when

    1. its median household income is 0 (this includes ids missing from the
       community file and cells that are blank or not numeric), or
    2. neither its E nor its W record has a non-empty ``Language`` or
       ``Category``.

    The income filter is evaluated first, so a row failing both is counted
    under the income filter only. For kept rows, ``Language-full`` and
    ``Category`` of the two sides are joined with ``;`` and the ``Content`` of
    each side is exported as ``e_text`` / ``w_text``.

    Parameters:
    - overall_csv: CSV providing ``SVI_ID`` and ``NTAName``.
    - e_csv, w_csv: per-side CSVs with ``SVI_ID``, ``Content``, ``Language``,
      ``Language-full`` and ``Category`` columns.
    - community_csv: CSV with ``SVI_ID`` and ``Median household income``.
    - output_json_path: where the JSON array is written (UTF-8, indent 2).
    - merged_images_folder: folder prefix used in the ``merged_image`` field.

    All CSVs may be UTF-8 with or without a byte-order mark.

    Returns:
        The list of entry dicts that was written to ``output_json_path``.

    Raises:
        KeyError: a row in any CSV has no ``SVI_ID`` column.
    """
    community_data = _load_income_csv(community_csv)
    print(f"读取 Community CSV: {len(community_data)} 条记录")

    e_data = _load_side_csv(e_csv)
    print(f"读取 E CSV: {len(e_data)} 条记录")

    w_data = _load_side_csv(w_csv)
    print(f"读取 W CSV: {len(w_data)} 条记录")

    results = []
    skipped_empty = 0
    skipped_income = 0

    with open(overall_csv, 'r', encoding='utf-8-sig', newline='') as f:
        for row in csv.DictReader(f):
            svi_id = row['SVI_ID']

            # Filter 1: no usable income figure for this location.
            income = community_data.get(svi_id, {}).get('median_household_income', 0)
            if income == 0:
                skipped_income += 1
                continue

            # An id absent from a side file behaves like an empty record.
            e_info = e_data.get(svi_id, {})
            w_info = w_data.get(svi_id, {})

            e_has_content = bool(e_info.get('language') or e_info.get('category'))
            w_has_content = bool(w_info.get('language') or w_info.get('category'))

            # Filter 2: skip only when neither side has any annotation.
            if not e_has_content and not w_has_content:
                skipped_empty += 1
                continue

            results.append({
                "id": svi_id,
                "community": row.get('NTAName') or '',
                "language_full": _join_sides(
                    e_info.get('language_full', ''), w_info.get('language_full', '')
                ),
                "category": _join_sides(
                    e_info.get('category', ''), w_info.get('category', '')
                ),
                "e_text": e_info.get('content', ''),
                "w_text": w_info.get('content', ''),
                "merged_image": f"{merged_images_folder}/{svi_id}.png",
            })

    with open(output_json_path, 'w', encoding='utf-8') as f:
        json.dump(results, f, ensure_ascii=False, indent=2)

    print(f"\n处理完成!")
    print(f"  - 保留条目: {len(results)}")
    print(f"  - 跳过条目 (Median household income 为 0): {skipped_income}")
    print(f"  - 跳过条目 (e和w的Language和Category都为空): {skipped_empty}")
    print(f"  - 输出文件: {output_json_path}")

    return results


if __name__ == "__main__":
    # ---- Edit the paths below before running ----

    BASE_PATH = "/Users/lihongqian/Documents/Spring2024/Website/"

    # os.path.join copes with BASE_PATH having (or lacking) a trailing slash;
    # gluing the pieces with "/" produced ".../Website//data_final.json".
    OVERALL_CSV = os.path.join(BASE_PATH, "route-data-overall_final.csv")
    E_CSV = os.path.join(BASE_PATH, "route-data-e_final_community.csv")
    W_CSV = os.path.join(BASE_PATH, "route-data-w_final_community.csv")
    COMMUNITY_CSV = os.path.join(BASE_PATH, "overall_communitymakeup.csv")

    OUTPUT_JSON = os.path.join(BASE_PATH, "data_final.json")
    # Name of the merged-image folder, written into the JSON as a relative path.
    MERGED_FOLDER = "merged_images"

    # ---------------------------------------------

    process_csv_to_json(OVERALL_CSV, E_CSV, W_CSV, COMMUNITY_CSV, OUTPUT_JSON, MERGED_FOLDER)

读取 Community CSV: 4267 条记录
读取 E CSV: 4267 条记录
读取 W CSV: 4267 条记录

处理完成!
  - 保留条目: 1456
  - 跳过条目 (Median household income 为 0): 782
  - 跳过条目 (e和w的Language和Category都为空): 2029
  - 输出文件: /Users/lihongqian/Documents/Spring2024/Website//data_final.json
