In [1]:
import requests
import zipfile
import os
import pandas as pd


In [9]:
# URL of the remote archive
url = 'https://fema.gov/about/reports-and-data/openfema/nfirs_fire_hazmat_pdr_2022.zip'
# Local file name the downloaded archive is saved under
local_zip_file = 'nfirs_fire_hazmat_pdr_2022.zip'
# Folder the first-level (outer) archive is extracted into
first_extracted_folder = 'nfirs_fire_hazmat_pdr_2022'
# Folder the second-level (inner) archive is extracted into
second_extracted_folder = os.path.join(first_extracted_folder, 'NFIRS_FIRES_2022_102623')
# Paths of the files to read
# NOTE(review): these two paths are not joined onto first_extracted_folder, and
# no 'incident.txt' appears in the extracted directory listing -- confirm
# against the cells that consume them.
target_file = 'NFIRS_FIRES_2022_102623/ffcasualty.txt'
incident_file = 'NFIRS_FIRES_2022_102623/incident.txt'
# Path of the output CSV file. second_extracted_folder already ends in
# 'NFIRS_FIRES_2022_102623', so the file name is joined onto it directly;
# repeating the folder name would point at a nested directory that the
# extracted directory listing does not contain.
output_csv_file = os.path.join(second_extracted_folder, 'filitered_ffcasualty.csv')


In [3]:
# Download the archive.
# stream=True avoids holding the whole archive in memory, and the timeout keeps
# a stalled connection from hanging the kernel indefinitely.
with requests.get(url, stream=True, timeout=60) as response:
    # Fail loudly on an HTTP error instead of saving an error page as the .zip.
    response.raise_for_status()
    with open(local_zip_file, 'wb') as f:
        # Write the body to disk in 1 MiB pieces.
        for chunk in response.iter_content(chunk_size=1024 * 1024):
            f.write(chunk)
print(f"Downloaded {local_zip_file}")


Downloaded nfirs_fire_hazmat_pdr_2022.zip


In [4]:
# Unpack the first-level (outer) archive into its own folder
with zipfile.ZipFile(local_zip_file, mode='r') as zip_ref:
    zip_ref.extractall(path=first_extracted_folder)
print(f"Extracted files to {first_extracted_folder}")


Extracted files to nfirs_fire_hazmat_pdr_2022


In [5]:
# Unpack the second-level (inner) archive that the first extraction produced
second_zip_file = os.path.join(
    first_extracted_folder,
    'nfirs_fire_hazmat_pdr_2022',
    'NFIRS_FIRES_2022_102623.zip',
)
with zipfile.ZipFile(second_zip_file, mode='r') as zip_ref:
    zip_ref.extractall(path=second_extracted_folder)
print(f"Extracted files to {second_extracted_folder}")


Extracted files to nfirs_fire_hazmat_pdr_2022/NFIRS_FIRES_2022_102623


In [6]:
# List the directory structure produced by extraction
def list_files(startpath):
    """Print the directory tree rooted at ``startpath`` as an indented listing.

    Every directory is printed as ``name/`` and every file on its own line,
    indented four spaces per level below ``startpath``.

    Args:
        startpath: Path of the directory to walk.

    Returns:
        None. The listing is written to standard output.
    """
    for root, dirs, files in os.walk(startpath):
        # Sorting in place fixes the order in which os.walk descends, so the
        # listing no longer depends on filesystem enumeration order.
        dirs.sort()
        # Depth comes from the path relative to startpath. Stripping startpath
        # with str.replace miscounts when startpath ends in a separator or
        # occurs again inside a deeper path.
        rel = os.path.relpath(root, startpath)
        level = 0 if rel == os.curdir else rel.count(os.sep) + 1
        indent = ' ' * 4 * level
        # normpath drops a trailing separator so the top folder keeps its name.
        print(f"{indent}{os.path.basename(os.path.normpath(root))}/")
        subindent = ' ' * 4 * (level + 1)
        for f in sorted(files):
            print(f"{subindent}{f}")

# Show what the second-level extraction produced
list_files(startpath=second_extracted_folder)

NFIRS_FIRES_2022_102623/
    incidentaddress.txt
    fireincident.txt
    civiliancasualty.txt
    hazchem.txt
    arson.txt
    basicincident.txt
    hazmat.txt
    wildlands.txt
    arsonjuvsub.txt
    ems.txt
    hazmatequipinvolved.txt
    codelookup.txt
    ffcasualty.txt
    fdheader.txt
    hazmobprop.txt
    basicaid.txt
    ffequipfail.txt
    filtered_basicincident.csv
    arsonagencyreferal.txt


In [7]:
# List of Los Angeles County cities, alphabetical, one row per initial letter
la_cities = [
    "Agoura Hills", "Alhambra", "Arcadia", "Artesia", "Avalon", "Azusa",
    "Baldwin Park", "Bell", "Bell Gardens", "Bellflower", "Beverly Hills", "Bradbury", "Burbank",
    "Calabasas", "Carson", "Cerritos", "Claremont", "Commerce", "Compton", "Covina", "Cudahy", "Culver City",
    "Diamond Bar", "Downey", "Duarte",
    "El Monte", "El Segundo",
    "Gardena", "Glendale", "Glendora",
    "Hawaiian Gardens", "Hawthorne", "Hermosa Beach", "Hidden Hills", "Huntington Park",
    "Industry", "Inglewood", "Irwindale",
    "La Cañada Flintridge", "La Habra Heights", "La Mirada", "La Puente", "La Verne", "Lakewood",
    "Lancaster", "Lawndale", "Lomita", "Long Beach", "Los Angeles", "Lynwood",
    "Malibu", "Manhattan Beach", "Maywood", "Monrovia", "Montebello", "Monterey Park",
    "Norwalk",
    "Palmdale", "Palos Verdes Estates", "Paramount", "Pasadena", "Pico Rivera", "Pomona",
    "Rancho Palos Verdes", "Redondo Beach", "Rolling Hills", "Rolling Hills Estates", "Rosemead",
    "San Dimas", "San Fernando", "San Gabriel", "San Marino", "Santa Clarita", "Santa Fe Springs",
    "Santa Monica", "Sierra Madre", "Signal Hill", "South El Monte", "South Gate", "South Pasadena",
    "Temple City", "Torrance",
    "Vernon",
    "Walnut", "West Covina", "West Hollywood", "Westlake Village", "Whittier",
]

In [16]:
import os
import pandas as pd

# Read the firefighter-casualty file and keep only the key indicator columns
file_path = os.path.join(second_extracted_folder, 'ffcasualty.txt')

# Key indicators to retain
key_columns = ['INCIDENT_KEY', 'STATE', 'FDID', 'INC_DATE', 'INC_NO', 'EXP_NO',
               'FF_SEQ_NO', 'GENDER', 'CAREER', 'AGE', 'INJ_DATE', 'SEVERITY',
               'ACTIVITY', 'CAUSE', 'FACTOR']

if os.path.exists(file_path):
    # The file is '^'-delimited. 'latin1' maps every possible byte to a
    # character, so decoding cannot raise UnicodeDecodeError; the handler that
    # used to wrap this call was unreachable and has been removed.
    df = pd.read_csv(file_path, delimiter='^', encoding='latin1')
    # NOTE(review): INC_DATE is inferred as an integer, so a leading zero is
    # dropped (INCIDENT_KEY carries 07072022 where INC_DATE shows 7072022).
    # Consider dtype={'INC_DATE': str} if downstream code parses the date --
    # confirm before changing.
    # Keep the key columns, then drop every row with a missing value in them.
    df = df[key_columns].dropna()
    print(df)
else:
    # NOTE(review): `df` is not (re)assigned on this path, so a frame left over
    # from an earlier cell would remain in the kernel.
    print(f"Error: The file at path {file_path} does not exist.")


                    INCIDENT_KEY STATE   FDID  INC_DATE  INC_NO  EXP_NO  \
1     AK_11100_11262022_227808_0    AK  11100  11262022  227808       0   
2     AK_11100_11262022_227808_0    AK  11100  11262022  227808       0   
3     AK_11100_11262022_227808_0    AK  11100  11262022  227808       0   
5       AK_13000_10132022_2088_0    AK  13000  10132022    2088       0   
6       AK_13000_10132022_2088_0    AK  13000  10132022    2088       0   
...                          ...   ...    ...       ...     ...     ...   
8160     WY_07437_07072022_233_0    WY  07437   7072022     233       0   
8161      WY_21307_01032022_85_0    WY  21307   1032022      85       0   
8162    WY_21307_04242022_3018_0    WY  21307   4242022    3018       0   
8164    WY_21411_12232022_1791_0    WY  21411  12232022    1791       0   
8165  WY_23527_10102022_220063_0    WY  23527  10102022  220063       0   

      FF_SEQ_NO  GENDER  CAREER  AGE      INJ_DATE SEVERITY ACTIVITY CAUSE  \
1             1      

In [11]:
# Read the firefighter-casualty file with all of its columns
# NOTE(review): when the notebook is run top to bottom, this cell reassigns
# `df` after the filtered load in the preceding cell -- confirm which frame the
# later cells expect.
file_path = os.path.join(second_extracted_folder, 'ffcasualty.txt')

if os.path.exists(file_path):
    # The file is '^'-delimited. 'latin1' maps every possible byte to a
    # character, so decoding cannot raise UnicodeDecodeError; the handler that
    # used to wrap this call was unreachable and has been removed.
    df = pd.read_csv(file_path, delimiter='^', encoding='latin1')
    print(df)
else:
    print(f"Error: The file at path {file_path} does not exist.")

                    INCIDENT_KEY STATE   FDID  INC_DATE  INC_NO  EXP_NO  \
0     AK_11100_05222022_223075_0    AK  11100   5222022  223075       0   
1     AK_11100_11262022_227808_0    AK  11100  11262022  227808       0   
2     AK_11100_11262022_227808_0    AK  11100  11262022  227808       0   
3     AK_11100_11262022_227808_0    AK  11100  11262022  227808       0   
4     AK_11100_12202022_228285_0    AK  11100  12202022  228285       0   
...                          ...   ...    ...       ...     ...     ...   
8161      WY_21307_01032022_85_0    WY  21307   1032022      85       0   
8162    WY_21307_04242022_3018_0    WY  21307   4242022    3018       0   
8163    WY_21307_04242022_3018_0    WY  21307   4242022    3018       0   
8164    WY_21411_12232022_1791_0    WY  21411  12232022    1791       0   
8165  WY_23527_10102022_220063_0    WY  23527  10102022  220063       0   

      FF_SEQ_NO  VERSION  GENDER  CAREER  ...  PABI  CAUSE  FACTOR  OBJECT  \
0             1      