In [11]:
import requests
import zipfile
import os
import pandas as pd


In [22]:
# 远程文件的 URL
url = 'https://fema.gov/about/reports-and-data/openfema/nfirs_fire_hazmat_pdr_2022.zip'
# 本地保存的文件名
local_zip_file = 'nfirs_fire_hazmat_pdr_2022.zip'
# 第一级解压缩后的文件夹名
first_extracted_folder = 'nfirs_fire_hazmat_pdr_2022'
# 第二级解压缩后的文件夹名
second_extracted_folder = os.path.join(first_extracted_folder, 'NFIRS_FIRES_2022_102623')
# 需要读取的文件路径
target_file = 'NFIRS_FIRES_2022_102623/incidentaddress.txt'
incident_file = 'NFIRS_FIRES_2022_102623/incident.txt'
# 输出的 CSV 文件路径
output_csv_file = os.path.join(second_extracted_folder, 'NFIRS_FIRES_2022_102623', 'incidentaddress.csv')


In [13]:
# 下载文件
response = requests.get(url)
with open(local_zip_file, 'wb') as f:
    f.write(response.content)
print(f"Downloaded {local_zip_file}")


Downloaded nfirs_fire_hazmat_pdr_2022.zip


In [14]:
# 解压缩第一级文件
with zipfile.ZipFile(local_zip_file, 'r') as zip_ref:
    zip_ref.extractall(first_extracted_folder)
print(f"Extracted files to {first_extracted_folder}")


Extracted files to nfirs_fire_hazmat_pdr_2022


In [15]:
# 解压缩第二级文件
second_zip_file = os.path.join(first_extracted_folder, 'nfirs_fire_hazmat_pdr_2022', 'NFIRS_FIRES_2022_102623.zip')
with zipfile.ZipFile(second_zip_file, 'r') as zip_ref:
    zip_ref.extractall(second_extracted_folder)
print(f"Extracted files to {second_extracted_folder}")


Extracted files to nfirs_fire_hazmat_pdr_2022/NFIRS_FIRES_2022_102623


In [16]:
# 列出解压后的目录结构
def list_files(startpath):
    for root, dirs, files in os.walk(startpath):
        level = root.replace(startpath, '').count(os.sep)
        indent = ' ' * 4 * (level)
        print(f"{indent}{os.path.basename(root)}/")
        subindent = ' ' * 4 * (level + 1)
        for f in files:
            print(f"{subindent}{f}")

list_files(second_extracted_folder)

NFIRS_FIRES_2022_102623/
    incidentaddress.txt
    fireincident.txt
    civiliancasualty.txt
    hazchem.txt
    arson.txt
    basicincident.txt
    hazmat.txt
    wildlands.txt
    arsonjuvsub.txt
    ems.txt
    hazmatequipinvolved.txt
    codelookup.txt
    ffcasualty.txt
    fdheader.txt
    hazmobprop.txt
    basicaid.txt
    ffequipfail.txt
    arsonagencyreferal.txt


In [17]:
# 洛杉矶县城市列表
la_cities = [
    "Agoura Hills", "Alhambra", "Arcadia", "Artesia", "Avalon", "Azusa", "Baldwin Park", "Bell", "Bell Gardens", "Bellflower",
    "Beverly Hills", "Bradbury", "Burbank", "Calabasas", "Carson", "Cerritos", "Claremont", "Commerce", "Compton", "Covina",
    "Cudahy", "Culver City", "Diamond Bar", "Downey", "Duarte", "El Monte", "El Segundo", "Gardena", "Glendale", "Glendora",
    "Hawaiian Gardens", "Hawthorne", "Hermosa Beach", "Hidden Hills", "Huntington Park", "Industry", "Inglewood", "Irwindale",
    "La Cañada Flintridge", "La Habra Heights", "La Mirada", "La Puente", "La Verne", "Lakewood", "Lancaster", "Lawndale",
    "Lomita", "Long Beach", "Los Angeles", "Lynwood", "Malibu", "Manhattan Beach", "Maywood", "Monrovia", "Montebello",
    "Monterey Park", "Norwalk", "Palmdale", "Palos Verdes Estates", "Paramount", "Pasadena", "Pico Rivera", "Pomona",
    "Rancho Palos Verdes", "Redondo Beach", "Rolling Hills", "Rolling Hills Estates", "Rosemead", "San Dimas", "San Fernando",
    "San Gabriel", "San Marino", "Santa Clarita", "Santa Fe Springs", "Santa Monica", "Sierra Madre", "Signal Hill",
    "South El Monte", "South Gate", "South Pasadena", "Temple City", "Torrance", "Vernon", "Walnut", "West Covina",
    "West Hollywood", "Westlake Village", "Whittier"
]

In [23]:
# 读取指定文件
incidentaddress_path = os.path.join(second_extracted_folder, 'incidentaddress.txt')
basicincident_path = os.path.join(second_extracted_folder, 'NFIRS_FIRES_2022_102623', 'basicincident.txt')
if os.path.exists(file_path):
    # 使用 pandas 读取文件，指定分隔符为 '^' 和编码为 'latin1'
    try:
        df = pd.read_csv(file_path, delimiter='^', encoding='latin1')
        # 筛选出 STATE 为 CA 且 CITY 为 Alhambra 的数据
        filtered_df = df[(df['STATE'] == 'CA') & (df['CITY'].isin(la_cities))]
        #filtered_df = df[(df['STATE'] == 'CA') & (df['CITY'] == 'Alhambra')]
        # 打印筛选后的数据
        print(filtered_df)
        # 打印前几行
        #print("First 50 rows:")
        #print(df.head(50))
        #print("\nLast 50 rows:")
        #print(df.tail(50))
        
        #print(df.head())
        
    except UnicodeDecodeError as e:
        print(f"Error reading the file with 'latin1' encoding: {e}")
else:
    print(f"Error: The file at path {file_path} does not exist.")

  df = pd.read_csv(file_path, delimiter='^', encoding='latin1')


                       INCIDENT_KEY STATE   FDID  INC_DATE   INC_NO  EXP_NO  \
140611  CA_15010_11242022_2256765_0    CA  15010  11242022  2256765       0   
143872  CA_19005_01022022_2000027_0    CA  19005   1022022  2000027       0   
143873  CA_19005_01032022_2000050_0    CA  19005   1032022  2000050       0   
143874  CA_19005_01042022_2000078_0    CA  19005   1042022  2000078       0   
143875  CA_19005_01062022_2000108_0    CA  19005   1062022  2000108       0   
...                             ...   ...    ...       ...      ...     ...   
273588  CA_39115_02232022_0022064_0    CA  39115   2232022    22064       0   
316980  CA_56020_05162022_0042768_0    CA  56020   5162022    42768       0   
316985  CA_56020_05172022_0043120_0    CA  56020   5172022    43120       0   
317101  CA_56020_06172022_0052889_0    CA  56020   6172022    52889       0   
317458  CA_56020_09032022_0080219_0    CA  56020   9032022    80219       0   

        LOC_TYPE NUM_MILE STREET_PRE     STREETNAME

FileNotFoundError: [Errno 2] No such file or directory: '/Users/hanjiang/Desktop/04Dissertation/datasets/nfirs_fire_hazmat_pdr_2022/NFIRS_FIRES_2022_102623/incidentaddress.txt'