In [7]:
import json
import pandas as pd
from shapely.geometry import Point, Polygon, MultiPolygon

In [8]:
def create_province_boundaries(geojson_data):
    """
    Create a dictionary of province boundaries from GeoJSON features.

    Args:
        geojson_data: Parsed GeoJSON object with a 'features' list. Each feature
            is read for its 'NAME_1' and 'NL_NAME_1' properties and for its
            'geometry' ('Polygon' or 'MultiPolygon' coordinates).

    Returns: dict of {province_name: (boundary_polygon, thai_name)}
        boundary_polygon is always a MultiPolygon (a single Polygon geometry is
        wrapped), thai_name is 'NL_NAME_1' with the administrative prefixes
        removed.

    Features that cannot be processed are reported with print() and skipped;
    they never abort the whole load.
    """
    province_boundaries = {}

    for feature in geojson_data['features']:
        # Reset for every feature: the error message below must not reuse the
        # previous province's name, and must not fail with an unbound variable
        # when the very first feature is malformed.
        province_name = '<unknown>'
        try:
            # Get province names
            properties = feature['properties']
            province_name = properties['NAME_1']
            # Clean up the Thai province name by removing prefixes
            province_name_th = properties['NL_NAME_1']
            province_name_th = province_name_th.replace('จังหวัด', '').replace('อำเภอเมือง', '').replace('พระนครศรี', '').strip()

            geometry = feature['geometry']
            coordinates = geometry['coordinates']
            # A GeoJSON Polygon is a list of rings, a MultiPolygon is a list of
            # such lists; wrap the former so both share one code path.
            if geometry.get('type') == 'Polygon':
                coordinates = [coordinates]

            # In GeoJSON the first ring is the exterior and any further rings
            # are holes; keep the holes so enclosed areas are not claimed.
            boundary = MultiPolygon(
                [Polygon(rings[0], rings[1:]) for rings in coordinates]
            )

            # Store both the boundary and Thai name
            province_boundaries[province_name] = (boundary, province_name_th)

        except Exception as e:
            # Deliberately broad: one bad feature should only cost that province.
            print(f"Error processing province {province_name}: {e}")
            continue

    return province_boundaries

In [9]:
def find_province_for_point(lat, lon, province_boundaries):
    """
    Look up the province whose boundary contains a coordinate.

    Args:
        lat: Latitude of the location.
        lon: Longitude of the location.
        province_boundaries: dict whose values are (boundary, thai_name) pairs,
            as produced by create_province_boundaries.

    Returns: Thai name of the first province (in dict order) whose boundary
        contains the point, or an empty string if none does.
    """
    # The point is built as (x, y) = (lon, lat), not (lat, lon).
    location = Point(lon, lat)

    matching_names = (
        name_th
        for shape, name_th in province_boundaries.values()
        if shape.contains(location)
    )
    # The generator is lazy, so the search stops at the first hit.
    return next(matching_names, "")

In [22]:
def process_accident_data(input_csv, boundary_json, output_csv):
    """
    Process accident data and determine province for each location.

    Args:
        input_csv: Path of the accident CSV. It must contain 'LATITUDE' and
            'LONGITUDE' columns; an existing 'จังหวัด' column, if present, is
            replaced by the computed one.
        boundary_json: Path of the GeoJSON file with all province boundaries.
        output_csv: Path the enriched CSV is written to (UTF-8, no index).

    Returns: None. Progress and a per-province summary are printed.

    Rows whose coordinates are missing or not numeric keep an empty province.
    """
    # Read the GeoJSON data with all province boundaries
    with open(boundary_json, 'r', encoding='utf-8') as f:
        geojson_data = json.load(f)

    # Create province boundaries dictionary
    print("Processing province boundaries...")
    province_boundaries = create_province_boundaries(geojson_data)
    print(f"Loaded {len(province_boundaries)} province boundaries")

    # Read the accident data
    df = pd.read_csv(input_csv)
    total_rows = len(df)
    print(f"Processing {total_rows} accident records...")

    # Initialize the province column
    df['province'] = ''

    # Process each accident location
    for idx, row in df.iterrows():
        try:
            lat = float(row['LATITUDE'])
            lon = float(row['LONGITUDE'])

            # A missing coordinate (NaN) cannot fall inside any province, so
            # skip the lookup and leave the row's province empty.
            if not (pd.isna(lat) or pd.isna(lon)):
                province = find_province_for_point(lat, lon, province_boundaries)
                df.at[idx, 'province'] = province

            # Print progress every 1000 rows
            if idx % 1000 == 0:
                progress = (idx / total_rows) * 100
                print(f"Processed {idx} rows ({progress:.1f}%)...")

        except (ValueError, TypeError) as e:
            print(f"Error processing row {idx}: {e}")
            continue

    # Save the results
    # errors='ignore': an input without a 'จังหวัด' column must not raise here,
    # after all rows were processed and before anything has been written.
    df = df.drop(columns=['จังหวัด'], errors='ignore')
    df = df.rename(columns={'province': 'จังหวัด'})
    df.to_csv(output_csv, index=False, encoding='utf-8')
    print("Processing completed!")

    # Print summary statistics
    provinces_found = df['จังหวัด'].value_counts()
    print("\nAccidents by province:")
    print(provinces_found)

In [23]:
# Load the raw accident CSV for a visual check before the province lookup.
# input_csv is reused by the later cell that calls process_accident_data.
input_csv = "accident2024 + $Damage.csv"
df = pd.read_csv(input_csv)
# Bare last expression so the notebook renders the DataFrame.
df

Unnamed: 0,ปีที่เกิดเหตุ,วันที่เกิดเหตุ,เวลา,วันที่รายงาน,เวลาที่รายงาน,ACC_CODE,หน่วยงาน,สายทางหน่วยงาน,รหัสสายทาง,สายทาง,...,รถบรรทุกไม่เกิน10ล้อ,รถบรรทุกมากกว่า10ล้อ,รถอีแต๋น,อื่นๆ,คนเดินเท้า,จำนวนผู้เสียชีวิต,จำนวนผู้บาดเจ็บสาหัส,จำนวนผู้บาดเจ็บเล็ก,รวมจำนวนผู้บาดเจ็บ,มูลค่าความเสียหาย
0,2024.0,01-01-24,0:12,06/18/2024,14:23,9701543.0,กรมทางหลวง,ทางหลวง,4164,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,62536
1,2024.0,01-01-24,0:30,01-01-24,12:13,8901889.0,กรมทางหลวง,ทางหลวง,106,ลี้ - ม่วงโตน,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,653759
2,2024.0,01-01-24,0:30,01-02-24,4:18,8902334.0,กรมทางหลวง,ทางหลวง,1143,น้ำคลาด - ปางหมิ่น,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,423925
3,2024.0,01-01-24,0:30,01-01-24,11:40,8902375.0,กรมทางหลวง,ทางหลวง,3390,หนองรี - บ่อยาง,...,0.0,0.0,0.0,0.0,2.0,0.0,1.0,1.0,2.0,406010
4,2024.0,01-01-24,0:30,01-01-24,19:07,8902399.0,กรมทางหลวง,ทางหลวง,221,แยกการช่าง - เชิงบันไดเขาพระวิหาร,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,454972
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19386,2024.0,10/31/2024,16:10,11-11-24,10:56,9935471.0,กรมทางหลวง,ทางหลวง,4,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,857762
19387,2024.0,10/31/2024,18:08,11-08-24,15:21,9933918.0,กรมทางหลวง,ทางหลวง,213,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,141167
19388,2024.0,10/31/2024,18:45,11-01-24,13:27,9928627.0,กรมทางหลวง,ทางหลวง,24,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,6.0,764155
19389,2024.0,10/31/2024,21:00,11-04-24,14:49,9930018.0,กรมทางหลวง,ทางหลวง,214,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,657271


In [25]:
# Paths for the province lookup run; input_csv comes from an earlier cell.
boundary_json = "master_data/thailand_provinces.json"  # File containing all province boundaries
output_csv = "../output/accident2024_Damage_provinces.csv"

# Assign a province to every accident row and write the result to output_csv.
process_accident_data(input_csv, boundary_json, output_csv)

Processing province boundaries...
Loaded 77 province boundaries
Processing 19391 accident records...
Processed 0 rows (0.0%)...
Processed 1000 rows (5.2%)...
Processed 2000 rows (10.3%)...
Processed 3000 rows (15.5%)...
Processed 4000 rows (20.6%)...
Processed 5000 rows (25.8%)...
Processed 6000 rows (30.9%)...
Processed 7000 rows (36.1%)...
Processed 8000 rows (41.3%)...
Processed 9000 rows (46.4%)...
Processed 10000 rows (51.6%)...
Processed 11000 rows (56.7%)...
Processed 12000 rows (61.9%)...
Processed 13000 rows (67.0%)...
Processed 14000 rows (72.2%)...
Processed 15000 rows (77.4%)...
Processed 16000 rows (82.5%)...
Processed 17000 rows (87.7%)...
Processed 18000 rows (92.8%)...
Processed 19000 rows (98.0%)...
Processing completed!

Accidents by province:
จังหวัด
กรุงเทพมหานคร    2305
ชลบุรี           1224
นครราชสีมา        912
สุพรรณบุรี        579
เชียงใหม่         575
                 ... 
พิจิตร             51
อำนาจเจริญ         49
แม่ฮ่องสอน         40
ระนอง              36
