In [1]:
import geopandas as gpd

# Roads layer published on data.sa.gov.au:
# https://data.sa.gov.au/data/dataset/roads/resource/e0c742b7-0762-48f3-8d4f-27a877d98baa
roads_path = 'Roads_geojson/Roads_GDA2020.geojson'
roads = gpd.read_file(roads_path)

# Show which attribute columns were read from the file
print(roads.columns)


Index(['persistentid', 'featurecode', 'name', 'roadtype', 'typesuffix',
       'class', 'surface', 'routenum', 'status', 'ontype', 'capturesource',
       'capturemethod', 'featuresource', 'featurereldate', 'attributereldate',
       'horizontalaccuracy', 'fa_auditdate', 'fa_class', 'fa_source',
       'fa_method', 'fa_status', 'fa_validationdate', 'roadusetype',
       'roaduseauthority', 'road_id', 'one_way', 'f_elev', 't_elev',
       'crrs_road_no', 'cwy_code', 'tars_road_no', 'suburbidleft',
       'suburbidright', 'last_edited_date', 'shape_Length', 'geometry'],
      dtype='object')


In [2]:
import pandas as pd
from shapely.geometry import Point

# The CSV holds latitude/longitude values, so bring the roads layer into
# the same geographic CRS (WGS 84, EPSG:4326) before the two are compared
roads = roads.to_crs('EPSG:4326')

# Read the records to be located (latitude, longitude and location type)
df = pd.read_csv('WorkSA1.csv')

In [3]:
# Function to get road names with enhanced precision and fallback mechanisms
def _road_label(road):
    """Return "<name> <roadtype>" for one road record, leaving out missing parts.

    Returns an empty string when both fields are missing or blank.
    """
    parts = []
    for field in ('name', 'roadtype'):
        value = road[field]
        # A null attribute (None/NaN) formatted directly would put the
        # literal text "None"/"nan" into the label, so skip it instead.
        if pd.notna(value) and str(value).strip():
            parts.append(str(value).strip())
    return ' '.join(parts)


def get_road_names(row, roads_proj, point_proj):
    """Return a road-name label for one record of the CSV data.

    The search first looks for roads intersecting a small buffer around the
    record's longitude/latitude, widening the buffer step by step; if nothing
    with a usable name is found it falls back to the single nearest road.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide 'latitude', 'longitude' and 'loc_type'.
    roads_proj : geopandas.GeoDataFrame
        The roads layer in a projected CRS, used for distance calculations.
        It must share its index with the module-level ``roads`` frame
        (it does when created with ``roads.to_crs(...)``).
    point_proj : shapely geometry
        The record's location in the same CRS as ``roads_proj``.

    Returns
    -------
    str
        For 'midblock': the label of the closest named road.
        For 'intersection': the sorted, de-duplicated labels of all named
        roads inside the buffer joined with ' and '.
        'Unknown' when ``loc_type`` is missing or unrecognised, or when the
        nearest road has neither a name nor a road type.

    Notes
    -----
    The buffered search reads the module-level ``roads`` GeoDataFrame, which
    the notebook keeps in EPSG:4326.
    """
    lat, lon, loc_type = row['latitude'], row['longitude'], row['loc_type']

    # A blank CSV cell arrives as NaN (a float), which has no .lower();
    # treat anything that is not a recognised string as unknown.
    kind = loc_type.lower() if isinstance(loc_type, str) else None
    if kind not in ('midblock', 'intersection'):
        print(f"No road found for ({lat}, {lon}, {loc_type})")
        return 'Unknown'

    point = Point(lon, lat)

    # Buffer radii are in degrees because `roads` is in EPSG:4326
    # (roughly 11 m, 33 m and 55 m of latitude). Start small and widen.
    buffer_sizes = [0.0001, 0.0003, 0.0005]

    for buffer_distance in buffer_sizes:
        hits = roads[roads.intersects(point.buffer(buffer_distance))]
        if hits.empty:
            continue

        # Label every candidate once and keep only those with a usable label
        labels = pd.Series(
            [_road_label(road) for _, road in hits.iterrows()],
            index=hits.index,
        )
        labels = labels[labels != '']
        if labels.empty:
            continue

        if kind == 'midblock':
            # Several roads can fall inside the buffer; take the closest one
            # (measured in the projected CRS) rather than the first in file order.
            nearest = roads_proj.loc[labels.index].distance(point_proj).idxmin()
            road_name = labels.loc[nearest]
            print(f"Fetched location for midblock ({lat}, {lon}): {road_name}")
            return road_name

        location = ' and '.join(sorted(set(labels)))
        print(f"Fetched location for intersection ({lat}, {lon}): {location}")
        return location

    # Fallback: nearest road overall. idxmin() returns an index *label*,
    # so the row is fetched with .loc, not the positional .iloc.
    nearest = roads_proj.distance(point_proj).idxmin()
    road_name = _road_label(roads_proj.loc[nearest])
    if not road_name:
        print(f"No road found for ({lat}, {lon}, {loc_type})")
        return 'Unknown'

    if kind == 'midblock':
        print(f"Nearest location for midblock ({lat}, {lon}): {road_name}")
    else:
        print(f"Nearest intersection for ({lat}, {lon}): {road_name}")
    return road_name

# Convert roads to a projected CRS (EPSG:28354, MGA zone 54) so that
# distances are measured in metres rather than degrees
roads_proj = roads.to_crs(epsg=28354)

# Build all point geometries in one vectorised call instead of constructing
# a shapely Point per row through DataFrame.apply. The column is still stored
# on `df`, so later cells see the same 'geometry' column as before.
df['geometry'] = gpd.points_from_xy(df['longitude'], df['latitude'])
points_gdf = gpd.GeoDataFrame(df, geometry='geometry', crs='EPSG:4326')

# Reproject the points to the same projected CRS as the roads
points_gdf_proj = points_gdf.to_crs(epsg=28354)

# Look up a road label for every record; each row is paired with its
# projected point (matched by index label) for the distance calculations
df['location'] = df.apply(
    lambda row: get_road_names(row, roads_proj, points_gdf_proj.loc[row.name, 'geometry']),
    axis=1,
)

# Display the result
print(df.head())

Fetched location for intersection (-34.871247, 138.634093): FLORIEDALE ROAD and HEWER STREET and MULLER ROAD
Fetched location for midblock (-34.815102, 138.594204): SALISBURY HIGHWAY
Fetched location for midblock (-34.923639, 138.570563): HENLEY BEACH ROAD
Fetched location for midblock (-34.749319, 138.590943): TARQUI DRIVE
Fetched location for midblock (-35.053522, 138.877278): SOUTH EASTERN FREEWAY
Fetched location for midblock (-34.914666, 138.557412): ASHWIN PARADE
Fetched location for intersection (-35.019255, 138.567124): FLINDERS DRIVE and SOUTHERN EXPRESSWAY
Fetched location for midblock (-37.988371, 140.873971): GLENELG RIVER ROAD
Fetched location for intersection (-37.825942, 140.781972): PENOLA ROAD and WYATT STREET
Fetched location for midblock (-34.154269, 140.353211): OLD COACH ROAD
Fetched location for midblock (-36.154104, 140.42924): DUKES HIGHWAY
Fetched location for midblock (-35.666891, 137.10333): NORTH COAST ROAD
Fetched location for midblock (-34.267172, 138.2300

In [6]:
# Write the records, including the derived 'location' column, to disk
# without the row index
output_csv = 'Final_SA.csv'
df.to_csv(output_csv, index=False)


In [7]:
# Flag the rows whose lookup ended in the 'Unknown' sentinel and total them
is_unknown = df['location'] == 'Unknown'
unknown_count = is_unknown.sum()
print("Number of 'Unknown' values in 'location' column:", unknown_count)

# Break the unknowns down by location type (compared case-insensitively)
loc_type_lower = df['loc_type'].str.lower()
unknown_intersection_count = int((is_unknown & (loc_type_lower == 'intersection')).sum())
unknown_midblock_count = int((is_unknown & (loc_type_lower == 'midblock')).sum())

print("Number of 'Unknown' values after 'Intersection':", unknown_intersection_count)
print("Number of 'Unknown' values after 'Midblock':", unknown_midblock_count)

Number of 'Unknown' values in 'location' column: 0
Number of 'Unknown' values after 'Intersection': 0
Number of 'Unknown' values after 'Midblock': 0
