### **Merge Shapefile and Dataframe**

In [1]:
# import necessary libraries
import geopandas as gpd
from fuzzywuzzy import fuzz, process
import pandas as pd
import re


  shapely_geos_version, geos_capi_version_string


In [2]:
# Locations of the two inputs, relative to the notebook's working directory
csv_path = 'Data/Admin.csv'
shapefile_path = 'Shapefiles/constituencies/constituencies.shp'

# Load the CSV table with pandas and the shapefile geometries with geopandas
df_csv = pd.read_csv(csv_path)
gdf_shapefile = gpd.read_file(shapefile_path)




In [3]:
# Step 2: Preprocess ward names and ward codes
def preprocess_shapefile_constituency_name(ward_name):
    """Normalise a name taken from the shapefile for fuzzy matching.

    The standalone word 'ward' (any case) and every '-' and '/' character
    are removed, surrounding whitespace is stripped and the result is
    lower-cased.

    Args:
        ward_name: Name string to normalise.

    Returns:
        The normalised, lower-case name.
    """
    # Only drop 'ward' when it is not glued to another letter, so names that
    # merely contain it (e.g. "Edward", "Woodward") are left intact.
    cleaned = re.sub(
        r'(?<![a-z])ward(?![a-z])|-|/', '', ward_name, flags=re.IGNORECASE
    )
    return cleaned.strip().lower()

def preprocess_csv_constituency_name(ward_name):
    """Normalise a name taken from the CSV file.

    Every '/' character is dropped and the remaining text is lower-cased.
    Whitespace is left untouched.
    """
    without_slashes = re.sub(r'/', '', ward_name)
    return without_slashes.lower()



In [7]:
# Add a normalised-name column to each table: the shapefile's 'ward' column and
# the CSV's 'Ward_Name' column are each run through their own preprocessing function.
gdf_shapefile['constituency_name_preprocessed'] = gdf_shapefile['ward'].map(
    preprocess_shapefile_constituency_name
)
df_csv['constituency_name_preprocessed'] = df_csv['Ward_Name'].map(
    preprocess_csv_constituency_name
)

In [9]:
# Step 3: Perform fuzzy matching to find the best matches for ward names
def fuzzy_match(ward_name, choices, threshold=60):
    """Return the candidate in ``choices`` that best matches ``ward_name``.

    Candidates are scored with ``fuzz.ratio`` via ``process.extractOne``.

    Args:
        ward_name: The name to look up.
        choices: Candidate names to match against.
        threshold: Minimum score (inclusive) for a match to be accepted.
            Defaults to 60; adjust as per your requirement.

    Returns:
        The best-matching candidate, or ``None`` when there is no candidate
        at all or the best score is below ``threshold``.
    """
    best = process.extractOne(ward_name, choices, scorer=fuzz.ratio)

    # extractOne yields None when there is nothing to match against;
    # unpacking it directly would raise a TypeError.
    if best is None:
        return None

    # Index instead of unpacking: for dict-like choices the result carries a
    # third element (the key) after the match and its score.
    match, score = best[0], best[1]
    return match if score >= threshold else None



In [10]:
# Create a dictionary to store the mappings between ward names in the shapefile and CSV
# (keys: preprocessed shapefile names, values: the matching CSV 'Ward_Code').
ward_mapping = {}

# The preprocessing cell stores the normalised names in
# 'constituency_name_preprocessed' on both tables; build the candidate list
# once, since it does not change between iterations.
shapefile_names_preprocessed = gdf_shapefile['constituency_name_preprocessed'].tolist()

for ward_name_csv, ward_code_csv in zip(
    df_csv['constituency_name_preprocessed'], df_csv['Ward_Code']
):
    # Find the best match for the CSV name among the shapefile names
    match_ward_name = fuzzy_match(ward_name_csv, shapefile_names_preprocessed)

    # NOTE: if several CSV rows match the same shapefile name, the last row wins.
    if match_ward_name:
        ward_mapping[match_ward_name] = ward_code_csv



In [11]:
# Step 4: Update the attribute table in the shapefile with the ward codes
# The normalised shapefile names live in 'constituency_name_preprocessed';
# names that have no entry in ward_mapping get NaN as their ward code.
gdf_shapefile['ward_code'] = gdf_shapefile['constituency_name_preprocessed'].map(ward_mapping)

In [12]:
# Step 5: Write the GeoDataFrame, including the new 'ward_code' column, to disk
# NOTE(review): shapefile field names are limited to 10 characters, so
# 'constituency_name_preprocessed' will presumably be truncated on write;
# confirm this is acceptable or drop the helper column beforehand.
output_shapefile_path = 'Shapefiles/wards/ke_wards.shp'
gdf_shapefile.to_file(filename=output_shapefile_path)

  This is separate from the ipykernel package so we can avoid doing imports until
