In [1]:
import pandas as pd
from pathlib import Path

# Load the secretariat table from the working directory. If the CSV is not
# there, stop immediately with a message that names the directory searched.
csv_path = Path("secretariats.csv")
if csv_path.exists():
    secretariats_df = pd.read_csv(csv_path, encoding="utf-8")
else:
    raise FileNotFoundError(f"Cannot find {csv_path} in working directory: {Path.cwd()}")

secretariats_df.head()

Unnamed: 0,district_name,district_code,mandal_name,mandal_code,secretariat_code,secretariat_name,latitude,longitude
0,ALLURI SITHARAMA RAJU,745,ADDATEEGALA (R),4887,10490858,GONDOLU,17.450918,82.064411
1,ALLURI SITHARAMA RAJU,745,ADDATEEGALA (R),4887,10490857,GAVARAYYAPETA,17.461271,82.111261
2,ALLURI SITHARAMA RAJU,745,ADDATEEGALA (R),4887,10490860,KIMMURU,17.349885,82.127516
3,ALLURI SITHARAMA RAJU,745,ADDATEEGALA (R),4887,10490868,ADDATEEGALA2,17.480297,82.023781
4,ALLURI SITHARAMA RAJU,745,ADDATEEGALA (R),4887,10490856,DUCHARTHI,17.584431,82.05724


In [2]:
import geopandas as gpd
from shapely.geometry import Point

# Build a point GeoDataFrame (EPSG:4326) from the latitude/longitude columns
# of secretariats_df.

# Both coordinate columns must be present before anything else is attempted.
if any(col not in secretariats_df.columns for col in ('latitude', 'longitude')):
    raise KeyError("secretariats_df must contain 'latitude' and 'longitude' columns")

# Work on a copy: coerce the coordinates to numbers (unparseable values become
# NaN) and drop every row that is left without a usable latitude or longitude.
gdf = (
    secretariats_df
    .assign(
        latitude=lambda df: pd.to_numeric(df['latitude'], errors='coerce'),
        longitude=lambda df: pd.to_numeric(df['longitude'], errors='coerce'),
    )
    .dropna(subset=['latitude', 'longitude'])
)

# Point geometry takes x = longitude, y = latitude.
gdf = gpd.GeoDataFrame(
    gdf,
    geometry=gpd.points_from_xy(x=gdf['longitude'], y=gdf['latitude']),
    crs="EPSG:4326",
)
gdf.head()

Unnamed: 0,district_name,district_code,mandal_name,mandal_code,secretariat_code,secretariat_name,latitude,longitude,geometry
0,ALLURI SITHARAMA RAJU,745,ADDATEEGALA (R),4887,10490858,GONDOLU,17.450918,82.064411,POINT (82.06441 17.45092)
1,ALLURI SITHARAMA RAJU,745,ADDATEEGALA (R),4887,10490857,GAVARAYYAPETA,17.461271,82.111261,POINT (82.11126 17.46127)
2,ALLURI SITHARAMA RAJU,745,ADDATEEGALA (R),4887,10490860,KIMMURU,17.349885,82.127516,POINT (82.12752 17.34988)
3,ALLURI SITHARAMA RAJU,745,ADDATEEGALA (R),4887,10490868,ADDATEEGALA2,17.480297,82.023781,POINT (82.02378 17.4803)
4,ALLURI SITHARAMA RAJU,745,ADDATEEGALA (R),4887,10490856,DUCHARTHI,17.584431,82.05724,POINT (82.05724 17.58443)


In [26]:
# Start the 'unit' column as a copy of the raw secretariat name; the cells
# below clean it up while 'secretariat_name' keeps the original value.
gdf["unit"] = gdf["secretariat_name"]
gdf.head()


Unnamed: 0,district_name,district_code,mandal_name,mandal_code,secretariat_code,secretariat_name,latitude,longitude,geometry,unit
0,ALLURI SITHARAMA RAJU,745,ADDATEEGALA (R),4887,10490858,GONDOLU,17.450918,82.064411,POINT (82.06441 17.45092),GONDOLU
1,ALLURI SITHARAMA RAJU,745,ADDATEEGALA (R),4887,10490857,GAVARAYYAPETA,17.461271,82.111261,POINT (82.11126 17.46127),GAVARAYYAPETA
2,ALLURI SITHARAMA RAJU,745,ADDATEEGALA (R),4887,10490860,KIMMURU,17.349885,82.127516,POINT (82.12752 17.34988),KIMMURU
3,ALLURI SITHARAMA RAJU,745,ADDATEEGALA (R),4887,10490868,ADDATEEGALA2,17.480297,82.023781,POINT (82.02378 17.4803),ADDATEEGALA2
4,ALLURI SITHARAMA RAJU,745,ADDATEEGALA (R),4887,10490856,DUCHARTHI,17.584431,82.05724,POINT (82.05724 17.58443),DUCHARTHI


In [47]:
# Drop the zero padding from a hyphenated number in 'unit': a "-0" that is
# followed by a digit 1-9 becomes "-" plus that digit, e.g. 'X-01' -> 'X-1'.
gdf['unit'] = gdf['unit'].str.replace(
    pat=r'(.+)-0([1-9])',
    repl=r'\1-\2',
    regex=True,
)



In [None]:

# Insert a hyphen between a name and its trailing number when there is none
# yet, e.g. 'ADDATEEGALA2' -> 'ADDATEEGALA-2'.
#
# The lookbehind requires the character just before the digit run to be
# neither a digit nor a hyphen. That keeps the whole number together
# ('ABC12' -> 'ABC-12'; a greedy '.*' prefix would instead split it into
# 'ABC1-2'), leaves already hyphenated names such as 'ABC-12' untouched, and
# makes re-running the cell a no-op. Values that consist only of digits have
# no preceding character and are left unchanged.
gdf['unit'] = gdf['unit'].str.replace(r'(?<=[^\d-])(\d+)$', r'-\1', regex=True)


Unnamed: 0,district_name,district_code,mandal_name,mandal_code,secretariat_code,secretariat_name,latitude,longitude,geometry,unit
0,ALLURI SITHARAMA RAJU,745,ADDATEEGALA (R),4887,10490858,GONDOLU,17.450918,82.064411,POINT (82.06441 17.45092),GONDOLU
1,ALLURI SITHARAMA RAJU,745,ADDATEEGALA (R),4887,10490857,GAVARAYYAPETA,17.461271,82.111261,POINT (82.11126 17.46127),GAVARAYYAPETA
2,ALLURI SITHARAMA RAJU,745,ADDATEEGALA (R),4887,10490860,KIMMURU,17.349885,82.127516,POINT (82.12752 17.34988),KIMMURU
3,ALLURI SITHARAMA RAJU,745,ADDATEEGALA (R),4887,10490868,ADDATEEGALA2,17.480297,82.023781,POINT (82.02378 17.4803),ADDATEEGALA-2
4,ALLURI SITHARAMA RAJU,745,ADDATEEGALA (R),4887,10490856,DUCHARTHI,17.584431,82.05724,POINT (82.05724 17.58443),DUCHARTHI


In [68]:
# Normalise capitalisation: the source names are upper case, so switch 'unit'
# to title case (e.g. 'GONDOLU' -> 'Gondolu') and preview the result.
gdf["unit"] = gdf["unit"].str.title()
gdf.head()



Unnamed: 0,district_name,district_code,mandal_name,mandal_code,secretariat_code,secretariat_name,latitude,longitude,geometry,unit
0,ALLURI SITHARAMA RAJU,745,ADDATEEGALA (R),4887,10490858,GONDOLU,17.450918,82.064411,POINT (82.06441 17.45092),Gondolu
1,ALLURI SITHARAMA RAJU,745,ADDATEEGALA (R),4887,10490857,GAVARAYYAPETA,17.461271,82.111261,POINT (82.11126 17.46127),Gavarayyapeta
2,ALLURI SITHARAMA RAJU,745,ADDATEEGALA (R),4887,10490860,KIMMURU,17.349885,82.127516,POINT (82.12752 17.34988),Kimmuru
3,ALLURI SITHARAMA RAJU,745,ADDATEEGALA (R),4887,10490868,ADDATEEGALA2,17.480297,82.023781,POINT (82.02378 17.4803),Addateegala-2
4,ALLURI SITHARAMA RAJU,745,ADDATEEGALA (R),4887,10490856,DUCHARTHI,17.584431,82.05724,POINT (82.05724 17.58443),Ducharthi
