In [2]:
import geopandas as gpd
from shapely.geometry import Polygon, MultiPolygon
import os
import icecream as ic
import pandas as pd 
from concurrent.futures import ProcessPoolExecutor
from functools import partial
from pyproj import CRS

from dhs_preprocessing_functions import *

In [3]:
base_folder = '/mnt/datadisk/data/gis_data/fewsnet/'
fewsnet_p = base_folder + 'ALL_HFIC/'
folder_l = ['East Africa', 'West Africa', 'Southern Africa']
gaul_adm2_f = '/mnt/datadisk/data/gis_data/gaul_fixed/gaul_2015-2014_adm2.shp'
out_f = base_folder + 'fewsnet_gaul_adm2.csv'

coordinate_reference_system_wkt = """
PROJCS["Africa_Albers_Equal_Area_Conic",
GEOGCS["GCS_WGS_1984",
    DATUM["WGS_1984",
        SPHEROID["WGS_1984",6378137,298.257223563]],
    PRIMEM["Greenwich",0],
    UNIT["Degree",0.017453292519943295]],
PROJECTION["Albers_Conic_Equal_Area"],
PARAMETER["False_Easting",0],
PARAMETER["False_Northing",0],
PARAMETER["longitude_of_center",25],
PARAMETER["Standard_Parallel_1",20],
PARAMETER["Standard_Parallel_2",-23],
PARAMETER["latitude_of_center",0],
UNIT["Meter",1],
AUTHORITY["EPSG","102022"]]
"""
prj_str = '+proj=aea +lat_1=20 +lat_2=-23 +lat_0=0 +lon_0=25 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs'
africa_albers_crs = CRS(coordinate_reference_system_wkt)


In [4]:
files = []
for folder in os.listdir(fewsnet_p):
    if folder in folder_l:
        folder_p = os.path.join(fewsnet_p, folder)
        files.extend(get_files_by_extension(folder_p, '.shp'))
len(files)

138

In [5]:
gaul_adm2 = gpd.read_file(gaul_adm2_f)

In [6]:
gaul_adm2_d = gaul_adm2[gaul_adm2.duplicated(['adm0_name', 'adm1_name', 'adm2_name'])]
gaul_adm2_d

Unnamed: 0,OBJECTID,adm2_code,adm2_name,str2_year,exp2_year,adm1_code,adm1_name,status,disp_area,adm0_code,adm0_name,shape_leng,FID_1,Shape_Le_1,Shape_Area,geometry
129,130.0,40579.0,Administrative unit not available,0.0,0.0,40544.0,Bujumbura Rural,Member State,NO,43.0,Burundi,0.352109,129,0.35293,0.002627,"POLYGON ((29.29010 -3.35000, 29.28930 -3.35480..."
621,623.0,41376.0,Manhica,2013.0,0.0,41373.0,Maputo,Member State,NO,170.0,Mozambique,3.238343,622,3.247867,0.161485,"POLYGON ((32.80370 -24.98250, 32.80430 -24.983..."
1418,1420.0,74364.0,Kasai,2003.0,0.0,1069.0,Kasai Occidental,Member State,NO,68.0,Democratic Republic of the Congo,0.710118,1419,0.710141,0.031074,"POLYGON ((20.93130 -6.33930, 20.92810 -6.37980..."
1419,1421.0,74359.0,Kolwezi,0.0,0.0,1071.0,Katanga,Member State,NO,68.0,Democratic Republic of the Congo,0.454795,1420,0.45479,0.009497,"POLYGON ((25.55910 -10.71200, 25.55110 -10.732..."
4627,4629.0,18994.0,Name Unknown,0.0,0.0,1811.0,Quthing,Member State,NO,142.0,Lesotho,0.387503,4900,0.387433,0.004209,"POLYGON ((27.97630 -30.18740, 27.97760 -30.188..."
4628,4630.0,18995.0,Name Unknown,0.0,0.0,1811.0,Quthing,Member State,NO,142.0,Lesotho,0.769961,4901,0.769869,0.015634,"POLYGON ((28.10390 -30.21290, 28.10580 -30.214..."
4629,4631.0,18996.0,Name Unknown,0.0,0.0,1811.0,Quthing,Member State,NO,142.0,Lesotho,0.316463,4902,0.316549,0.002946,"POLYGON ((27.82050 -30.40740, 27.81950 -30.408..."
4630,4632.0,18997.0,Name Unknown,0.0,0.0,1811.0,Quthing,Member State,NO,142.0,Lesotho,0.900641,4903,0.900654,0.01609,"POLYGON ((28.28480 -30.14810, 28.28480 -30.148..."
4631,4633.0,18998.0,Name Unknown,0.0,0.0,1811.0,Quthing,Member State,NO,142.0,Lesotho,0.463599,4904,0.463488,0.005868,"POLYGON ((27.74500 -30.43650, 27.74780 -30.436..."
4632,4634.0,18999.0,Name Unknown,0.0,0.0,1811.0,Quthing,Member State,NO,142.0,Lesotho,0.555768,4905,0.555953,0.008373,"POLYGON ((27.66290 -30.43860, 27.66440 -30.439..."


In [7]:
gaul_adm2_d = gaul_adm2[gaul_adm2.duplicated(['adm0_name', 'adm1_name', 'adm2_name'])]
gaul_adm2_d[['adm0_name', 'adm1_name', 'adm2_name']]
#create unique "Name Unknown" etc. names
found_names = set([])
ic(len(gaul_adm2_d))
for id, row in gaul_adm2_d.iterrows():
    n = row['adm2_name']
    if n not in found_names:
        found_names.add(n)
        continue
    nr = 2
    name = n + ' ' + str(nr)
    while name in found_names:
        name = n + ' ' + str(nr)
        nr += 1
    found_names.add(name)
    gaul_adm2.loc[id, 'adm2_name'] = name


ic| len(gaul_adm2_d): 52


In [8]:
gaul_adm2_d = gaul_adm2[gaul_adm2.duplicated(['adm0_name', 'adm1_name', 'adm2_name'])]
gaul_adm2_d[['adm0_name', 'adm1_name', 'adm2_name']]
gaul_adm2[gaul_adm2['adm2_name'] == 'Kasai']

Unnamed: 0,OBJECTID,adm2_code,adm2_name,str2_year,exp2_year,adm1_code,adm1_name,status,disp_area,adm0_code,adm0_name,shape_leng,FID_1,Shape_Le_1,Shape_Area,geometry
1391,1393.0,74367.0,Kasai,2003.0,0.0,1069.0,Kasai Occidental,Member State,NO,68.0,Democratic Republic of the Congo,19.294114,1392,19.295214,7.838617,"POLYGON ((21.75540 -2.49130, 21.76880 -2.49800..."
1418,1420.0,74364.0,Kasai,2003.0,0.0,1069.0,Kasai Occidental,Member State,NO,68.0,Democratic Republic of the Congo,0.710118,1419,0.710141,0.031074,"POLYGON ((20.93130 -6.33930, 20.92810 -6.37980..."


In [9]:
gaul_adm2 = gaul_adm2.to_crs(africa_albers_crs)
#gaul_adm2.geometry = gaul_adm2.geometry.buffer(-0.0001)
gaul_adm2['gaul area sqm'] = gaul_adm2.geometry.area


In [10]:
  
def spatial_join_w_gaul(f, gaul_adm2=gaul_adm2):
    """Spatial join a shapefile to the gaul_adm2 shapefile
    
    Parameters
    ----------
    f : str
        path to shapefile
    gaul_adm2 : geopandas.GeoDataFrame
        gaul_adm2 shapefile
    
    Returns
    -------
    geopandas.GeoDataFrame
        joined dataframe
    """
    #ic(f)
    # Load your two shapefiles 
    poly_1 = gpd.read_file(f)
    poly_1 = poly_1.to_crs(africa_albers_crs)
    # Slightly shrink the geometries to remove small overlaps
    #poly_1.geometry = poly_1.geometry.buffer(-0.0001)

    # Compute the geometric intersection of the geometries in the two GeoDataFrames
    joined = gpd.overlay(gaul_adm2, poly_1, how='intersection', keep_geom_type=False)
    #joined = gpd.sjoin(gaul_adm2, poly_1, how="inner", predicate="intersects")
    
    # Retrieve file name
    file_name = os.path.basename(f)
    file_parts = file_name.split('_')
    if len(file_parts) != 3:
        ic(file_parts)
        raise ValueError('file name does not match expected format')
    
    if len(file_parts[1]) != 6:
        ic(file_parts)
        raise ValueError('file name does not match expected format')
    
    year = file_parts[1][:4]
    month = file_parts[1][4:]
    try:
        joined['year'] = int(year)
        joined['month'] = int(month)
    except ValueError as e:
        ic(year, month)
        raise ValueError(f'month or year is not an integer: {e}')
    
    # Find largest polygon for each row
    #joined['largest_polygon'] = joined['geometry'].apply(lambda x: 
     #   max(x.geoms if isinstance(x, MultiPolygon) else [x], key=lambda p: p.area))

       # Drop other polygons
    # Replace the existing 'geometry' column with the 'largest_polygon' column
    #joined.geometry = joined['largest_polygon']

    # Drop the 'largest_polygon' column
    #joined = joined.drop(columns=['largest_polygon'])
    
    # Compute the area of the intersection
    joined['intersection area sqm'] = joined.geometry.area
     # Compute the ratio of the intersection area to the gaul_adm2 area
    joined['area_ratio'] = joined['intersection area sqm'] / joined['gaul area sqm']

    # Set your minimum area ratio here
    min_area_ratio = 0.55  # 10%

    # Drop rows where the area ratio is less than the minimum
    joined = joined[joined['area_ratio'] >= min_area_ratio]
    if len(joined[joined['area_ratio'] < min_area_ratio]) > 0:
        ic(joined[joined['area_ratio'] < min_area_ratio][['adm0_name', 'adm1_name', 'adm2_name', 'area_ratio', 'CS', 'year', 'month']])
    
    # Now you can write the GeoDataFrame to a file
    #joined.to_file(out_f[:-4] + f'_{year}_{month}.shp', driver='ESRI Shapefile')
    return joined

In [11]:
partial_spatial_join_w_gaul = partial(spatial_join_w_gaul, gaul_adm2=gaul_adm2)
with ProcessPoolExecutor(max_workers=47) as executor:
    gdf_l = list(executor.map(partial_spatial_join_w_gaul, files))

#ic(gdf_l)
gdf_l[0]

Unnamed: 0,OBJECTID,adm2_code,adm2_name,str2_year,exp2_year,adm1_code,adm1_name,status,disp_area,adm0_code,...,FID_1,Shape_Le_1,Shape_Area,gaul area sqm,CS,geometry,year,month,intersection area sqm,area_ratio
11,1597.0,65431.0,Daguela,2000.0,0.0,873.0,Guera,Member State,NO,50.0,...,1596,4.864569,1.068880,1.294012e+10,1,"POLYGON ((-551682.957 1287588.686, -548444.423...",2009,7,1.294012e+10,1.000000
12,1599.0,65436.0,Dekakire,2000.0,0.0,873.0,Guera,Member State,NO,50.0,...,1598,1.330040,0.083421,1.008068e+09,1,"POLYGON ((-776668.891 1319564.923, -775390.692...",2009,7,1.008068e+09,1.000000
13,1602.0,65477.0,Gogmi,2000.0,0.0,873.0,Guera,Member State,NO,50.0,...,1601,2.463850,0.247936,2.993739e+09,1,"POLYGON ((-677510.840 1342220.439, -680943.739...",2009,7,2.993739e+09,1.000000
16,1605.0,65593.0,Melfi,2000.0,0.0,873.0,Guera,Member State,NO,50.0,...,1604,2.918849,0.501288,6.064507e+09,1,"POLYGON ((-670869.593 1276962.936, -671791.826...",2009,7,6.064506e+09,1.000000
17,1606.0,65606.0,Mokofi,2000.0,0.0,873.0,Guera,Member State,NO,50.0,...,1605,1.984134,0.231126,2.789912e+09,1,"POLYGON ((-742077.889 1357664.511, -740189.347...",2009,7,2.789912e+09,1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1841,5611.0,19490.0,Aoujeft,0.0,0.0,2004.0,Adrar,Member State,NO,159.0,...,5886,7.434381,2.112745,2.456315e+10,3,"POLYGON ((-3955087.024 2342527.745, -3956514.8...",2009,7,2.410995e+10,0.981550
1846,5623.0,19503.0,Maghta Lahjar,0.0,0.0,2006.0,Brakna,Member State,NO,159.0,...,5898,4.641245,1.177646,1.382264e+10,3,"POLYGON ((-3903102.025 1976481.565, -3903150.6...",2009,7,1.373010e+10,0.993305
1847,5624.0,19504.0,Nouadhibou,0.0,0.0,2007.0,Dakhlet-Nouadhibou,Member State,NO,159.0,...,5899,16.272074,1.984308,2.291019e+10,3,"MULTIPOLYGON (((-4147056.411 2430230.744, -414...",2009,7,1.679366e+10,0.733021
1848,5632.0,19512.0,Bassikounou,0.0,0.0,2010.0,Hodh Ech Chargi,Member State,NO,159.0,...,5907,5.190273,1.453438,1.719599e+10,3,"POLYGON ((-3195228.375 1791125.335, -3197278.0...",2009,7,1.655284e+10,0.962599


In [12]:
df = gpd.GeoDataFrame(pd.concat(gdf_l, axis=0, ignore_index=True))
df = df[df['CS'] <= 5]
merge_cols = ['adm2_name', 'adm1_name', 'adm0_name']
group_cols = ['adm2_name', 'adm1_name', 'adm0_name', 'month', 'year']

grouped_gdf = df.groupby(group_cols)

for id, group in grouped_gdf:
    if len(group) > 1:
        #drop the smaller area
        df.drop(group[group['area_ratio'] < group['area_ratio'].max()].index, inplace=True)
        
grouped_gdf = df.groupby(group_cols)

for id, group in grouped_gdf:
    if len(group) > 1:
        ic(group[group_cols + ['CS', 'area_ratio', 'intersection area sqm', 'gaul area sqm']])
        


In [13]:
df = df.drop_duplicates(group_cols)
csv_df = df.drop(columns=['geometry'])
csv_df.to_csv(out_f, index=False)

In [14]:
gaul_adm2b = gaul_adm2.merge(df[merge_cols].drop_duplicates(),
                             how='inner',
                             on=merge_cols)

gaul_adm2b

Unnamed: 0,OBJECTID,adm2_code,adm2_name,str2_year,exp2_year,adm1_code,adm1_name,status,disp_area,adm0_code,adm0_name,shape_leng,FID_1,Shape_Le_1,Shape_Area,geometry,gaul area sqm
0,1.0,40621.0,Ryansoro,0.0,0.0,40548.0,Gitega,Member State,NO,43.0,Burundi,0.754714,0,0.754908,0.013315,"POLYGON ((496288.608 -431254.979, 496193.976 -...",1.635631e+08
1,2.0,40667.0,Ndava,0.0,0.0,40555.0,Mwaro,Member State,NO,43.0,Burundi,0.721689,1,0.721861,0.013685,"POLYGON ((499404.614 -400379.415, 499352.089 -...",1.681628e+08
2,3.0,40593.0,Buyengero,0.0,0.0,40545.0,Bururi,Member State,NO,43.0,Burundi,0.744050,2,0.744971,0.016754,"POLYGON ((476715.760 -449735.627, 476362.499 -...",2.057737e+08
3,4.0,40580.0,Bugarama,2004.0,0.0,40544.0,Bujumbura Rural,Member State,NO,43.0,Burundi,0.622149,3,0.622037,0.010563,"POLYGON ((465268.183 -435503.214, 465423.123 -...",1.297627e+08
4,6.0,40591.0,Burambi,0.0,0.0,40545.0,Bururi,Member State,NO,43.0,Burundi,0.803154,5,0.804049,0.022236,"POLYGON ((464874.927 -440939.752, 465081.601 -...",2.731161e+08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3184,8840.0,32711.0,Juban,0.0,0.0,3408.0,Al Dhale'e,Member State,NO,269.0,Yemen,1.345190,31830,1.345397,0.100466,"POLYGON ((2083055.569 1658634.157, 2084295.395...",1.200621e+09
3185,8841.0,32712.0,Qa'atabah,0.0,0.0,3408.0,Al Dhale'e,Member State,NO,269.0,Yemen,1.272211,31831,1.272467,0.057365,"POLYGON ((2064542.199 1635393.380, 2065419.279...",6.858922e+08
3186,8842.0,32935.0,Al Jafariyah,0.0,0.0,144971.0,Raymah,Member State,NO,269.0,Yemen,0.873004,31832,0.873457,0.023535,"POLYGON ((1943044.343 1706863.096, 1943040.815...",2.806844e+08
3187,8843.0,32941.0,As Salafiyah,0.0,0.0,144971.0,Raymah,Member State,NO,269.0,Yemen,0.882384,31833,0.882354,0.031684,"POLYGON ((1957971.813 1728988.174, 1958163.975...",3.776079e+08


In [15]:
gaul_adm2b[gaul_adm2b['adm2_name'] == 'Manhica'][['adm0_name', 'adm1_name', 'adm2_name', 'Shape_Area', 'geometry']]

Unnamed: 0,adm0_name,adm1_name,adm2_name,Shape_Area,geometry
547,Mozambique,Maputo,Manhica,0.050725,"POLYGON ((808074.132 -2904226.965, 808084.338 ..."
548,Mozambique,Maputo,Manhica,0.161485,"POLYGON ((799329.522 -2894199.534, 799390.653 ..."


In [16]:
df[df[merge_cols + ['year', 'month']].duplicated()]
df[df['adm2_name'] == 'Manhica'][['adm0_name', 'adm1_name', 'adm2_name', 'year', 'month', 'CS']]

Unnamed: 0,adm0_name,adm1_name,adm2_name,year,month,CS
92532,Mozambique,Maputo,Manhica,2009,7,1.0
92834,Mozambique,Maputo,Manhica,2009,10,1.0
93121,Mozambique,Maputo,Manhica,2010,1,1.0
93408,Mozambique,Maputo,Manhica,2010,4,1.0
93741,Mozambique,Maputo,Manhica,2010,7,1.0
94036,Mozambique,Maputo,Manhica,2010,10,1.0
94338,Mozambique,Maputo,Manhica,2011,1,1.0
94668,Mozambique,Maputo,Manhica,2011,4,1.0
94972,Mozambique,Maputo,Manhica,2011,7,1.0
95274,Mozambique,Maputo,Manhica,2011,10,1.0


In [17]:
def gaul_merger(g, gaul_df, to_merge_cols, merge_cols=['adm2_name', 'adm1_name', 'adm0_name']):
    g = g[1]
    
    if len(g) != len(g[merge_cols].drop_duplicates()):
        ic(g[g.duplicated()][merge_cols + to_merge_cols])
    if len(gaul_df) != len(gaul_df[merge_cols].drop_duplicates()):
        ic(gaul_df[gaul_df.duplicated()][merge_cols])
    date_str = str(g['month'].iloc[0]).zfill(2) + str(g['year'].iloc[0])
    g = g.rename(columns={c: c + '_' + date_str for c in to_merge_cols})
    to_merge_cols2 = [c + '_' + date_str for c in to_merge_cols]
    out_df = gaul_df[merge_cols].merge(g[to_merge_cols2 + merge_cols],
                                     on=merge_cols, right_index=False, how='left')[merge_cols + to_merge_cols2]

    return out_df

In [19]:
# Group by "year" and "month" and aggregate the geometries
cols = ['CS', 'HA', 'HA0'] + merge_cols + ['year', 'month']
cols = [c for c in cols if c in df.columns or c.lower() in [c2.lower() for c2 in df.columns]]
to_merge_cols = [c for c in cols if c not in merge_cols + ['year', 'month']]
ic(to_merge_cols)
grouped_gdf = df[cols]
grouped_gdf = grouped_gdf.groupby(['year', 'month'])

fs_df = gaul_adm2b
for id, gdf in grouped_gdf:
    date_str = str(gdf['month'].iloc[0]).zfill(2) + str(gdf['year'].iloc[0])
    gdf = gdf.rename(columns={c: c + '_' + date_str for c in to_merge_cols})
    to_merge_cols2 = [c + '_' + date_str for c in to_merge_cols]
    fs_df = pd.merge(fs_df, gdf[merge_cols + to_merge_cols2], on=merge_cols, how='left')
ic(fs_df.columns)
ic(fs_df.shape)
ic(type(fs_df))
fs_df


#partial_gaul_merger = partial(gaul_merger, gaul_df=gaul_adm2b, to_merge_cols=to_merge_cols)
#with ProcessPoolExecutor(max_workers=47) as executor:
 #   gdf_parts = list(executor.map(partial_gaul_merger, grouped_gdf))

#gdf_parts[0]

ic| to_merge_cols: ['CS', 'HA', 'HA0']
ic| fs_df.columns: Index(['OBJECTID', 'adm2_code', 'adm2_name', 'str2_year', 'exp2_year',
                          'adm1_code', 'adm1_name', 'status', 'disp_area', 'adm0_code',
                          ...
                          'HA0_022021', 'CS_062021', 'HA_062021', 'HA0_062021', 'CS_102021',
                          'HA_102021', 'HA0_102021', 'CS_022022', 'HA_022022', 'HA0_022022'],
                         dtype='object', length=155)
ic| fs_df.shape: (3189, 155)
ic| type(fs_df): <class 'geopandas.geodataframe.GeoDataFrame'>


Unnamed: 0,OBJECTID,adm2_code,adm2_name,str2_year,exp2_year,adm1_code,adm1_name,status,disp_area,adm0_code,...,HA0_022021,CS_062021,HA_062021,HA0_062021,CS_102021,HA_102021,HA0_102021,CS_022022,HA_022022,HA0_022022
0,1.0,40621.0,Ryansoro,0.0,0.0,40548.0,Gitega,Member State,NO,43.0,...,0.0,1.0,,0.0,1.0,,0.0,1.0,,0.0
1,2.0,40667.0,Ndava,0.0,0.0,40555.0,Mwaro,Member State,NO,43.0,...,0.0,1.0,,0.0,1.0,,0.0,1.0,,0.0
2,3.0,40593.0,Buyengero,0.0,0.0,40545.0,Bururi,Member State,NO,43.0,...,0.0,1.0,,0.0,1.0,,0.0,1.0,,0.0
3,4.0,40580.0,Bugarama,2004.0,0.0,40544.0,Bujumbura Rural,Member State,NO,43.0,...,0.0,1.0,,0.0,,,,,,
4,6.0,40591.0,Burambi,0.0,0.0,40545.0,Bururi,Member State,NO,43.0,...,0.0,1.0,,0.0,1.0,,0.0,1.0,,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3184,8840.0,32711.0,Juban,0.0,0.0,3408.0,Al Dhale'e,Member State,NO,269.0,...,0.0,3.0,,0.0,,,,3.0,,0.0
3185,8841.0,32712.0,Qa'atabah,0.0,0.0,3408.0,Al Dhale'e,Member State,NO,269.0,...,0.0,3.0,,0.0,,,,3.0,,0.0
3186,8842.0,32935.0,Al Jafariyah,0.0,0.0,144971.0,Raymah,Member State,NO,269.0,...,0.0,3.0,,0.0,,,,3.0,,0.0
3187,8843.0,32941.0,As Salafiyah,0.0,0.0,144971.0,Raymah,Member State,NO,269.0,...,0.0,3.0,,0.0,,,,3.0,,0.0


: 

In [20]:
fs_df = gpd.GeoDataFrame(fs_df)
#fs_df = pd.concat([gaul_adm2b[['adm2_name']]] + gdf_parts, axis=1)
#drop dupl
#fs_df


In [21]:
gaul_adm2b[['adm2_name']]

Unnamed: 0,adm2_name
0,Ryansoro
1,Ndava
2,Buyengero
3,Bugarama
4,Burambi
...,...
3184,Juban
3185,Qa'atabah
3186,Al Jafariyah
3187,As Salafiyah


In [22]:
fs_df

Unnamed: 0,OBJECTID,adm2_code,adm2_name,str2_year,exp2_year,adm1_code,adm1_name,status,disp_area,adm0_code,...,HA0_022021,CS_062021,HA_062021,HA0_062021,CS_102021,HA_102021,HA0_102021,CS_022022,HA_022022,HA0_022022
0,1.0,40621.0,Ryansoro,0.0,0.0,40548.0,Gitega,Member State,NO,43.0,...,0.0,1.0,,0.0,1.0,,0.0,1.0,,0.0
1,2.0,40667.0,Ndava,0.0,0.0,40555.0,Mwaro,Member State,NO,43.0,...,0.0,1.0,,0.0,1.0,,0.0,1.0,,0.0
2,3.0,40593.0,Buyengero,0.0,0.0,40545.0,Bururi,Member State,NO,43.0,...,0.0,1.0,,0.0,1.0,,0.0,1.0,,0.0
3,4.0,40580.0,Bugarama,2004.0,0.0,40544.0,Bujumbura Rural,Member State,NO,43.0,...,0.0,1.0,,0.0,,,,,,
4,6.0,40591.0,Burambi,0.0,0.0,40545.0,Bururi,Member State,NO,43.0,...,0.0,1.0,,0.0,1.0,,0.0,1.0,,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3184,8840.0,32711.0,Juban,0.0,0.0,3408.0,Al Dhale'e,Member State,NO,269.0,...,0.0,3.0,,0.0,,,,3.0,,0.0
3185,8841.0,32712.0,Qa'atabah,0.0,0.0,3408.0,Al Dhale'e,Member State,NO,269.0,...,0.0,3.0,,0.0,,,,3.0,,0.0
3186,8842.0,32935.0,Al Jafariyah,0.0,0.0,144971.0,Raymah,Member State,NO,269.0,...,0.0,3.0,,0.0,,,,3.0,,0.0
3187,8843.0,32941.0,As Salafiyah,0.0,0.0,144971.0,Raymah,Member State,NO,269.0,...,0.0,3.0,,0.0,,,,3.0,,0.0


In [23]:
#gaul_adm2b = gaul_adm2b.merge(fs_df, on='adm2_name')

gaul_adm2b

Unnamed: 0,OBJECTID,adm2_code,adm2_name,str2_year,exp2_year,adm1_code,adm1_name,status,disp_area,adm0_code,adm0_name,shape_leng,FID_1,Shape_Le_1,Shape_Area,geometry,gaul area sqm
0,1.0,40621.0,Ryansoro,0.0,0.0,40548.0,Gitega,Member State,NO,43.0,Burundi,0.754714,0,0.754908,0.013315,"POLYGON ((496288.608 -431254.979, 496193.976 -...",1.635631e+08
1,2.0,40667.0,Ndava,0.0,0.0,40555.0,Mwaro,Member State,NO,43.0,Burundi,0.721689,1,0.721861,0.013685,"POLYGON ((499404.614 -400379.415, 499352.089 -...",1.681628e+08
2,3.0,40593.0,Buyengero,0.0,0.0,40545.0,Bururi,Member State,NO,43.0,Burundi,0.744050,2,0.744971,0.016754,"POLYGON ((476715.760 -449735.627, 476362.499 -...",2.057737e+08
3,4.0,40580.0,Bugarama,2004.0,0.0,40544.0,Bujumbura Rural,Member State,NO,43.0,Burundi,0.622149,3,0.622037,0.010563,"POLYGON ((465268.183 -435503.214, 465423.123 -...",1.297627e+08
4,6.0,40591.0,Burambi,0.0,0.0,40545.0,Bururi,Member State,NO,43.0,Burundi,0.803154,5,0.804049,0.022236,"POLYGON ((464874.927 -440939.752, 465081.601 -...",2.731161e+08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3184,8840.0,32711.0,Juban,0.0,0.0,3408.0,Al Dhale'e,Member State,NO,269.0,Yemen,1.345190,31830,1.345397,0.100466,"POLYGON ((2083055.569 1658634.157, 2084295.395...",1.200621e+09
3185,8841.0,32712.0,Qa'atabah,0.0,0.0,3408.0,Al Dhale'e,Member State,NO,269.0,Yemen,1.272211,31831,1.272467,0.057365,"POLYGON ((2064542.199 1635393.380, 2065419.279...",6.858922e+08
3186,8842.0,32935.0,Al Jafariyah,0.0,0.0,144971.0,Raymah,Member State,NO,269.0,Yemen,0.873004,31832,0.873457,0.023535,"POLYGON ((1943044.343 1706863.096, 1943040.815...",2.806844e+08
3187,8843.0,32941.0,As Salafiyah,0.0,0.0,144971.0,Raymah,Member State,NO,269.0,Yemen,0.882384,31833,0.882354,0.031684,"POLYGON ((1957971.813 1728988.174, 1958163.975...",3.776079e+08


In [None]:
#gaul_adm2b.columns

Index(['OBJECTID', 'adm2_code', 'adm2_name', 'str2_year', 'exp2_year',
       'adm1_code', 'adm1_name', 'status', 'disp_area', 'adm0_code',
       ...
       'HA0_022021', 'CS_062021', 'HA_062021', 'HA0_062021', 'CS_102021',
       'HA_102021', 'HA0_102021', 'CS_022022', 'HA_022022', 'HA0_022022'],
      dtype='object', length=155)

In [24]:
fs_df.to_file(out_f[:-4] + '.shp')

  fs_df.to_file(out_f[:-4] + '.shp')
