In [42]:
# Useful library imports
import pandas as pd
import geopandas as gpd
import folium
import os
import zipfile
from shapely import *
import numpy as np
import fiona


# functions to process PeopleGroups datasets
from features import *

<hr style="border: 5px solid #003262;" />
<hr style="border: 1px solid #fdb515;" />

# **Note**
# Please ignore all of the code below this point; it is still a work in progress.

<hr style="border: 5px solid #003262;" />
<hr style="border: 1px solid #fdb515;" />

# Revising Method

In [None]:
# Load the Morocco people-group sheet and rename its columns to the names
# needed in order to use the convert_to_geodataframe function.
morocco_ppg = pd.read_excel('./morocco_data/morocco-ppg.xlsx').rename(
    columns={
        'IMB_Affinity': 'IMB Affinity Group',
        'Affinity Bloc': 'Affinity Bloc',
        'People_Group': 'People Group',
        'People_Name': 'People Name',
    }
)

In [None]:
morocco_ppg

In [None]:
# Show only the people-group area rows whose 'Ctry' value is 'Morocco'.
# NOTE(review): `morocco_ppg_areas` is not assigned in any visible cell of this notebook;
# presumably it comes from `from features import *` or from a cell that was removed.
# Confirm its source, otherwise this raises NameError on a fresh kernel.
morocco_ppg_areas[morocco_ppg_areas['Ctry'] == 'Morocco']

In [None]:
# Left join: every row of morocco_ppg is kept; rows whose 'People Group' has no matching
# 'NmDisp' in morocco_ppg_areas get missing values in the columns coming from the areas table.
# NOTE(review): `morocco_ppg_areas` is not assigned in any visible cell — confirm where it is defined.
moroccan_ppgs = morocco_ppg.merge(morocco_ppg_areas, how='left', left_on='People Group', right_on='NmDisp')
moroccan_ppgs

In [None]:
# Boundary shapefile for Morocco (presumably administrative level 1, judging by the file name).
shapes = gpd.read_file('./morocco_data/geodata/geoBoundaries-MAR-ADM1.shp')
# NOTE(review): this preview appears to sit in the middle of a cell, so it would not be
# displayed — only the last expression of a cell is rendered.
shapes.head()

def adm1(ppg_df, shapes, adm1_name):
    '''
    Label every people group with the name of the first ADM1 subdivision its geometry overlaps.

    Parameter(s): DataFrame (People groups data frame with a 'geometry' column. Should be the cleaned result of the find_ppg_data function.)
    Parameter(s): GeoDataFrame (GeoDataFrame from adm1 shapefile. Must provide 'geometry' and 'shapeName' columns.)
    Parameter(s): String (Name of the administrative level 1 subdivision name - e.g. Province. Used as the name of the new column.)
    Returns: the ppg_df object that was passed in, with the adm1_name column added in place.
    Note: People groups that overlap no administrative region, or that have no geometry at all, have their ADM1 subdivision encoded as "MISSING".
    These groups need to have their information manually filled in.
    Note: When several subdivisions overlap a people group, only the first one (in the row order of shapes) is recorded.
    '''

    region_geometries = shapes['geometry']
    region_names = shapes['shapeName']

    matches = []
    for people_geometry in ppg_df['geometry']:
        # Rows without a geometry (None, or the NaN float pandas stores for an empty cell,
        # e.g. after a left merge) cannot be compared against the regions.
        if people_geometry is None or isinstance(people_geometry, float):
            matches.append('MISSING')
            continue

        # One boolean per region.
        # NOTE(review): `overlaps` is False when one geometry lies completely inside the other,
        # so a people group fully contained in a single region is reported as MISSING.
        # Consider `intersects` if that is not the intended behaviour.
        overlap_mask = np.asarray(region_geometries.overlaps(people_geometry), dtype=bool)

        # Select names with the mask itself. The previous code fed index *labels* to `.iloc`
        # (which expects positions) and also passed the 'MISSING' sentinel to `.iloc`,
        # which raised instead of recording the unmatched group.
        overlapping_names = region_names[overlap_mask]
        if len(overlapping_names) > 0:
            matches.append(overlapping_names.iloc[0])
        else:
            matches.append('MISSING')

    ppg_df[adm1_name] = matches
    print(f"Please check the generated {adm1_name} column for missing values, encoded as 'MISSING'")
    print(f"There are {matches.count('MISSING')} unassigned people groups.")
    return ppg_df

# Adds a "Region" column to moroccan_ppgs in place (adm1 assigns the column on the frame it
# receives and returns that same frame), and prints how many groups were left as 'MISSING'.
adm1(moroccan_ppgs, shapes, "Region")

In [None]:
morocco_gdf = gpd.GeoDataFrame(moroccan_ppgs, geometry='geometry')

In [None]:
morocco_gdf.explore()

In [None]:
t = morocco_gdf.loc[morocco_gdf['People Group'] == "Saharawi", 'geometry'].iloc[0]

In [None]:
type(t)

In [None]:
morocco_gdf;
shapes;

`morocco_gdf CRS`: 3857  
`shapes CRS`: 4326

In [None]:
# change crs of morocco_gdf to 4326

g = morocco_gdf.to_crs(4326)

In [None]:
# Interactive map of the boundary polygons, drawn in red; kept in a variable so another
# layer can be drawn on top of it.
# NOTE(review): this rebinds `t`, which an earlier cell used for the Saharawi geometry.
t = shapes.explore(
    color='red'
)

In [None]:
g.explore(
    m=t
)

In [None]:
tachelhit = g.iloc[2]['geometry']

In [None]:
shapes.geometry.overlaps(tachelhit)

In [None]:
shapes.iloc[[3,4,5,7,11]]

In [None]:
riffis = g.iloc[1]['geometry']

In [None]:
g

In [None]:
#  FIRST WORKING TEST
# Manual check of the overlap approach on a single people group.

# Bare names with a trailing ';' display nothing; they only fail if a frame is undefined.
morocco_gdf;
shapes;
# morocco_gdf CRS: 3857
# shapes CRS: 4326

# Reproject the people-group areas to EPSG:4326, the CRS noted above for shapes.
g = morocco_gdf.to_crs(4326)
# do a test of the tachelhit people group
# NOTE(review): relies on Tachelhit being the third row (position 2) of g — confirm.
tachelhit = g.iloc[2]['geometry']
# Boolean Series: True for each row of shapes whose geometry overlaps tachelhit
k =  shapes.geometry.overlaps(tachelhit)
# TODO: select those boundaries (k is computed but not used yet)


In [None]:
morocco_gdf.crs

In [None]:
def find_all_adm1(ppg_gdf, shapes, adm1_name):
    '''
    List, for every people group, the names of all ADM1 subdivisions its geometry overlaps.

    Parameter(s): GeoDataFrame (People group areas; its geometry column is compared against shapes.)
    Parameter(s): GeoDataFrame (GeoDataFrame from adm1 shapefile. Must provide a 'shapeName' column.)
    Parameter(s): String (Name of the administrative level 1 subdivision - e.g. Region. The new column is named "<adm1_name>s Present".)
    Returns: a reprojected (EPSG:4326) copy of ppg_gdf with the new column, a comma-separated string of
    subdivision names, containing only the people groups that overlap at least one subdivision.
    Note: the frames passed in are not modified; to_crs returns new objects.
    '''
    # Both layers must be in the same CRS before their geometries are compared;
    # EPSG:4326 was chosen arbitrarily as the common one.
    ppg_gdf = ppg_gdf.to_crs(4326)
    shapes = shapes.to_crs(4326)

    column_name = f'{adm1_name}s Present'
    region_names = shapes['shapeName']
    boundaries = []

    for people_polygon in ppg_gdf.geometry:
        # One boolean per subdivision.
        # NOTE(review): `overlaps` is False when one geometry lies completely inside the other,
        # so a people group fully contained in a single subdivision is filtered out below.
        # Consider `intersects` if that is not the intended behaviour.
        overlap_mask = np.asarray(shapes.geometry.overlaps(people_polygon), dtype=bool)

        # Select names with the mask itself. The previous code passed index *labels* to `.iloc`,
        # which expects positions and is only correct for a default 0..n-1 index.
        all_boundaries_found = region_names[overlap_mask].tolist()

        if all_boundaries_found:
            # stored in a string to make it look nice in the DataFrame
            boundaries.append(', '.join(all_boundaries_found))
        else:
            boundaries.append('NONE')

    ppg_gdf[column_name] = boundaries
    # filter out the NONE values
    return ppg_gdf[ppg_gdf[column_name] != 'NONE']

In [None]:
y = find_all_adm1(morocco_gdf, shapes, 'Region')
y

---

In [2]:
import requests # library to handle requests
from bs4 import BeautifulSoup # library to parse HTML documents
from io import StringIO # lets pd.read_html read the table markup as a file-like object

# Scrape the ISO 3166 country-code table from Wikipedia into a two-column DataFrame
# ('Country', 'Alpha 3 code').
wikiurl = 'https://en.wikipedia.org/wiki/List_of_ISO_3166_country_codes'
table_class="wikitable sortable jquery-tablesorter"  # NOTE(review): not used below
# A timeout keeps the cell from hanging indefinitely; raise_for_status surfaces HTTP errors
# here instead of letting an error page be parsed as if it were the article.
response=requests.get(wikiurl, timeout=30)
response.raise_for_status()

soup = BeautifulSoup(response.text, 'html.parser')
# First table on the page carrying the "wikitable" class.
indiatable = soup.find('table',{'class':"wikitable"})
if indiatable is None:
    # Without this check the string "None" would be handed to pd.read_html.
    raise ValueError(f"No table with class 'wikitable' found at {wikiurl}")

# Passing literal HTML to read_html is deprecated; wrap the markup in StringIO.
iso_tables = pd.read_html(StringIO(str(indiatable)))

# The table has a two-level header; keep only the lower level.
iso_table = iso_tables[0].droplevel(0, axis=1)
# Strip footnote markers such as "[5]" so the rename does not depend on the exact
# footnote number Wikipedia currently shows.
iso_table.columns = iso_table.columns.str.replace(r'\[[^\]]*\]', '', regex=True).str.strip()

iso_country_codes = iso_table.rename({'Country name':'Country', 'Alpha-3 code':'Alpha 3 code'}, axis=1)

iso_country_codes = iso_country_codes[['Country', 'Alpha 3 code']]

In [None]:
iso_country_codes

In [None]:
# Boundary shapefile from the CGAZ dataset (presumably world-wide ADM1 boundaries, judging by the file name).
cgaz = gpd.read_file('./cgaz/geoBoundariesCGAZ_ADM1.shp', encoding='utf-8')

# Attach a country name to every boundary by matching 'shapeGroup' to the ISO alpha-3 code.
# The left join keeps boundaries whose code has no match; 'LEVEL' and the duplicate code column are then removed.
# NOTE(review): rows with index labels 3209 and 3210 are dropped without a stated reason, and these
# labels depend on the row order of the shapefile — document why, or select them by content instead.
cgaz_named = cgaz.merge(iso_country_codes, left_on='shapeGroup', right_on='Alpha 3 code', how='left').drop(['LEVEL', 'Alpha 3 code'], axis=1).drop([3209, 3210], axis=0)

In [None]:
cgaz_named.to_csv('cgaz_named.csv')

---

<hr style="border: 5px solid #003262;" />
<hr style="border: 1px solid #fdb515;" />

# Loading data remotely

In [32]:
subnational_boundaries = gpd.read_file('https://github.com/andrewjoc/ihs/raw/main/people_groups_verification/data/cgaz.topojson')

In [34]:
subnational_population = pd.read_csv('https://github.com/andrewjoc/ihs/raw/main/people_groups_verification/data/subnationalPopulation.csv')

In [39]:
people_points = pd.read_excel('https://github.com/andrewjoc/ihs/raw/main/people_groups_verification/data/People_Groups.xlsx')

### people_areas is from ArcGIS -> ~130 MB, which is too large for GitHub
### attempted to use Git LFS, but it does not seem to work. Interested users must download the data from https://go-imb.opendata.arcgis.com/datasets/imb::apg-people-group-areas/explore?location=43.103723%2C70.948650%2C4.70

In [None]:
people_areas = gpd.read_file('data/people_areas.geojson')

In [None]:
people_areas