## GEOJSON ##

In [1]:
# import libraries

import pandas as pd
import geopandas as gpd

In [2]:
# Load the combined geographies GeoJSON (the output of CP-part1).

geojson_path = 'combined_geographies_geojson.geojson'
geography = gpd.read_file(geojson_path)

In [3]:
# Split every MultiPolygon into its individual Polygon parts; a region made of
# several parts therefore ends up on several rows.
# index_parts is passed explicitly: geopandas releases that are changing its
# default warn when it is left unset. The extra per-part index level is not
# needed here because the index is rebuilt as a plain 0..n-1 range right after.

geography_clean = (
    geography
    .explode(index_parts = False)
    .reset_index(drop = True)
)
geography_clean.head()

  geography_clean = geography.explode()


Unnamed: 0,GeoUID,Region.Name,provincename,geometry
0,4801003,Cypress County (MD),Alberta,"POLYGON ((-110.00300 50.83380, -110.00330 50.8..."
1,4801006,Medicine Hat (CY),Alberta,"POLYGON ((-110.80100 50.05270, -110.79930 50.0..."
2,4801008,Forty Mile County No. 8 (MD),Alberta,"POLYGON ((-110.67380 48.99870, -110.75010 48.9..."
3,4801009,Foremost (VL),Alberta,"POLYGON ((-111.43600 49.46490, -111.43760 49.4..."
4,4801014,Bow Island (T),Alberta,"POLYGON ((-111.36920 49.88710, -111.36920 49.8..."


In [4]:
# Get the minimum and maximum longitude (x) and latitude (y) of every row's
# geometry; `.bounds` returns them as the columns minx, miny, maxx and maxy.

min_max_xy = geography_clean['geometry'].bounds

# The geometry column itself stays available under its original name.
geojson_bound_coordinates = geography_clean['geometry']

min_max_xy

Unnamed: 0,minx,miny,maxx,maxy
0,-111.6984,48.9987,-110.0030,50.8338
1,-110.8010,49.9893,-110.6177,50.1132
2,-111.9016,48.9972,-110.5424,50.0692
3,-111.4631,49.4649,-111.4358,49.4846
4,-111.3930,49.8588,-111.3295,49.8874
...,...,...,...,...
32802,-136.0258,60.7660,-135.2729,60.8963
32803,-132.8086,60.1903,-132.7264,60.2672
32804,-134.7041,60.2764,-133.8828,60.6793
32805,-135.9887,60.7496,-134.9649,61.2700


In [5]:
# Copy the bounding-box values onto the clean frame under descriptive names.
# The mapping order fixes the order in which the new columns are appended.

bound_to_column = {
    'minx': 'min_longitude',
    'miny': 'min_latitude',
    'maxx': 'max_longitude',
    'maxy': 'max_latitude',
}
for bound_name, column_name in bound_to_column.items():
    geography_clean[column_name] = min_max_xy[bound_name]

# Show the updated dataset.
geography_clean

Unnamed: 0,GeoUID,Region.Name,provincename,geometry,min_longitude,min_latitude,max_longitude,max_latitude
0,4801003,Cypress County (MD),Alberta,"POLYGON ((-110.00300 50.83380, -110.00330 50.8...",-111.6984,48.9987,-110.0030,50.8338
1,4801006,Medicine Hat (CY),Alberta,"POLYGON ((-110.80100 50.05270, -110.79930 50.0...",-110.8010,49.9893,-110.6177,50.1132
2,4801008,Forty Mile County No. 8 (MD),Alberta,"POLYGON ((-110.67380 48.99870, -110.75010 48.9...",-111.9016,48.9972,-110.5424,50.0692
3,4801009,Foremost (VL),Alberta,"POLYGON ((-111.43600 49.46490, -111.43760 49.4...",-111.4631,49.4649,-111.4358,49.4846
4,4801014,Bow Island (T),Alberta,"POLYGON ((-111.36920 49.88710, -111.36920 49.8...",-111.3930,49.8588,-111.3295,49.8874
...,...,...,...,...,...,...,...,...
32802,6001055,Ibex Valley (HAM),Yukon,"POLYGON ((-135.98870 60.82550, -135.95770 60.8...",-136.0258,60.7660,-135.2729,60.8963
32803,6001057,Teslin (TL),Yukon,"POLYGON ((-132.73940 60.23410, -132.73740 60.2...",-132.8086,60.1903,-132.7264,60.2672
32804,6001058,Marsh Lake (NO),Yukon,"POLYGON ((-134.20560 60.67930, -134.11920 60.6...",-134.7041,60.2764,-133.8828,60.6793
32805,6001059,Macpherson-Grizzly Valley (NO),Yukon,"POLYGON ((-134.96490 60.74960, -135.00170 60.7...",-135.9887,60.7496,-134.9649,61.2700


In [6]:
# Drop the "geometry" column now that its bounds are stored as plain numbers.
# The result is rebound instead of using inplace=True, so the object is not
# mutated behind the back of the cells that displayed it earlier.
# errors='ignore' makes the cell safe to re-run after the column is gone.

geography_clean = geography_clean.drop(columns = ['geometry'],
                                       errors = 'ignore')
geography_clean

Unnamed: 0,GeoUID,Region.Name,provincename,min_longitude,min_latitude,max_longitude,max_latitude
0,4801003,Cypress County (MD),Alberta,-111.6984,48.9987,-110.0030,50.8338
1,4801006,Medicine Hat (CY),Alberta,-110.8010,49.9893,-110.6177,50.1132
2,4801008,Forty Mile County No. 8 (MD),Alberta,-111.9016,48.9972,-110.5424,50.0692
3,4801009,Foremost (VL),Alberta,-111.4631,49.4649,-111.4358,49.4846
4,4801014,Bow Island (T),Alberta,-111.3930,49.8588,-111.3295,49.8874
...,...,...,...,...,...,...,...
32802,6001055,Ibex Valley (HAM),Yukon,-136.0258,60.7660,-135.2729,60.8963
32803,6001057,Teslin (TL),Yukon,-132.8086,60.1903,-132.7264,60.2672
32804,6001058,Marsh Lake (NO),Yukon,-134.7041,60.2764,-133.8828,60.6793
32805,6001059,Macpherson-Grizzly Valley (NO),Yukon,-135.9887,60.7496,-134.9649,61.2700


In [7]:
# Collapse the duplicated rows (one per polygon part) back to a single row per
# region, keeping the smallest minimum and the largest maximum of each bound.

region_keys = ['GeoUID', 'Region.Name', 'provincename']

# Named aggregations: output column = (source column, reducer).
extent_aggregations = {
    'min_longitude': ('min_longitude', 'min'),
    'max_longitude': ('max_longitude', 'max'),
    'min_latitude': ('min_latitude', 'min'),
    'max_latitude': ('max_latitude', 'max'),
}

geography_clean = (
    geography_clean
    .groupby(region_keys)
    .agg(**extent_aggregations)
    .reset_index()
)

In [8]:
# Inspect the current state of geography_clean; as the last (bare) expression
# of the cell, the frame is rendered as the cell's output.

geography_clean

Unnamed: 0,GeoUID,Region.Name,provincename,min_longitude,max_longitude,min_latitude,max_latitude
0,1001101,"Division No. 1, Subd. V (SNO)",Newfoundland and Labrador,-53.6508,-53.0045,46.6111,46.9979
1,1001105,Portugal Cove South (T),Newfoundland and Labrador,-53.2618,-53.2498,46.7011,46.7170
2,1001113,Trepassey (T),Newfoundland and Labrador,-53.4166,-53.3158,46.6885,46.7835
3,1001120,St. Shott's (T),Newfoundland and Labrador,-53.5952,-53.5755,46.6289,46.6392
4,1001124,"Division No. 1, Subd. U (SNO)",Newfoundland and Labrador,-53.2651,-52.7789,46.7561,47.2894
...,...,...,...,...,...,...,...
5157,6208068,Umingmaktok (SET),Nunavut,-108.0283,-107.7360,67.6027,67.7568
5158,6208073,Cambridge Bay (HAM),Nunavut,-105.4180,-104.9311,69.0557,69.2229
5159,6208081,Gjoa Haven (HAM),Nunavut,-95.9975,-95.8105,68.6050,68.6769
5160,6208087,Taloyoak (HAM),Nunavut,-93.6245,-93.3805,69.5070,69.5977


In [9]:
# Write the cleaned table to a CSV file, leaving the row index out of the file.

csv_path = 'geography_clean.csv'
geography_clean.to_csv(csv_path, index = False)