## Data File Conversion

This notebook exists solely to convert our KML/CSV/SHP data to usable geojson data.

In [50]:
import json
import kml2geojson
import geopandas as gpd
import pandas as pd
from shapely.geometry import Point
import altair as alt

#### Converting the Marine Debris Tracker Data to GeoJSON

In [33]:
mdt = pd.read_csv("mdt-dataChennai.csv")
mdt.head()

Unnamed: 0,list_name,master_item_name,master_material,itemname,material,quantity,description,latitude,longitude,altitude,radius,location,timestamp,dt,project_name,username,manual_upload,event_name,id,log_index
0,Primary List,Mask,PLASTIC,Fabric Face Masks,CLOTH,3,,12.909849,80.171725,-76.354335,3.517,"Sholinganallur, India",20210730124137,2021-07-30 12:41:37,Default,102375240 (SciStarter),0,,1667588,42857
1,Primary List,Other Plastic,PLASTIC,Other Plastic,PLASTIC,3,,12.909449,80.171378,-79.029443,3.394,"Sholinganallur, India",20210730124236,2021-07-30 12:42:36,Default,102375240 (SciStarter),0,,1667593,42857
2,Primary List,Other Plastic,PLASTIC,Other Plastic,PLASTIC,2,,13.006676,80.262749,-61.406507,6.541,"Chennai, Tamil Nadu, India",20210730123721,2021-07-30 12:37:21,Default,102350042 (SciStarter),0,,1667625,42862
3,Primary List,Paper and Cardboard,PAPER & LUMBER,Paper and Cardboard,PAPER & LUMBER,1,,12.825123,80.204952,9.42675,65.0,India,20210730122843,2021-07-30 12:28:43,Default,102375156 (SciStarter),0,,1667501,42847
4,Primary List,Paper and Cardboard,PAPER & LUMBER,Paper and Cardboard,PAPER & LUMBER,2,,13.061154,80.225476,-70.638901,7.449,"Chennai, Tamil Nadu, India",20210730124040,2021-07-30 12:40:40,Default,102375247 (SciStarter),0,,1667605,42860


In [40]:
geo_mdt = gpd.GeoDataFrame(mdt, 
                        geometry=gpd.points_from_xy(mdt.longitude, mdt.latitude))

In [43]:
geo_mdt = geo_mdt.set_crs('EPSG: 4326')

In [45]:
geo_mdt.to_file("mdt-dataChennai.geojson", driver='GeoJSON')

#### Adding Population per Ward CSV to the Wards GeoJSON

In [46]:
wards = gpd.read_file("gcc-divisions-latest.geojson")
wards.head()

Unnamed: 0,Name,Description,geometry
0,168,,"MULTIPOLYGON Z (((80.20975 13.02585 0.00000, 8..."
1,169,,"MULTIPOLYGON Z (((80.23914 13.03186 0.00000, 8..."
2,170,,"MULTIPOLYGON Z (((80.24917 13.01847 0.00000, 8..."
3,171,,"MULTIPOLYGON Z (((80.27906 13.02775 0.00000, 8..."
4,174,,"MULTIPOLYGON Z (((80.27704 13.01383 0.00000, 8..."


In [29]:
df = pd.read_csv('Chennai_pop_2020estimation.csv')
df.head()

Unnamed: 0,ZoneNo,Div,ZoneName,WardName,PopulationEsti2020,Area(km2),PopulationDensity
0,I,1.0,THIRUVOTTIYUR,Sathyavani muthu nagar,3050,1.73,1758
1,I,2.0,THIRUVOTTIYUR,Kathivakkam,9553,2.92,3274
2,I,3.0,THIRUVOTTIYUR,Annai Sivagami nagar,14743,3.16,4660
3,I,4.0,THIRUVOTTIYUR,Ernavoor,11901,2.08,5718
4,I,5.0,THIRUVOTTIYUR,Wimco nagar,22818,2.32,9833


In [31]:
df['Div'] = df['Div'].astype(str)
df['Div'] = df['Div'].str[:-2]
df.head()

Unnamed: 0,ZoneNo,Div,ZoneName,WardName,PopulationEsti2020,Area(km2),PopulationDensity
0,I,1,THIRUVOTTIYUR,Sathyavani muthu nagar,3050,1.73,1758
1,I,2,THIRUVOTTIYUR,Kathivakkam,9553,2.92,3274
2,I,3,THIRUVOTTIYUR,Annai Sivagami nagar,14743,3.16,4660
3,I,4,THIRUVOTTIYUR,Ernavoor,11901,2.08,5718
4,I,5,THIRUVOTTIYUR,Wimco nagar,22818,2.32,9833


In [48]:
merged_data = wards.merge(df, how='inner',
                        left_on='Name',
                        right_on='Div')
merged_data.head()

Unnamed: 0,Name,Description,geometry,ZoneNo,Div,ZoneName,WardName,PopulationEsti2020,Area(km2),PopulationDensity
0,168,,"MULTIPOLYGON Z (((80.20975 13.02585 0.00000, 8...",XIV,168,PERUNGUDI,,81399,3.57,22832
1,169,,"MULTIPOLYGON Z (((80.23914 13.03186 0.00000, 8...",XIV,169,PERUNGUDI,,61763,3.77,16382
2,170,,"MULTIPOLYGON Z (((80.24917 13.01847 0.00000, 8...",XIII,170,ADYAR,Guindy,43988,2.7,16275
3,171,,"MULTIPOLYGON Z (((80.27906 13.02775 0.00000, 8...",XIII,171,ADYAR,,46785,3.35,13957
4,174,,"MULTIPOLYGON Z (((80.27704 13.01383 0.00000, 8...",XIII,174,ADYAR,,46484,3.46,13416


In [52]:
merged_data.to_file("ChennaiWards_pop2020estimation.geojson", driver='GeoJSON')

#### Failed conversion to GeoJSON from KML

In [20]:
##dunno why this isn't working for me but the correct way is in the r file
gdf = gpd.read_file('chennai_basin_waterbodies.kml', driver='KML')
gdf.to_file('chennai_basin_waterbodies.geojson', driver='GeoJSON')

DriverError: unsupported driver: 'KML'