![Natural Earth logo](https://www.naturalearthdata.com/wp-content/themes/NEV/images/nev_logo.png "Natural Earth logo")
<div align="center">

## Scraping bodies of water data
</div>

This document explains how bodies of water data is processed and saved in a PostgreSQL database.

Link to data from Natural Earth: [bodies of water data](https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/physical/ne_10m_geography_marine_polys.zip)

<hr>

# 1. Download and load data

## The data is in shapefile format, compressed into a zip file. We need geopandas to read the spatial data and requests to download the zip file from the web.

In [None]:
import geopandas as gpd
import requests
import os

In [None]:
# Download URL of the Natural Earth "ne_10m_geography_marine_polys" archive.
# NOTE(review): the repeated "http//www.naturalearthdata.com" path segment is
# kept verbatim from the original link - confirm it is intentional.
waters_link: str = (
    "https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/physical/ne_10m_geography_marine_polys.zip"
)

## Get zip file and check status code. 200 is OK.

In [None]:
# Request the archive as a stream so the body is fetched lazily.
# A timeout is passed explicitly because requests otherwise waits indefinitely
# when the server stops responding.
r = requests.get(
    waters_link,
    stream=True,
    headers={"User-Agent": "XY"},
    timeout=30,
)
# Fail fast on an HTTP error so an error page is never written out as the zip.
r.raise_for_status()
# Last expression of the cell: shows the status code (200 is OK).
r.status_code

## Save zip file inside temp folder.

In [None]:
# Create the download folder (and any missing parents). exist_ok=True makes the
# call a no-op when the folder is already there, without a separate existence
# check that could race with another process creating it.
os.makedirs("../temp/waters/", exist_ok=True)

In [None]:
# Write the streamed response body to disk. 64 KiB chunks keep memory use
# bounded while avoiding the very large number of tiny writes that a
# 128-byte chunk size causes for a multi-megabyte archive.
with open('../temp/waters/ne_10m_geography_marine_polys.zip', 'wb') as fd:
    for chunk in r.iter_content(chunk_size=64 * 1024):
        fd.write(chunk)

In [None]:
# "zip://<archive>!<member>" path pointing at the .shp file inside the
# downloaded zip archive.
zip_file: str = (
    "zip://../temp/waters/ne_10m_geography_marine_polys.zip!ne_10m_geography_marine_polys.shp"
)

## Load shapefile into geopandas dataframe.

In [None]:
# Read the shapefile layer straight from the zip archive into a geopandas
# dataframe.
shapefile_layer = "ne_10m_geography_marine_polys"
waters_shp = gpd.read_file(zip_file, layer=shapefile_layer)

## Check the number of rows and columns and the CRS of the shapefile data.

In [None]:
# Display the (rows, columns) shape of the loaded dataframe.
waters_shp.shape

In [None]:
# Display the coordinate reference system attached to the loaded data.
waters_shp.crs

## Modify dataframe to display only columns that are valuable for our dataset.

In [None]:
# List every column name available in the loaded dataframe.
waters_shp.columns.tolist()

In [None]:
# Narrow the dataframe down to the columns selected for the dataset.
kept_columns = ["featurecla", "name", "note", "wikidataid", "ne_id", "geometry"]
filter_waters = waters_shp[kept_columns]

In [None]:
# Display the reduced dataframe.
filter_waters

# 2. Split and view available data

## Check body of water types

In [None]:
# Collect the distinct values of the "featurecla" column.
feature_classes = filter_waters["featurecla"]
water_types = set(feature_classes)

In [None]:
# Display the set of distinct feature classes.
water_types

## Filter and view all bays

In [None]:
# Boolean mask: True for rows whose "featurecla" equals "bay".
filter_bay = filter_waters["featurecla"] == "bay"

In [None]:
# Plot only the bay rows on the interactive explore() map, with popups enabled.
filter_waters.loc[filter_bay].explore(popup=True)

## Filter and view all channels

In [None]:
# Boolean mask: True for rows whose "featurecla" equals "channel".
filter_channel = filter_waters["featurecla"] == "channel"

In [None]:
# Plot only the channel rows on the interactive explore() map, with popups enabled.
filter_waters.loc[filter_channel].explore(popup=True)

## Filter and view all fjords

In [None]:
# Boolean mask: True for rows whose "featurecla" equals "fjord".
filter_fjord = filter_waters["featurecla"] == "fjord"

In [None]:
# Plot only the fjord rows on the interactive explore() map, with popups enabled.
filter_waters.loc[filter_fjord].explore(popup=True)

## Filter and view all generic

In [None]:
# Boolean mask: True for rows whose "featurecla" equals "generic".
filter_generic = filter_waters["featurecla"] == "generic"

In [None]:
# Plot only the generic rows on the interactive explore() map, with popups enabled.
filter_waters.loc[filter_generic].explore(popup=True)

## Filter and view all gulfs

In [None]:
# Boolean mask: True for rows whose "featurecla" equals "gulf".
filter_gulf = filter_waters["featurecla"] == "gulf"

In [None]:
# Plot only the gulf rows on the interactive explore() map, with popups enabled.
filter_waters.loc[filter_gulf].explore(popup=True)

## Filter and view all inlets

In [None]:
# Boolean mask: True for rows whose "featurecla" equals "inlet".
filter_inlet = filter_waters["featurecla"] == "inlet"

In [None]:
# Plot only the inlet rows on the interactive explore() map, with popups enabled.
filter_waters.loc[filter_inlet].explore(popup=True)

## Filter and view all lagoons

In [None]:
# Boolean mask: True for rows whose "featurecla" equals "lagoon".
filter_lagoon = filter_waters["featurecla"] == "lagoon"

In [None]:
# Plot only the lagoon rows on the interactive explore() map, with popups enabled.
filter_waters.loc[filter_lagoon].explore(popup=True)

## Filter and view all oceans

In [None]:
# Boolean mask: True for rows whose "featurecla" equals "ocean".
filter_ocean = filter_waters["featurecla"] == "ocean"

In [None]:
# Plot only the ocean rows on the interactive explore() map, with popups enabled.
filter_waters.loc[filter_ocean].explore(popup=True)

## Filter and view all reefs

In [None]:
# Boolean mask: True for rows whose "featurecla" equals "reef".
filter_reef = filter_waters["featurecla"] == "reef"

In [None]:
# Plot only the reef rows on the interactive explore() map, with popups enabled.
filter_waters.loc[filter_reef].explore(popup=True)

## Filter and view all rivers

In [None]:
# Boolean mask: True for rows whose "featurecla" equals "river".
filter_river = filter_waters["featurecla"] == "river"

In [None]:
# Plot only the river rows on the interactive explore() map, with popups enabled.
filter_waters.loc[filter_river].explore(popup=True)

## Filter and view all seas

In [None]:
# Boolean mask: True for rows whose "featurecla" equals "sea".
filter_sea = filter_waters["featurecla"] == "sea"

In [None]:
# Plot only the sea rows on the interactive explore() map, with popups enabled.
filter_waters.loc[filter_sea].explore(popup=True)

## Filter and view all sounds

In [None]:
# Boolean mask: True for rows whose "featurecla" equals "sound".
filter_sound = filter_waters["featurecla"] == "sound"

In [None]:
# Plot only the sound rows on the interactive explore() map, with popups enabled.
filter_waters.loc[filter_sound].explore(popup=True)

## Filter and view all straits

In [None]:
# Boolean mask: True for rows whose "featurecla" equals "strait".
filter_strait = filter_waters["featurecla"] == "strait"

In [None]:
# Plot only the strait rows on the interactive explore() map, with popups enabled.
filter_waters.loc[filter_strait].explore(popup=True)

# 3. Save data in database

In [None]:
from apps.utils.converters import pandas_to_gis_multipolygon
from apps.landform.models import BodyOfWater, BodyOfWaterType

## Create body of water FK

In [None]:
# Make sure every feature class found in the dataframe has a matching
# BodyOfWaterType row. Falsy values (e.g. None or "") are skipped.
# A plain loop is used because the calls are made only for their side effect;
# no result list is needed.
for type_name in water_types:
    if type_name:
        BodyOfWaterType.objects.get_or_create(name=type_name)

## Iterate through the body of water dataframe, convert the data, get the FK, and update or create the entry in the database.

In [None]:
# Upsert one BodyOfWater entry per dataframe row, keyed by its Natural Earth id.
# Each feature class is resolved to its BodyOfWaterType at most once and then
# reused, instead of issuing a lookup query for every single row.
type_by_name = {}

for row in filter_waters.itertuples(index=False):
    # Project helper; presumably converts the geometry into the multipolygon
    # type the model expects - confirm in apps.utils.converters.
    geometry = pandas_to_gis_multipolygon(row.geometry)

    if row.featurecla not in type_by_name:
        # .first() yields None when no type with this name exists, so rows
        # with an unknown feature class are saved without a type.
        type_by_name[row.featurecla] = BodyOfWaterType.objects.filter(
            name=row.featurecla
        ).first()
    body_of_water_type = type_by_name[row.featurecla]

    updated_values = {
        "geometry": geometry,
        "wikidata_id": row.wikidataid,
        "body_of_water_type": body_of_water_type,
        "name": row.name,
    }

    # Update the entry with this natural_earth_id, or create it if missing.
    BodyOfWater.objects.update_or_create(
        natural_earth_id=row.ne_id,
        defaults=updated_values,
    )