In [54]:
from pymongo import MongoClient
import fiona
import geopandas as gpd
import json

## Set up the MongoDB connection

In [2]:
# Connection string for the MongoDB server running on this machine (port 27017).
mongo_uri = "mongodb://localhost:27017"

# Client handle built from the connection string above.
client = MongoClient(host=mongo_uri)

In [3]:
# Handle to the database that holds the collections written by this notebook.
db = client["geocourse_data_engineering"]

## Load data and store it in MongoDB

In [4]:
# GeoPackage whose layers are read and uploaded by the cells below.
# The path is relative, so it resolves against the notebook's working directory.
file_path = "../../transform/output.gpkg"

In [6]:
# Collect the names of all layers stored in the GeoPackage.
layers = fiona.listlayers(file_path)

# Show the layer names as the cell output.
layers

['airports_indonesia',
 'brn_protected_areas',
 'sgp_protected_areas',
 'tls_protected_areas',
 'twitter_word_counts',
 'candi_jawatengah',
 'admin_kawasan_jateng',
 'rdtr_takengon',
 'rdtr_takengon_vertices',
 'indonesian_cities',
 'twitter_users',
 'twitter_context_annotations']

In [25]:
# Read the airports layer and convert it to a GeoJSON-like dict.
# NOTE: despite the `df_` prefix this is a dict, not a DataFrame; its "features"
# entry is the sequence of feature dicts that later cells count and insert.
df_airport = gpd.read_file(file_path, layer="airports_indonesia").to_geo_dict()

In [27]:
# Number of airport features read from the layer.
len(df_airport["features"])

638

In [29]:
# Bulk-insert every airport feature as one document of the "airport" collection.
# NOTE(review): PyMongo is documented to add an `_id` key to each inserted dict
# in place, so re-running this cell without re-reading the layer will likely
# fail on duplicate keys or duplicate the data — confirm before re-running.
db.airport.insert_many(df_airport["features"])

InsertManyResult([ObjectId('67cc7d56ca93a2f82c32d894'), ObjectId('67cc7d7eca93a2f82c32d895'), ObjectId('67cc7d7eca93a2f82c32d896'), ObjectId('67cc7d7eca93a2f82c32d897'), ObjectId('67cc7d7eca93a2f82c32d898'), ObjectId('67cc7d7eca93a2f82c32d899'), ObjectId('67cc7d7eca93a2f82c32d89a'), ObjectId('67cc7d7eca93a2f82c32d89b'), ObjectId('67cc7d7eca93a2f82c32d89c'), ObjectId('67cc7d7eca93a2f82c32d89d'), ObjectId('67cc7d7eca93a2f82c32d89e'), ObjectId('67cc7d7eca93a2f82c32d89f'), ObjectId('67cc7d7eca93a2f82c32d8a0'), ObjectId('67cc7d7eca93a2f82c32d8a1'), ObjectId('67cc7d7eca93a2f82c32d8a2'), ObjectId('67cc7d7eca93a2f82c32d8a3'), ObjectId('67cc7d7eca93a2f82c32d8a4'), ObjectId('67cc7d7eca93a2f82c32d8a5'), ObjectId('67cc7d7eca93a2f82c32d8a6'), ObjectId('67cc7d7eca93a2f82c32d8a7'), ObjectId('67cc7d7eca93a2f82c32d8a8'), ObjectId('67cc7d7eca93a2f82c32d8a9'), ObjectId('67cc7d7eca93a2f82c32d8aa'), ObjectId('67cc7d7eca93a2f82c32d8ab'), ObjectId('67cc7d7eca93a2f82c32d8ac'), ObjectId('67cc7d7eca93a2f82c32d8

In [39]:
# Query the "airport" collection for features whose nested `properties.type`
# field equals "large_airport".
find_data = db["airport"].find({"properties.type": "large_airport"})

# Drain the cursor into a list so the matches can be printed, counted and reused.
list_data = [document for document in find_data]

# Print the matching documents, then how many there are (one value per line).
print(list_data, len(list_data), sep="\n")

[{'_id': ObjectId('67cc7d56ca93a2f82c32d894'), 'id': '0', 'type': 'Feature', 'properties': {'id': 26751, 'ident': 'WADD', 'type': 'large_airport', 'name': 'Denpasar I Gusti Ngurah Rai International Airport', 'latitude_deg': -8.748409, 'longitude_deg': 115.167123, 'elevation_ft': 14.0, 'continent': 'AS', 'country_name': 'Indonesia', 'iso_country': 'ID', 'region_name': 'Bali', 'iso_region': 'ID-BA', 'local_region': 'BA', 'municipality': 'Kuta, Badung', 'scheduled_service': 1, 'gps_code': 'WADD', 'icao_code': 'WADD', 'iata_code': 'DPS', 'local_code': None, 'home_link': 'http://www.angkasapura1.co.id/eng/location/bali.htm', 'wikipedia_link': 'https://en.wikipedia.org/wiki/Ngurah_Rai_International_Airport', 'keywords': 'WRRR, Bali, Denpasar International Airport, Denpasar', 'score': 1200, 'last_updated': '2025-02-21T20:46:40+00:00', 'source': 'OurAirports'}, 'geometry': {'type': 'Point', 'coordinates': [115.167123, -8.748409, 14.0]}}, {'_id': ObjectId('67cc7d7eca93a2f82c32d895'), 'id': '1',

In [42]:
# Names of the large airports returned by the query above.
[airport["properties"]["name"] for airport in list_data]

['Denpasar I Gusti Ngurah Rai International Airport',
 'Soekarno-Hatta International Airport',
 'Juanda International Airport',
 'Hasanuddin International Airport',
 'Kualanamu International Airport',
 'Dortheys Hiyo Eluay International Airport',
 'Sultan Aji Muhammad Sulaiman Sepinggan International Airport',
 'Dhoho International Airport']

## Load the rest of the data

In [43]:
# Split the remaining layers into spatial layers (at least one feature with a
# geometry) and attribute-only layers (no feature with a geometry).
geo_layers = []
nongeo_layers = []

for layer in layers:
  # The airports layer was already uploaded to the "airport" collection, so skip
  # it by name instead of relying on it being the first entry of the listing.
  if layer == "airports_indonesia":
    continue

  with fiona.open(file_path, layer=layer) as src:
    # any() stops at the first feature whose geometry is truthy. This only
    # checks that a geometry is present; it does not validate the geometry.
    has_geometry = any(feature.get("geometry") for feature in src)

  if has_geometry:
    print(f"Layer '{layer}' contains valid geometry")
    geo_layers.append(layer)
  else:
    # Also reached for layers that contain no features at all.
    nongeo_layers.append(layer)
    print(f"Layer '{layer}' has no valid geometries.")

Layer 'brn_protected_areas' contains valid geometry
Layer 'sgp_protected_areas' contains valid geometry
Layer 'tls_protected_areas' contains valid geometry
Layer 'twitter_word_counts' contains valid geometry
Layer 'candi_jawatengah' contains valid geometry
Layer 'admin_kawasan_jateng' contains valid geometry
Layer 'rdtr_takengon' contains valid geometry
Layer 'rdtr_takengon_vertices' contains valid geometry
Layer 'indonesian_cities' contains valid geometry
Layer 'twitter_users' has no valid geometries.
Layer 'twitter_context_annotations' has no valid geometries.


In [45]:
# Show the layers classified as spatial.
geo_layers

['brn_protected_areas',
 'sgp_protected_areas',
 'tls_protected_areas',
 'twitter_word_counts',
 'candi_jawatengah',
 'admin_kawasan_jateng',
 'rdtr_takengon',
 'rdtr_takengon_vertices',
 'indonesian_cities']

In [46]:
# Show the layers classified as attribute-only (no geometry found).
nongeo_layers

['twitter_users', 'twitter_context_annotations']

In [49]:
# Push every spatial layer into a MongoDB collection named after the layer.
for layer in geo_layers:
  # Read the layer and convert it to a GeoJSON-like dict in one step; the
  # "features" entry holds the per-feature dicts that become documents.
  geo_dict = gpd.read_file(file_path, layer=layer).to_geo_dict()
  collection = db[layer]
  collection.insert_many(geo_dict["features"])
  print(f"{layer} successfully uploaded into MongoDB")

brn_protected_areas successfully uploaded into MongoDB
sgp_protected_areas successfully uploaded into MongoDB
tls_protected_areas successfully uploaded into MongoDB
twitter_word_counts successfully uploaded into MongoDB
candi_jawatengah successfully uploaded into MongoDB
admin_kawasan_jateng successfully uploaded into MongoDB
rdtr_takengon successfully uploaded into MongoDB
rdtr_takengon_vertices successfully uploaded into MongoDB
indonesian_cities successfully uploaded into MongoDB


In [62]:
# Upload every attribute-only layer as plain records into a MongoDB collection
# named after the layer.
for layer in nongeo_layers:
  gdf = gpd.read_file(file_path, layer=layer)
  json_dict = gdf.to_dict(orient="records")  # one dict per row

  # A layer without any rows is also classified as non-spatial, and
  # insert_many() rejects an empty document list. Skip such layers so one
  # empty layer does not abort the upload of the remaining ones.
  if not json_dict:
    print(f"{layer} has no records; nothing uploaded")
    continue

  db[layer].insert_many(json_dict)
  print(f"{layer} successfully uploaded into MongoDB")

twitter_users successfully uploaded into MongoDB
twitter_context_annotations successfully uploaded into MongoDB
