In [None]:
"""

from django.contrib.gis.db import models as geomodels
from cities_light.abstract_models import (
    AbstractCity,
    AbstractCountry,
    AbstractRegion,
    AbstractSubRegion,
)
from cities_light.receivers import connect_default_signals


class Country(AbstractCountry):
    boundary = geomodels.MultiPolygonField(null=True, blank=True)

connect_default_signals(Country)

class Region(AbstractRegion):
    boundary = geomodels.MultiPolygonField(null=True, blank=True)

connect_default_signals(Region)

class SubRegion(AbstractSubRegion):
    boundary = geomodels.MultiPolygonField(null=True, blank=True)

connect_default_signals(SubRegion)

class City(AbstractCity):
    boundary = geomodels.MultiPolygonField(null=True, blank=True)

connect_default_signals(City)
"""

In [None]:
!pip install pygadm geonames-lib matplotlib geopandas folium shapely


In [None]:
import os
import shutil
import urllib.request as request
import zipfile

# URL of the geonames dataset
url = 'http://download.geonames.org/export/dump/allCountries.zip'

# Paths for the zip file and the extracted txt file
zip_path = 'geonames/allCountries.zip'
txt_path = 'geonames/allCountries.txt'

# Skip all network and disk work when the extracted file is already present
if not os.path.exists(txt_path):
    # Make sure the target directory exists before writing into it
    os.makedirs(os.path.dirname(zip_path), exist_ok=True)

    # Stream the archive to disk in chunks instead of reading the whole
    # response into memory first
    with request.urlopen(url) as response, open(zip_path, 'wb') as out_file:
        shutil.copyfileobj(response, out_file)

    # ZipFile.extract looks the member up by its name inside the archive, so
    # the local directory prefix of txt_path must not be part of that name.
    # Extract by bare file name into the directory txt_path points at.
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extract(os.path.basename(txt_path), path=os.path.dirname(txt_path))

    print("Downloaded and saved dataset.")
else:
    print("Dataset already exists.")


In [None]:
import geonames
import gc  # Garbage Collector

# Relies on txt_path (the path of allCountries.txt) being defined already.

# Initialize the GeoNames object and load the data
geo_data = geonames.GeoNames(txt_path)

# Keep only the columns used for lookups and downcast them in one astype call.
# astype returns a new frame, so nothing is assigned into a slice of
# geo_data.data (the column-by-column assignment raised SettingWithCopyWarning).
geonames_data = geo_data.data[['geonameid', 'name', 'latitude', 'longitude']].astype(
    {'geonameid': 'int32', 'latitude': 'float32', 'longitude': 'float32'}
)

# Clear memory of the original full data
del geo_data
gc.collect()  # Explicitly trigger garbage collection


In [None]:
import pandas as pd
from tqdm.notebook import tqdm
import geopandas as gpd

import setup_django
from geoprod.cities.models import City, Region, Country, SubRegion
import pygadm

In [None]:
def get_all_boundaries(content_level, countries):
    """
    Fetches and concatenates administrative boundaries for all countries.

    Args:
    content_level (int): The administrative level to fetch for each country.
    countries (QuerySet): Countries to fetch boundaries for. The object must
        support ``iterator()`` and ``count()``, and each item must expose
        ``code3`` and ``name``.

    Returns:
    GeoDataFrame: A GeoDataFrame containing boundaries for all countries, with
        a 'country_code' column holding the ``code3`` of the country each row
        was fetched for. An empty GeoDataFrame is returned when no country
        could be fetched.
    """
    # Collect one frame per country and concatenate once at the end; calling
    # pd.concat inside the loop re-copies all accumulated rows on every pass.
    country_frames = []

    for country in tqdm(countries.iterator(), total=countries.count(), desc='Fetching Boundaries'):
        try:
            # Fetch administrative boundaries
            boundaries = pygadm.AdmItems(admin=country.code3, content_level=content_level)
            boundaries_gdf = gpd.GeoDataFrame.from_features(boundaries).set_crs(epsg=4326)

            # Add a column for the country code
            boundaries_gdf['country_code'] = country.code3

            country_frames.append(boundaries_gdf)
        except Exception as e:
            # Best effort: report the failure and continue with the next country
            print(f"Error processing country {country.name}: {e}")

    if not country_frames:
        return gpd.GeoDataFrame()

    # Every element is a GeoDataFrame with the same CRS, so the concatenated
    # result keeps its geometry column
    return pd.concat(country_frames, ignore_index=True)



In [None]:
from shapely.geometry import Point
import geopandas as gpd

def get_location_row(geonames_data, geoname_id):
    """
    Looks up a GeoNames record by id and builds a Point from its coordinates.

    Args:
    geonames_data (DataFrame): GeoNames table with 'geonameid', 'latitude'
        and 'longitude' columns.
    geoname_id (int): Value to search for in the 'geonameid' column.

    Returns:
    tuple: (matches, point), where matches is the DataFrame of rows whose
        'geonameid' equals geoname_id and point is Point(longitude, latitude)
        taken from the first of those rows; (None, None) when no row matches.
    """
    id_mask = geonames_data['geonameid'] == geoname_id
    matches = geonames_data[id_mask]

    # Nothing found: signal it with a pair of Nones
    if matches.empty:
        return None, None

    # Coordinates come from the first matching row; Point takes x (longitude)
    # before y (latitude)
    first_match = matches.iloc[0]
    return matches, Point(first_match.longitude, first_match.latitude)



In [None]:
# Two-letter country codes; they are matched against Country.code2 when the
# countries to process are selected further down.
north_america_country_codes = [
    'CA', 'US', 'MX', 'GL',  # Canada, United States, Mexico, Greenland
    # Add other North American countries if needed
]
central_west_europe_country_codes = [
    'AT', 'BE', 'CH', 'DE',  # Austria, Belgium, Switzerland, Germany
    'DK', 'ES', 'FI', 'FR',  # Denmark, Spain, Finland, France
    'GB', 'IE', 'IS', 'LI',  # United Kingdom, Ireland, Iceland, Liechtenstein
    'LU', 'MC', 'NL', 'NO',  # Luxembourg, Monaco, Netherlands, Norway
    'PT', 'SE', 'PL', 'GR',  # Portugal, Sweden, Poland, Greece
    # Add other Central and Western European countries if needed
]


In [None]:
# All codes to process: the North American list followed by the European one
selected_country_codes = [
    *north_america_country_codes,
    *central_west_europe_country_codes,
]


In [None]:
from tqdm.notebook import tqdm
from shapely.geometry import MultiPolygon
from django.contrib.gis.geos import GEOSGeometry
from django.db import transaction
import gc  # Garbage Collector

def _save_entities(pending):
    """Saves every entity in ``pending`` inside one transaction, then runs the garbage collector."""
    with transaction.atomic():
        for ent in pending:
            ent.save()
    gc.collect()  # Trigger garbage collection


def assign_boundaries(entities, boundaries_gdf, geonames_data, entity_type='entity'):
    """
    Assigns boundaries to given entities (countries, regions, subregions, cities).

    For each entity, its coordinates are looked up in ``geonames_data`` by
    ``geoname_id``; the first boundary (in frame order) that contains that
    point is stored on ``entity.boundary`` and the entity is saved. Entities
    are saved in batches, one transaction per batch. Entities with no location
    row, no containing boundary, or a geometry that is neither a Polygon nor a
    MultiPolygon are reported with ``print`` and left untouched.

    Args:
    entities (QuerySet): QuerySet of entities (e.g., countries, regions). Each
        entity must expose ``geoname_id`` and a ``boundary`` attribute.
    boundaries_gdf (GeoDataFrame): GeoDataFrame containing boundaries.
    geonames_data (DataFrame): DataFrame with location data ('geonameid',
        'latitude' and 'longitude' columns).
    entity_type (str): Type of entity (for logging purposes).

    Returns:
    None
    """
    # Nothing to match against (e.g. every boundary download failed)
    if boundaries_gdf.empty:
        print(f"No boundaries available for {entity_type}s.")
        return

    # Ensure boundaries_gdf has an active geometry column
    if not boundaries_gdf._geometry_column_name:
        boundaries_gdf = boundaries_gdf.set_geometry('geometry')

    # Build the spatial index once; it is queried for every entity below
    boundary_index = boundaries_gdf.sindex

    # Restrict the GeoNames table once to the ids that will be looked up, so
    # each per-entity lookup scans only those rows instead of the full table
    wanted_ids = [
        geoname_id
        for geoname_id in entities.values_list('geoname_id', flat=True)
        if geoname_id is not None
    ]
    relevant_locations = geonames_data[geonames_data['geonameid'].isin(wanted_ids)]

    # Process in batches
    batch_size = 100
    entities_to_update = []

    for entity in tqdm(entities.iterator(), total=entities.count(), desc=f'Processing {entity_type}s'):
        entity_row, point = get_location_row(relevant_locations, entity.geoname_id)

        if not point:
            print(f"No location data for {entity}.")
            continue

        # "point within boundary" is the same test as "boundary contains point";
        # sort=True keeps the first hit equal to the first matching row
        hits = boundary_index.query(point, predicate='within', sort=True)
        if len(hits) == 0:
            print(f"No boundary found for {entity}.")
            continue

        geometry = boundaries_gdf.geometry.iloc[hits[0]]

        # A single Polygon is wrapped as a one-part MultiPolygon rather than
        # being rejected, so such entities still receive their boundary
        if geometry.geom_type == 'Polygon':
            geometry = MultiPolygon([geometry])

        if not isinstance(geometry, MultiPolygon):
            print(f"Geometry for {entity} is not a MultiPolygon.")
            continue

        entity.boundary = GEOSGeometry(geometry.wkt)
        entities_to_update.append(entity)

        if len(entities_to_update) >= batch_size:
            _save_entities(entities_to_update)
            entities_to_update = []

    # Save any remaining entities
    _save_entities(entities_to_update)


In [None]:
from django.db import transaction
import gc  # Garbage Collector

# Requires get_all_boundaries, assign_boundaries, geonames_data and
# selected_country_codes to be defined already.

# Countries whose two-letter code is in the selected list
europe_america_countries = Country.objects.filter(code2__in=selected_country_codes)

# Fetch level-0 boundaries for those countries and attach them to the rows
country_boundaries = get_all_boundaries(content_level=0, countries=europe_america_countries)
assign_boundaries(
    entities=europe_america_countries,
    boundaries_gdf=country_boundaries,
    geonames_data=geonames_data,
    entity_type='country',
)

# Release the boundaries frame
del country_boundaries
gc.collect()  # Explicitly trigger garbage collection


In [None]:
from django.db import transaction
import gc  # Garbage Collector

# Requires get_all_boundaries, assign_boundaries, geonames_data and
# europe_america_countries to be defined already.

# Regions belonging to the selected countries
europe_america_regions = Region.objects.filter(country__in=europe_america_countries)

# Fetch level-1 boundaries for the selected countries and attach them to the regions
region_boundaries = get_all_boundaries(content_level=1, countries=europe_america_countries)
assign_boundaries(
    entities=europe_america_regions,
    boundaries_gdf=region_boundaries,
    geonames_data=geonames_data,
    entity_type='region',
)

# Release the boundaries frame
del region_boundaries
gc.collect()  # Explicitly trigger garbage collection


In [None]:
from django.db import transaction
import gc  # Garbage Collector

# Requires get_all_boundaries, assign_boundaries, geonames_data,
# europe_america_countries and europe_america_regions to be defined already.

# Subregions whose region is one of the selected regions
# NOTE(review): subregions with no region are excluded by this filter — confirm that is intended
europe_america_subregions = SubRegion.objects.filter(region__in=europe_america_regions)

# Fetch level-2 boundaries for the selected countries and attach them to the subregions
subregion_boundaries = get_all_boundaries(content_level=2, countries=europe_america_countries)
assign_boundaries(
    entities=europe_america_subregions,
    boundaries_gdf=subregion_boundaries,
    geonames_data=geonames_data,
    entity_type='subregion',
)

# Release the boundaries frame
del subregion_boundaries
gc.collect()  # Explicitly trigger garbage collection


In [None]:
from django.db import transaction
import gc  # Garbage Collector

# Requires get_all_boundaries, assign_boundaries, geonames_data,
# europe_america_countries and europe_america_regions to be defined already.

# Cities whose region is one of the selected regions
# NOTE(review): cities with no region are excluded by this filter — confirm that is intended
europe_america_cities = City.objects.filter(region__in=europe_america_regions)

# Fetch level-3 boundaries for the selected countries and attach them to the cities
city_boundaries = get_all_boundaries(content_level=3, countries=europe_america_countries)
assign_boundaries(
    entities=europe_america_cities,
    boundaries_gdf=city_boundaries,
    geonames_data=geonames_data,
    entity_type='city',
)

# Release the boundaries frame
del city_boundaries
gc.collect()  # Explicitly trigger garbage collection
