In [44]:
import geopandas as gpd
import shutil
import os

# Load datasets
print("Loading datasets...")
parks = gpd.read_file("parks.gpkg")
berlin_neighborhoods = gpd.read_file("berlin.gpkg")
print("Datasets loaded.")

# Aggregate park areas by district and neighborhood
print("Aggregating park areas...")
district_park_areas = parks.dissolve(by='District', aggfunc={'Area [m^2]': 'sum'}).rename(columns={'Area [m^2]': 'ParkAreaDst'})
neighborhood_park_areas = parks.dissolve(by='Neighborhood', aggfunc={'Area [m^2]': 'sum'}).rename(columns={'Area [m^2]': 'ParkAreaNb'})
print("Park areas aggregated.")

# Dissolve berlin_neighborhoods to get district-level and east/west level data
print("Dissolving neighborhoods by district and east/west...")
berlin_districts = berlin_neighborhoods.dissolve(by='District', aggfunc={'Area Neighborhood [m^2]': 'sum', 'Population': 'sum'}).reset_index()
east_west_berlin = berlin_neighborhoods.dissolve(by='Former Side', aggfunc={'Area Neighborhood [m^2]': 'sum', 'Population': 'sum'}).reset_index()
print("Dissolution complete.")

# Join park areas with berlin district and neighborhood data
print("Joining park areas with district and neighborhood data...")
berlin_districts = berlin_districts.merge(district_park_areas, on='District', how='left', suffixes=('', '_drop'))
berlin_neighborhoods = berlin_neighborhoods.merge(neighborhood_park_areas, on='Neighborhood', how='left', suffixes=('', '_drop'))
print("Join complete.")

# Drop unnecessary columns
berlin_districts.drop(columns=[col for col in berlin_districts.columns if '_drop' in col], inplace=True)
berlin_neighborhoods.drop(columns=[col for col in berlin_neighborhoods.columns if '_drop' in col], inplace=True)

# Calculate park area density for each district and neighborhood based on area
print("Calculating park area density based on area...")
berlin_districts['PAD_Dist'] = berlin_districts['ParkAreaDst'] / berlin_districts['Area Neighborhood [m^2]'] * 100
berlin_neighborhoods['PAD_Nbh'] = berlin_neighborhoods['ParkAreaNb'] / berlin_neighborhoods['Area Neighborhood [m^2]'] * 100
print("Area density calculation complete for districts and neighborhoods.")

# Calculate park area density for each district and neighborhood based on population
print("Calculating park area density based on population...")
berlin_districts['PAP_Dist'] = berlin_districts['ParkAreaDst'] / berlin_districts['Population']
berlin_neighborhoods['PAP_Nbh'] = berlin_neighborhoods['ParkAreaNb'] / berlin_neighborhoods['Population']
print("Population density calculation complete for districts and neighborhoods.")

# Fill NA values with 0 for both density calculations
print("Filling NA values...")
berlin_districts[['PAD_Dist', 'PAP_Dist']].fillna(0, inplace=True)
berlin_neighborhoods[['PAD_Nbh', 'PAP_Nbh']].fillna(0, inplace=True)

# Calculate density for East and West Berlin
print("Calculating density for East and West Berlin...")
east_west_park_areas = parks.dissolve(by='Former Side', aggfunc={'Area [m^2]': 'sum'}).rename(columns={'Area [m^2]': 'ParkAreaEW'})
east_west_berlin = east_west_berlin.merge(east_west_park_areas, on='Former Side', how='left', suffixes=('', '_drop'))
east_west_berlin.drop(columns=[col for col in east_west_berlin.columns if '_drop' in col], inplace=True)
east_west_berlin['PAD_EW'] = east_west_berlin['ParkAreaEW'] / east_west_berlin['Area Neighborhood [m^2]'] * 100
east_west_berlin['PAP_EW'] = east_west_berlin['ParkAreaEW'] / east_west_berlin['Population']

east_west_berlin[['PAD_EW', 'PAP_EW']].fillna(0, inplace=True)
print("East/West Berlin density calculation complete.")

# Save data to Shapefiles and then zip them
def save_and_zip(gdf, filename):
    # Save as Shapefile
    file_path = f"./{filename}"
    gdf.to_file(file_path)
    # Zip the folder
    shutil.make_archive(file_path, 'zip', file_path)
    # Remove the original Shapefile directory
    shutil.rmtree(file_path)
    print(f"Saved and zipped {filename}")

print("Saving data to Zipped Shapefiles...")
save_and_zip(berlin_districts, "density_districts")
save_and_zip(berlin_neighborhoods, "density_neighborhoods")
save_and_zip(east_west_berlin, "density_east_west")
print("All data has been saved to Zipped Shapefiles.")

Loading datasets...
Datasets loaded.
Aggregating park areas...
Park areas aggregated.
Dissolving neighborhoods by district and east/west...
Dissolution complete.
Joining park areas with district and neighborhood data...
Join complete.
Calculating park area density based on area...
Area density calculation complete for districts and neighborhoods.
Calculating park area density based on population...
Population density calculation complete for districts and neighborhoods.
Filling NA values...
Calculating density for East and West Berlin...


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  berlin_districts[['PAD_Dist', 'PAP_Dist']].fillna(0, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  berlin_neighborhoods[['PAD_Nbh', 'PAP_Nbh']].fillna(0, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  east_west_berlin[['PAD_EW', 'PAP_EW']].fillna(0, inplace=True)
  gdf.to_file(file_path)
  gdf.to_file(file_path)


East/West Berlin density calculation complete.
Saving data to Zipped Shapefiles...
Saved and zipped density_districts
Saved and zipped density_neighborhoods
Saved and zipped density_east_west
All data has been saved to Zipped Shapefiles.


  gdf.to_file(file_path)
