### 1.1 Data Acquisition and Alignment (15/100 points)

#### 1.1.1 Download and Process OSM Data

In [None]:
import pyrosm
import rasterio
import matplotlib.pyplot as plt
from rasterio.plot import show
import geopandas as gpd

def download_and_process_osm_data(city_name):
    """Download the OSM extract for a city and return its building footprints.

    Parameters
    ----------
    city_name : str
        Dataset name that ``pyrosm.get_data`` can resolve (e.g. ``"Berlin"``).

    Returns
    -------
    gdf : geopandas.GeoDataFrame
        Rows of the extract whose ``building`` attribute is not null.
    bbox : numpy.ndarray
        ``gdf.total_bounds`` of the returned rows. Downstream code in this
        notebook indexes it as ``[west, south, east, north]``.

    Raises
    ------
    ValueError
        If pyrosm finds no buildings in the extract.

    Side effects
    ------------
    Downloads the extract into the current working directory, prints
    progress messages and draws a quick-look plot of the raw buildings.
    """
    print(f"Downloading and processing OSM data for {city_name}...")
    # Download the data into the current working directory
    fp = pyrosm.get_data(city_name, directory="./")
    print(f"{city_name} data was downloaded to:", fp)

    # Initialize the OSM reader on the downloaded extract
    osm = pyrosm.OSM(fp)

    buildings = osm.get_buildings()
    # pyrosm returns None (not an empty frame) when nothing matched; fail with
    # a clear message instead of an AttributeError on the plot call below.
    if buildings is None:
        raise ValueError(f"No buildings found in the OSM extract for {city_name}.")
    print("OSM data get_buildings done.")
    print("Plotting....")
    buildings.plot()

    # Convert to GeoDataFrame and keep only rows tagged as buildings
    gdf = gpd.GeoDataFrame(buildings, geometry='geometry')
    gdf = gdf[gdf["building"].notnull()]

    # Take the bounds after filtering so they describe exactly the returned rows
    bbox = gdf.total_bounds

    print(f"Finished processing OSM data for {city_name}.")

    return gdf, bbox

In [None]:
# Cities to process (only Berlin is listed at the moment)
cities = ["Berlin"]

# Fetch the buildings for every city and report a short summary.
# gdf and bbox are rebound on each pass, so the values left after the loop
# belong to the last city in the list.
for city in cities:
    gdf, bbox = download_and_process_osm_data(city)
    print(
        f"Bounding box for {city}: {bbox}",
        f"Number of buildings in {city}: {len(gdf)}",
        f"CRS of buildings in {city}: {gdf.crs}",
        sep="\n",
    )

In [None]:
# Helper that renders building footprints on their own figure
def plot_buildings(gdf):
    """Draw the geometries in ``gdf`` on a new 10x10 inch figure.

    The figure gets a fixed title and "Longitude"/"Latitude" axis labels and
    is displayed immediately with ``plt.show()``. Nothing is returned.
    """
    axes = gdf.plot(figsize=(10, 10))
    axes.set_title("Buildings from OpenStreetMap")
    axes.set_xlabel("Longitude")
    axes.set_ylabel("Latitude")
    plt.show()

# Render the buildings currently bound to gdf
plot_buildings(gdf)

#### 1.1.2 Download Sentinel-2 L2A Data Using OpenEO

In [62]:
import openeo

def connect_to_openeo():
    """Open an authenticated connection to the Copernicus Data Space openEO back-end.

    Connects to ``https://openeo.dataspace.copernicus.eu``, runs the client's
    OIDC authentication on that connection, prints a message before and after,
    and returns the connection object.
    """
    backend_url = "https://openeo.dataspace.copernicus.eu"

    print("Connecting to OpenEO...")
    session = openeo.connect(backend_url)
    session.authenticate_oidc()
    print("Connected to OpenEO.")

    return session

def download_sentinel2_images_openeo(connection, bbox, start_date, end_date, cloud_cover_percentage, output_dir="openeo_output"):
    """Run an openEO batch job that exports Sentinel-2 L2A RGB bands as GeoTIFF.

    Parameters
    ----------
    connection
        Authenticated openEO connection (anything exposing ``load_collection``).
    bbox : sequence of 4 numbers
        Area of interest ordered as ``[west, south, east, north]``.
    start_date, end_date : str
        Bounds of the temporal extent passed to ``load_collection``.
    cloud_cover_percentage : float or None
        Upper limit forwarded to ``load_collection`` as ``max_cloud_cover``.
        Pass ``None`` to disable the filter.
        NOTE(review): openEO describes this as a value/percentage for the
        ``eo:cloud_cover`` property, so presumably on a 0-100 scale - confirm
        that callers pass percent rather than a fraction.
    output_dir : str, optional
        Directory the job results are downloaded into.

    Returns
    -------
    Whatever ``download_files`` returns for the finished job (the openEO
    client documents this as the list of downloaded file paths).
    """
    # Unpacking validates that exactly four coordinates were supplied; float()
    # turns numpy scalars (e.g. from GeoDataFrame.total_bounds) into plain floats.
    west, south, east, north = (float(value) for value in bbox)
    spatial_extent = {"west": west, "south": south, "east": east, "north": north}

    load_options = {
        "spatial_extent": spatial_extent,
        "temporal_extent": [start_date, end_date],
        # Red, green, blue - in the order needed for an RGB composite
        "bands": ["B04", "B03", "B02"],
    }
    # Previously this argument was accepted but silently ignored.
    if cloud_cover_percentage is not None:
        load_options["max_cloud_cover"] = cloud_cover_percentage

    # Define the process graph
    datacube = connection.load_collection("SENTINEL2_L2A", **load_options)
    result = datacube.save_result("GTiff")

    # Create the batch job on the back-end, then start it and block until done.
    job = result.create_job()
    job.start_and_wait()

    # Hand the downloaded paths back so callers can open the files.
    return job.get_results().download_files(output_dir)

In [None]:
# Area of interest in the order the download helper reads it:
# [west, south, east, north]
bbox = [13.294333, 52.454927, 13.500205, 52.574409]

# Acquisition window (year-month-day timestamps)
start_date, end_date = '2023-06-01T12:00:00Z', '2023-06-30T12:00:00Z'
cloud_cover_percentage = 0.01

#gdf = download_and_process_osm_data(bbox)
connection = connect_to_openeo()

# Keep whatever the download helper hands back for the later plotting step
sentinel2_path = download_sentinel2_images_openeo(
    connection=connection,
    bbox=bbox,
    start_date=start_date,
    end_date=end_date,
    cloud_cover_percentage=cloud_cover_percentage,
)
#plot_data(gdf, sentinel2_path)


In [49]:
def plot_data(gdf, raster_path):
    """Overlay red geometry outlines from ``gdf`` on a raster image.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        Geometries to outline (labelled "Buildings" in the legend).
    raster_path : str or path-like
        Raster file that ``rasterio.open`` can read.

    The geometries are reprojected to the raster's CRS before drawing so that
    both layers share one coordinate system. The figure is shown with
    ``plt.show()``; nothing is returned.
    """
    # Local import: only the legend proxy below needs it.
    from matplotlib.patches import Patch

    print("Plotting data...")
    _, ax = plt.subplots(1, 1, figsize=(10, 10))

    with rasterio.open(raster_path) as src:
        # Reproject only when both layers declare a CRS; otherwise draw the
        # geometries unchanged, as there is nothing to convert from/to.
        overlay = gdf
        if gdf.crs is not None and src.crs is not None:
            overlay = gdf.to_crs(src.crs)

        overlay.plot(ax=ax, facecolor='none', edgecolor='red')
        show(src, ax=ax, title="Sentinel 2 Image with Buildings Overlay")

    # Matplotlib cannot build a legend entry from the polygon collection that
    # geopandas draws, so describe the layer with an equivalent proxy patch.
    ax.legend(handles=[Patch(facecolor='none', edgecolor='red', label='Buildings')])
    plt.show()
    print("Data plotted.")

### 1.2 Data Preparation (25/100 points)

### 1.3 Modeling and Tuning (35/100 points)

### 1.4 Data Augmentation (25/100 points)