In [1]:
# CONFIG CELL
# Must run before any other cell: the later cells import from `app` and open
# relative paths such as "input_files/...".
# NOTE(review): presumably set_root_directory() switches the working directory
# (and/or sys.path) to the project root so those imports and paths resolve --
# confirm in notebook_utils.
from notebook_utils import set_root_directory

set_root_directory()

In [2]:
import geopandas as gpd
import pandas as pd

from app import constants
from app.missing_values_percentage_filter import MissingValuesPercentageFilter
from app.voronoi_transfomer import VoronoiTransformer

In [3]:
# Input files (relative paths; read in the loading cell below).
SENSOR_METADATA = "input_files/sensor_metadata.parquet"  # read with pd.read_parquet
MEASUREMENTS_1H = "input_files/measurements_1h.parquet"  # read with pd.read_parquet
# Read with gpd.read_file and used as the Voronoi mask polygon.
# NOTE(review): unlike the other inputs this path has no "input_files/" prefix --
# confirm the archive really lives in the working directory.
POLAND_POLYGON = "polska.zip"

# Destination of the final GeoDataFrame written by the last cell (to_parquet).
OUTPUT_FILE = "input_files/sensor_metadata_1h_with_voronoi.parquet"

In [4]:
# Load the three inputs used by the rest of the notebook:
#   poland           - mask polygon passed to VoronoiTransformer
#   sensor_metadata  - sensor table; the Voronoi loop adds columns to it
#   measurements     - measurements passed to MissingValuesPercentageFilter
# The former `variable = constants.O3` assignment was removed: it was never read
# before the Voronoi loop rebinds `variable` for every entry of
# constants.TARGET_VARIABLES, and it wrongly suggested that only O3 is processed.
poland = gpd.read_file(POLAND_POLYGON)
sensor_metadata = pd.read_parquet(SENSOR_METADATA)
measurements = pd.read_parquet(MEASUREMENTS_1H)

In [5]:
# One transformer instance is shared by every (variable, year) iteration of the
# loop below, which calls vt.fit_transform() on each yearly sensor subset.
# NOTE(review): buffer_size / buffer_points_amount presumably describe helper
# points placed around the mask polygon so border cells stay bounded -- confirm
# in app.voronoi_transfomer.
vt = VoronoiTransformer(
    mask_polygon=poland,
    buffer_size=constants.BUFFER_SIZE,
    buffer_points_amount=constants.BUFFER_POINTS_AMOUNT,
)

In [6]:
# For every target variable and every calendar year, run the Voronoi transformer
# on the sensors that still have measurements after the missing-values filter,
# and attach the result to `sensor_metadata` as a new column named
# "<variable>_<year>_<constants.VORONOI_GEOMETRY>". Sensors that are not part of
# a given (variable, year) subset get a missing value in that column (left merge).
#
# (variable, year) pairs with no matching sensors, or for which the transformer
# raises, are reported on stdout and skipped, so no column is created for them.
years = range(2000, 2024)  # end-exclusive: 2000 .. 2023

for variable in constants.TARGET_VARIABLES:
    # A fresh filter per variable, so no fitted state is shared between variables.
    mvpf = MissingValuesPercentageFilter(threshold=constants.MISSING_VALUES_THRESHOLD)
    filtered_measurements = mvpf.fit_transform(X=measurements, variable=variable)

    for year in years:
        unique_sensor_ids = filtered_measurements.query(f"`{constants.YEAR}` == {year}")[
            constants.UNIQUE_ID
        ].unique()

        # Select with .isin instead of interpolating the id list into a query
        # string: same rows, but no dependence on how the ids repr() and no
        # parsing of a potentially very long expression.
        sensor_metadata_subset = sensor_metadata[
            sensor_metadata[constants.SENSOR_ID].isin(unique_sensor_ids)
        ]

        if sensor_metadata_subset.empty:
            print(
                f"No sensor metadata found for year {year}, variable {variable}. Skipping Voronoi transformation."
            )
            continue

        # Sensor points are built from longitude/latitude in the global CRS and
        # then reprojected to the Polish CRS before being handed to the transformer.
        gdf_sensor_metadata = gpd.GeoDataFrame(
            sensor_metadata_subset,
            geometry=gpd.points_from_xy(
                sensor_metadata_subset[constants.LONGITUDE],
                sensor_metadata_subset[constants.LATITUDE],
            ),
            crs=constants.GLOBAL_EPSG,
        )
        gdf_sensor_metadata = gdf_sensor_metadata.to_crs(constants.POLAND_EPSG)
        gdf_sensor_metadata = gdf_sensor_metadata.reset_index(drop=True)

        try:
            gdf_sensor_metadata[constants.VORONOI_GEOMETRY] = vt.fit_transform(
                X=gdf_sensor_metadata
            )
        except Exception as e:
            # Deliberate best-effort: a failing tessellation (e.g. the Qhull
            # "cocircular or cospherical" input error) must not abort the
            # remaining (variable, year) pairs.
            print(f"Error during Voronoi transformation for year {year}, variable {variable}: {e}")
            continue

        # Rename BEFORE merging so the generic column name never lands on
        # `sensor_metadata`, and drop a stale column of the same name first.
        # Without this, re-running the cell would create duplicate column names.
        voronoi_column = f"{variable}_{year}_{constants.VORONOI_GEOMETRY}"
        voronoi_cells = gdf_sensor_metadata[
            [constants.SENSOR_ID, constants.VORONOI_GEOMETRY]
        ].rename(columns={constants.VORONOI_GEOMETRY: voronoi_column})

        sensor_metadata = sensor_metadata.drop(columns=[voronoi_column], errors="ignore").merge(
            voronoi_cells,
            on=constants.SENSOR_ID,
            how="left",
        )

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  simple_X[constants.YEAR] = simple_X[constants.TIMESTAMP_COLUMN].dt.year


No sensor metadata found for year 2000, variable C6H6. Skipping Voronoi transformation.
No sensor metadata found for year 2001, variable C6H6. Skipping Voronoi transformation.
No sensor metadata found for year 2002, variable C6H6. Skipping Voronoi transformation.
No sensor metadata found for year 2003, variable C6H6. Skipping Voronoi transformation.
No sensor metadata found for year 2015, variable C6H6. Skipping Voronoi transformation.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  simple_X[constants.YEAR] = simple_X[constants.TIMESTAMP_COLUMN].dt.year


No sensor metadata found for year 2000, variable CO. Skipping Voronoi transformation.
No sensor metadata found for year 2001, variable CO. Skipping Voronoi transformation.
No sensor metadata found for year 2002, variable CO. Skipping Voronoi transformation.
Error during Voronoi transformation for year 2003, variable CO: QH6019 qhull input error (qh_scalelast): can not scale last coordinate to [   0,  inf].  Input is cocircular or cospherical.   Use option 'Qz' to add a point at infinity.

While executing:  | qhull v Qc Qz Qbb
Options selected for Qhull 2019.1.r 2019/06/21:
  run-id 1025940941  voronoi  Qcoplanar-keep  Qz-infinity-point  Qbbound-last
  _pre-merge  _zero-centrum  Qinterior-keep  Pgood  _maxoutside  0

No sensor metadata found for year 2015, variable CO. Skipping Voronoi transformation.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  simple_X[constants.YEAR] = simple_X[constants.TIMESTAMP_COLUMN].dt.year


No sensor metadata found for year 2000, variable NO. Skipping Voronoi transformation.
No sensor metadata found for year 2001, variable NO. Skipping Voronoi transformation.
No sensor metadata found for year 2002, variable NO. Skipping Voronoi transformation.
No sensor metadata found for year 2003, variable NO. Skipping Voronoi transformation.
No sensor metadata found for year 2004, variable NO. Skipping Voronoi transformation.
No sensor metadata found for year 2005, variable NO. Skipping Voronoi transformation.
No sensor metadata found for year 2006, variable NO. Skipping Voronoi transformation.
No sensor metadata found for year 2007, variable NO. Skipping Voronoi transformation.
No sensor metadata found for year 2008, variable NO. Skipping Voronoi transformation.
No sensor metadata found for year 2009, variable NO. Skipping Voronoi transformation.
No sensor metadata found for year 2010, variable NO. Skipping Voronoi transformation.
No sensor metadata found for year 2011, variable NO. S

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  simple_X[constants.YEAR] = simple_X[constants.TIMESTAMP_COLUMN].dt.year


No sensor metadata found for year 2015, variable NO2. Skipping Voronoi transformation.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  simple_X[constants.YEAR] = simple_X[constants.TIMESTAMP_COLUMN].dt.year


No sensor metadata found for year 2015, variable NOx. Skipping Voronoi transformation.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  simple_X[constants.YEAR] = simple_X[constants.TIMESTAMP_COLUMN].dt.year


Error during Voronoi transformation for year 2003, variable O3: QH6019 qhull input error (qh_scalelast): can not scale last coordinate to [   0,  inf].  Input is cocircular or cospherical.   Use option 'Qz' to add a point at infinity.

While executing:  | qhull v Qc Qz Qbb
Options selected for Qhull 2019.1.r 2019/06/21:
  run-id 1026814905  voronoi  Qcoplanar-keep  Qz-infinity-point  Qbbound-last
  _pre-merge  _zero-centrum  Qinterior-keep  Pgood  _maxoutside  0

No sensor metadata found for year 2015, variable O3. Skipping Voronoi transformation.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  simple_X[constants.YEAR] = simple_X[constants.TIMESTAMP_COLUMN].dt.year


No sensor metadata found for year 2000, variable PM2.5. Skipping Voronoi transformation.
No sensor metadata found for year 2001, variable PM2.5. Skipping Voronoi transformation.
No sensor metadata found for year 2002, variable PM2.5. Skipping Voronoi transformation.
No sensor metadata found for year 2003, variable PM2.5. Skipping Voronoi transformation.
No sensor metadata found for year 2004, variable PM2.5. Skipping Voronoi transformation.
No sensor metadata found for year 2005, variable PM2.5. Skipping Voronoi transformation.
No sensor metadata found for year 2006, variable PM2.5. Skipping Voronoi transformation.
No sensor metadata found for year 2007, variable PM2.5. Skipping Voronoi transformation.
No sensor metadata found for year 2008, variable PM2.5. Skipping Voronoi transformation.
No sensor metadata found for year 2009, variable PM2.5. Skipping Voronoi transformation.
No sensor metadata found for year 2010, variable PM2.5. Skipping Voronoi transformation.
No sensor metadata fo

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  simple_X[constants.YEAR] = simple_X[constants.TIMESTAMP_COLUMN].dt.year


No sensor metadata found for year 2000, variable PM10. Skipping Voronoi transformation.
No sensor metadata found for year 2001, variable PM10. Skipping Voronoi transformation.
No sensor metadata found for year 2002, variable PM10. Skipping Voronoi transformation.
Error during Voronoi transformation for year 2003, variable PM10: QH6019 qhull input error (qh_scalelast): can not scale last coordinate to [   0,  inf].  Input is cocircular or cospherical.   Use option 'Qz' to add a point at infinity.

While executing:  | qhull v Qc Qz Qbb
Options selected for Qhull 2019.1.r 2019/06/21:
  run-id 1027167852  voronoi  Qcoplanar-keep  Qz-infinity-point  Qbbound-last
  _pre-merge  _zero-centrum  Qinterior-keep  Pgood  _maxoutside  0

No sensor metadata found for year 2015, variable PM10. Skipping Voronoi transformation.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  simple_X[constants.YEAR] = simple_X[constants.TIMESTAMP_COLUMN].dt.year


No sensor metadata found for year 2000, variable SO2. Skipping Voronoi transformation.
Error during Voronoi transformation for year 2003, variable SO2: QH6019 qhull input error (qh_scalelast): can not scale last coordinate to [   0,  inf].  Input is cocircular or cospherical.   Use option 'Qz' to add a point at infinity.

While executing:  | qhull v Qc Qz Qbb
Options selected for Qhull 2019.1.r 2019/06/21:
  run-id 1027403150  voronoi  Qcoplanar-keep  Qz-infinity-point  Qbbound-last
  _pre-merge  _zero-centrum  Qinterior-keep  Pgood  _maxoutside  0

No sensor metadata found for year 2015, variable SO2. Skipping Voronoi transformation.


In [7]:
# Turn the enriched metadata table into a GeoDataFrame whose active geometry is
# the sensor location (built from longitude/latitude in the global CRS, then
# reprojected to the Polish CRS), and write it to OUTPUT_FILE as parquet.
sensor_points = gpd.points_from_xy(
    sensor_metadata[constants.LONGITUDE],
    sensor_metadata[constants.LATITUDE],
)
sensor_metadata = gpd.GeoDataFrame(
    sensor_metadata,
    geometry=sensor_points,
    crs=constants.GLOBAL_EPSG,
).to_crs(constants.POLAND_EPSG)
sensor_metadata.to_parquet(OUTPUT_FILE)