In [0]:
import json
import pandas as pd
from pyspark.sql import functions as F

In [0]:
vars = dbutils.jobs.taskValues.get(taskKey = 'gold_task', key = 'layer_vars_key')

gold_reports = vars['gold_reports']

local_geojson_path = '/dbfs/tmp/reports.geojson'
consumption_path = 'abfss://consumption@storagegemeente.dfs.core.windows.net'

In [0]:
# read gold table into spark df
gold_df = spark.read.table(gold_reports)

In [0]:
# all cases that are solved
gold_df_solved = gold_df.filter(F.col('status') == 'solved')

In [0]:
# for the consumption layer we only want cases that are still open
gold_df_filtered = gold_df.filter(F.col('status') == 'open')

In [0]:
# further filter to only include required columns for consumption
# create fitting date format
gold_df_filtered = gold_df_filtered.select(
    'id',
    'problem_norm',
    'prioriteit',
    'street_name',
    'house_number',
    'postcode',
    'lat',
    'lon',
    F.date_format('reported_on', 'dd-MM-yyyy HH:mm').alias('reported_on')
)

In [0]:
# convert to pandas 
pdf = gold_df_filtered.toPandas()

# convert to geojson structure
# iterrows wil return a tuple with index and row
features = []
for index, row in pdf.iterrows():
    features.append({
        'type': 'Feature',
        'geometry': {
            'type': 'Point',
            'coordinates': [row['lon'], row['lat']]
        },
        'properties': {
            'id': row['id'],
            'problem': row['problem_norm'],
            'street_name': row['street_name'],
            'house_number': row['house_number'],
            'postcode': row['postcode'],
            'priority': row['prioriteit'],
            'reported_on': str(row['reported_on'])
        }
    })

geojson = {
    'type': 'FeatureCollection',
    'features': features
}

# spark can not directly write geojson to adls
# save geojson dataset to local tmp storage first, hen we copy it to adls
# we overwrite the previous file to remove the solved cases from the map
dbutils.fs.put(local_geojson_path, json.dumps(geojson), True)

dbutils.fs.cp(local_geojson_path, consumption_path, True)

[0;31m---------------------------------------------------------------------------[0m
[0;31mTypeError[0m                                 Traceback (most recent call last)
File [0;32m<command-5860913143568507>, line 33[0m
[1;32m      1[0m [38;5;66;03m# # convert to pandas [39;00m
[1;32m      2[0m [38;5;66;03m# pdf = gold_df_filtered.toPandas()[39;00m
[1;32m      3[0m 
[0;32m   (...)[0m
[1;32m     31[0m [38;5;66;03m# # save geojson dataset to local tmp storage first, hen we copy it to adls[39;00m
[1;32m     32[0m [38;5;66;03m# # we overwrite the previous file to remove the solved cases from the map[39;00m
[0;32m---> 33[0m dbutils[38;5;241m.[39mfs[38;5;241m.[39mput(local_geojson_path, json[38;5;241m.[39mdumps(geojson_df), [38;5;28;01mTrue[39;00m)
[1;32m     35[0m dbutils[38;5;241m.[39mfs[38;5;241m.[39mcp(local_geojson_path, consumption_path, [38;5;28;01mTrue[39;00m)

File [0;32m/usr/lib/python3.12/json/__init__.py:231[0m, in [0;36mdumps[0;34m