In [0]:
from datetime import datetime
import pandas as pd

# Bronze ingestion: flatten the GeoJSON feature collections written by ADF and
# append them to the bronze Delta table.
#
# The whole pipeline stays in Spark. The previous version collected every row
# to the driver with toPandas() and rebuilt a Spark DataFrame from pandas, which
#   * does not scale beyond driver memory,
#   * made column types depend on the data of each batch (pandas turns integer
#     columns containing nulls into floats), so an append could clash with the
#     schema already stored in the Delta table, and
#   * stamped rows with naive datetimes (pd.to_datetime / datetime.utcnow()),
#     which are only correct when the Spark session time zone is UTC.

# 1. Read the multi-line JSON files written by ADF.
# NOTE(review): this loads every file under /mnt/data/raw/ on each run, and the
# write below appends, so re-running the cell stores the same events again —
# confirm that this is intended for the bronze layer.
raw_df = spark.read.option("multiline", True).format("json").load("/mnt/data/raw/")

# 2. Explode the "features" array: one row per feature.
features_df = raw_df.selectExpr("explode(features) as feature")

# 3. Extract each required field from the nested feature struct.
rows = features_df.select(
    features_df.feature.id.alias("id"),
    features_df.feature.properties.place.alias("place"),
    features_df.feature.properties.time.alias("time_ms"),
    features_df.feature.properties.mag.alias("mag"),
    features_df.feature.properties.type.alias("type"),
    features_df.feature.properties.status.alias("status"),
    features_df.feature.properties.tsunami.alias("tsunami"),
    features_df.feature.properties.felt.alias("felt"),
    features_df.feature.properties.alert.alias("alert"),
    features_df.feature.geometry.coordinates.getItem(0).alias("longitude"),
    features_df.feature.geometry.coordinates.getItem(1).alias("latitude"),
    features_df.feature.geometry.coordinates.getItem(2).alias("depth_km")
)

# 4. Pin the column types and build the timestamps natively in Spark.
#    * Explicit casts keep the schema identical from batch to batch. "felt" is
#      cast to DOUBLE because the existing bronze table shows it as a floating
#      point value.
#    * timestamp_millis() converts epoch milliseconds to a proper instant,
#      independent of the session time zone.
#    * current_timestamp() is evaluated once per query, so every row of the
#      batch receives the same ingest_time.
#    "time" and "ingest_time" stay at the end, and "time_ms" is dropped, to
#    keep the column layout produced by the previous version of this cell.
df_bronze = rows.selectExpr(
    "id",
    "place",
    "CAST(mag AS DOUBLE) AS mag",
    "`type`",
    "status",
    "CAST(tsunami AS BIGINT) AS tsunami",
    "CAST(felt AS DOUBLE) AS felt",
    "alert",
    "CAST(longitude AS DOUBLE) AS longitude",
    "CAST(latitude AS DOUBLE) AS latitude",
    "CAST(depth_km AS DOUBLE) AS depth_km",
    "timestamp_millis(CAST(time_ms AS BIGINT)) AS `time`",
    "current_timestamp() AS ingest_time",
)

# 5. Append to the BRONZE zone; mergeSchema lets new columns be added to the
#    existing Delta table.
df_bronze.write.format("delta") \
    .option("mergeSchema", "true") \
    .mode("append") \
    .save("/mnt/data/earthquakes/bronze")


In [0]:
# Read the bronze Delta table back from storage and display it.
bronze_path = "/mnt/data/earthquakes/bronze/"
df_bronze = spark.read.load(bronze_path, format="delta")
display(df_bronze)

id,place,time,mag,type,status,tsunami,felt,latitude,longitude,depth,ingest_time,depth_km
nc75184347,"10 km NNW of The Geysers, CA",2025-05-19T10:33:10.76Z,0.76,earthquake,automatic,0,,38.8423347473145,-122.838996887207,,2025-05-23T11:50:03.766386Z,1.52999997138977
ok2025jsye,"8 km E of Chickasha, Oklahoma",2025-05-19T10:29:51.704Z,1.9,earthquake,automatic,0,,35.05583572,-97.84127808,,2025-05-23T11:50:03.766395Z,4.104894161
ci41156160,"11 km SSW of Borrego Springs, CA",2025-05-19T10:20:41.04Z,0.85,earthquake,automatic,0,,33.1701667,-116.4416667,,2025-05-23T11:50:03.766401Z,13.07
nc75184342,"8 km NNW of The Geysers, CA",2025-05-19T10:12:24.9Z,0.31,earthquake,automatic,0,,38.8333320617676,-122.814834594727,,2025-05-23T11:50:03.766407Z,1.37000000476837
nc75184337,"7 km NW of The Geysers, CA",2025-05-19T10:06:29.63Z,1.37,earthquake,automatic,0,,38.8158340454102,-122.821334838867,,2025-05-23T11:50:03.766413Z,2.10999989509583
nc75184332,"7 km NW of The Geysers, CA",2025-05-19T10:06:08.09Z,1.08,earthquake,automatic,0,,38.8166656494141,-122.822166442871,,2025-05-23T11:50:03.766419Z,2.17000007629395
ak0256e1ovfr,"21 km SW of Susitna, Alaska",2025-05-19T10:06:05.831Z,1.2,earthquake,automatic,0,1.0,61.417,-150.8067,,2025-05-23T11:50:03.766425Z,56.7
nc75184327,"6 km NW of The Geysers, CA",2025-05-19T10:04:20.6Z,1.07,earthquake,automatic,0,,38.800666809082,-122.814666748047,,2025-05-23T11:50:03.766435Z,2.80999994277954
nc75184317,"2 km of The Geysers, CA",2025-05-19T09:58:12.59Z,0.88,earthquake,automatic,0,,38.7941665649414,-122.762832641602,,2025-05-23T11:50:03.766442Z,0.400000005960464
nc75184312,"6 km NNW of The Geysers, CA",2025-05-19T09:57:25.99Z,1.11,earthquake,automatic,0,,38.8211669921875,-122.802833557129,,2025-05-23T11:50:03.766449Z,2.61999988555908


In [0]:
# List the entries directly under /mnt/data/ and display them.
mnt_data_entries = dbutils.fs.ls("/mnt/data/")
display(mnt_data_entries)


path,name,size,modificationTime
dbfs:/mnt/data/raw/,raw/,0,0


In [0]:
# Display every mount point currently registered in the workspace.
workspace_mounts = dbutils.fs.mounts()
display(workspace_mounts)


mountPoint,source,encryptionType
/databricks-datasets,databricks-datasets,
/Volumes,UnityCatalogVolumes,
/databricks/mlflow-tracking,databricks/mlflow-tracking,
/databricks-results,databricks-results,
/databricks/mlflow-registry,databricks/mlflow-registry,
/Volume,DbfsReserved,
/volumes,DbfsReserved,
/mnt/data,wasbs://earthquakes@earthquakedatalakee.blob.core.windows.net/,
/,DatabricksRoot,
/volume,DbfsReserved,


In [0]:
# Same listing as before, this time addressing the mount through the explicit dbfs: scheme.
dbfs_data_entries = dbutils.fs.ls("dbfs:/mnt/data/")
display(dbfs_data_entries)


path,name,size,modificationTime
dbfs:/mnt/data/raw/,raw/,0,0


In [0]:
# List the files in the raw landing folder and display them.
raw_file_entries = dbutils.fs.ls("dbfs:/mnt/data/raw/")
display(raw_file_entries)


path,name,size,modificationTime
dbfs:/mnt/data/raw/earthquake_raw_20250526102740.json,earthquake_raw_20250526102740.json,145308,1748255269000
dbfs:/mnt/data/raw/earthquake_raw_20250526103856.json,earthquake_raw_20250526103856.json,145329,1748255947000
dbfs:/mnt/data/raw/earthquake_raw_20250526105050.json,earthquake_raw_20250526105050.json,143209,1748256660000
