In [1]:
%run nb_functions

StatementMeta(, ee3c3d21-1bdb-46ed-9a0a-db59839cb106, 16, Finished, Available, Finished)

nb_config Loaded
nb_logging Loaded


In [2]:
# Create the notebook-wide logger; the cells below log through this `logger` name.
# NOTE(review): get_logger is not defined in this notebook — presumably it is
# brought into scope by the `%run nb_functions` cell above; confirm.
logger = get_logger(__name__)
# Mark the start of the Silver -> Gold run in the log output.
logger.info("Silver -> Gold süreci başladı.")

StatementMeta(, ee3c3d21-1bdb-46ed-9a0a-db59839cb106, 17, Finished, Available, Finished)

[INFO] __main__ - Silver -> Gold süreci başladı.


# --- HÜCRE 2: Data Konfigürasyonları ---

In [3]:
# Analysis config: declarative description of the Gold table build.
#   target_table - name of the table the joined result is written to
#   base_dataset - dataset the joins start from
#   joins        - datasets joined onto the base, in order
# Both joined datasets share one rule: a left join on the date-based "date" key.
gold_config = {
    "target_table": "gold_smart_city_analysis",
    "base_dataset": "energy",
    "joins": [
        {"dataset": joined_name, "key": "date", "how": "left"}
        for joined_name in ("weather", "air_quality")
    ],
}

StatementMeta(, ee3c3d21-1bdb-46ed-9a0a-db59839cb106, 18, Finished, Available, Finished)

# --- HÜCRE 3: Veriyi Oku ve Hazırla ---

In [4]:
# Read each Silver dataset, run the date/time enrichment, and register the
# prepared DataFrame in `datasets` under the name gold_config refers to.
datasets = {}


def _read_silver_dataset(path, label, input_col):
    """Load one Silver Delta location and enrich it with the "date" column.

    Args:
        path: Location of the Delta files to load.
        label: Human-readable dataset name; used only in the log line.
        input_col: Name of the source column passed as ``input_col`` to
            enrich_date_time_features.

    Returns:
        Whatever enrich_date_time_features returns for the loaded DataFrame
        when called with ``target_col="date"``.
    """
    logger.info(f"Okunuyor: {label} ({path})")
    loaded_df = spark.read.format("delta").load(path)
    return enrich_date_time_features(loaded_df, input_col=input_col, target_col="date")


# 1. Energy — the only dataset that also gets its "price" column formatted.
# NOTE(review): the trailing 2 is presumably the number of decimal places; confirm
# against format_decimal_col.
energy_path = f"{PATH_SILVER}/energy"
df_energy = _read_silver_dataset(energy_path, "Energy", "readingdate")
df_energy = format_decimal_col(df_energy, "price", 2)
datasets["energy"] = df_energy

# 2. Weather
weather_path = f"{PATH_SILVER}/weather"
df_weather = _read_silver_dataset(weather_path, "Weather", "time")
datasets["weather"] = df_weather

# 3. Air Quality — the Silver folder is "quality" while the dataset key is "air_quality".
air_path = f"{PATH_SILVER}/quality"
df_air = _read_silver_dataset(air_path, "Air Quality", "time")
datasets["air_quality"] = df_air

StatementMeta(, ee3c3d21-1bdb-46ed-9a0a-db59839cb106, 19, Finished, Available, Finished)

[INFO] __main__ - Okunuyor: Energy (abfss://smart_city@onelake.dfs.fabric.microsoft.com/smart_city_lakehouse.Lakehouse/Files/silver/smart_city/energy)
[INFO] __main__ - Okunuyor: Weather (abfss://smart_city@onelake.dfs.fabric.microsoft.com/smart_city_lakehouse.Lakehouse/Files/silver/smart_city/weather)
[INFO] __main__ - Okunuyor: Air Quality (abfss://smart_city@onelake.dfs.fabric.microsoft.com/smart_city_lakehouse.Lakehouse/Files/silver/smart_city/quality)


# --- HÜCRE 4: Integration (Join) ---

In [5]:
# Log the start of the join (integration) stage.
logger.info("Join işlemi başlıyor...")

StatementMeta(, ee3c3d21-1bdb-46ed-9a0a-db59839cb106, 20, Finished, Available, Finished)

[INFO] __main__ - Join işlemi başlıyor...


In [6]:
# Start from the base dataset named in the config; each join rule below
# extends this DataFrame in the order the rules are listed.
final_df = datasets[gold_config["base_dataset"]]

for join_rule in gold_config["joins"]:
    dataset_name = join_rule["dataset"]
    join_key = join_rule["key"]
    join_type = join_rule["how"]

    if dataset_name not in datasets:
        # Best-effort: a dataset that was never prepared is skipped, not fatal.
        logger.warning(f"Dataset {dataset_name} bulunamadı, join atlanıyor.")
        continue

    right_df = datasets[dataset_name]

    # Drop right-side columns that already exist on the left (e.g. _id, _source),
    # keeping only the join key. Names are compared case-insensitively: Spark
    # resolves column names case-insensitively by default and Delta refuses
    # schemas whose columns differ only by case, so "Temp" vs "temp" is a clash
    # that a case-sensitive comparison would let through.
    left_names = {existing.lower() for existing in final_df.columns}
    key_name = join_key.lower()
    cols_to_drop = [
        candidate
        for candidate in right_df.columns
        if candidate.lower() in left_names and candidate.lower() != key_name
    ]
    right_df = right_df.drop(*cols_to_drop)

    logger.info(f"Joining: {dataset_name} on {join_key}")
    final_df = final_df.join(right_df, on=join_key, how=join_type)

StatementMeta(, ee3c3d21-1bdb-46ed-9a0a-db59839cb106, 21, Finished, Available, Finished)

[INFO] __main__ - Joining: weather on date
[INFO] __main__ - Joining: air_quality on date


# --- HÜCRE 5: Load to Gold (Managed Table) ---

In [7]:
# Resolve the destination table name from the config and log it before the write.
target_table_name = gold_config["target_table"]
logger.info(f"Gold Tabloya Yazılıyor: {target_table_name}")

StatementMeta(, ee3c3d21-1bdb-46ed-9a0a-db59839cb106, 22, Finished, Available, Finished)

[INFO] __main__ - Gold Tabloya Yazılıyor: gold_smart_city_analysis


In [8]:
# Gold is the reporting layer, so the result is persisted as a table.
# The write goes through the shared load_data_into_delta_table helper (not
# defined in this notebook), addressed by table name instead of by path.
load_data_into_delta_table(
    table_name=target_table_name,
    data=final_df,
    full_load=True,  # the analysis table is recreated on every run
    sink_path=None,  # no path is passed: it is optional in table (managed) mode
)

logger.info(f"İşlem Başarılı. Tablo: {target_table_name} oluşturuldu.")

StatementMeta(, ee3c3d21-1bdb-46ed-9a0a-db59839cb106, 23, Finished, Available, Finished)

SUCCESS: Table 'gold_smart_city_analysis' saved.
[INFO] __main__ - İşlem Başarılı. Tablo: gold_smart_city_analysis oluşturuldu.
