In [None]:
!pip install -q duckrun --upgrade

In [None]:
# Notebook parameters: edit here, then re-run every cell below.
ws = 'external'  # first segment of the duckrun connection path
lh = 'external'  # second segment; ".lakehouse" is appended to it when connecting
schema_source = 'testing'  # schema the source table is read from
schema_destination = 'ga'  # schema the dim_* / fact_* tables are written to
table_source = 'icebergga'  # table queried by the dimension and fact SQL below

In [None]:
import duckrun

# Connection target is built as "<ws>/<lh>.lakehouse/<schema_source>".
source_path = f'{ws}/{lh}.lakehouse/{schema_source}'

# `con` is the handle the dimension and fact queries below run against.
con = duckrun.connect(source_path)

🔌 Attaching tables from schema: testing
🔍 Discovering tables via OneLake Delta Table API...
   Using identifier: external/external.Lakehouse
   Listing tables in schema: testing


   Found 2 tables


icebergga, simple


In [None]:
# Dim_Traffic: distinct traffic-source attribute combinations; rows with no
# source are skipped. Written to <schema_destination>.dim_traffic (overwrite).
# NOTE(review): DISTINCT also spans campaign/keyword/adContent, so the same
# traffic_key (source_medium) can appear on several rows — confirm the
# downstream model tolerates a non-unique key.
# NOTE(review): a NULL medium makes the `||` concatenation NULL, giving a NULL
# traffic_key — verify whether medium can be NULL in the source.
dim_traffic_sql = f"""
    SELECT DISTINCT
        trafficSource.source || '_' || trafficSource.medium AS traffic_key,
        trafficSource.source,
        trafficSource.medium,
        trafficSource.campaign,
        trafficSource.keyword,
        trafficSource.adContent
    FROM {table_source}
    WHERE trafficSource.source IS NOT NULL
"""
con.sql(dim_traffic_sql).write.mode("overwrite").saveAsTable(f'{schema_destination}.dim_traffic')

# Dim_Device: distinct device attribute combinations from the source table.
# Written to <schema_destination>.dim_device (overwrite).
# NOTE(review): device_key is built from deviceCategory and operatingSystem
# only, while DISTINCT also spans browser and isMobile — the same device_key
# can therefore appear on several rows. Confirm this is acceptable downstream.
dim_device_sql = f"""
    SELECT DISTINCT
        device.deviceCategory || '_' || device.operatingSystem AS device_key,
        device.deviceCategory,
        device.operatingSystem,
        device.browser,
        device.isMobile
    FROM {table_source}
"""
con.sql(dim_device_sql).write.mode("overwrite").saveAsTable(f'{schema_destination}.dim_device')

# Dim_Geography: exactly one row per geo_key (lower-cased country_city).
# When a key has several candidate rows, ROW_NUMBER keeps the one with a
# non-NULL region first, then the smallest region value.
# Written to <schema_destination>.dim_geography (overwrite).
# NOTE(review): only country is filtered for NULL; a NULL city would make the
# concatenated geo_key NULL — verify whether city can be NULL in the source.
dim_geography_sql = f"""
    WITH ranked AS (
        SELECT 
            lower(geoNetwork.country) || '_' || lower(geoNetwork.city) AS geo_key,
            geoNetwork.continent,
            geoNetwork.country,
            geoNetwork.region,
            geoNetwork.city,
            ROW_NUMBER() OVER (
                PARTITION BY lower(geoNetwork.country) || '_' || lower(geoNetwork.city)
                ORDER BY 
                    CASE WHEN geoNetwork.region IS NOT NULL THEN 0 ELSE 1 END,
                    geoNetwork.region
            ) AS rn
        FROM {table_source}
        WHERE geoNetwork.country IS NOT NULL
    )
    SELECT geo_key, continent, country, region, city
    FROM ranked
    WHERE rn = 1
"""
con.sql(dim_geography_sql).write.mode('overwrite').saveAsTable(f'{schema_destination}.dim_geography')

# Dim_Visitor: one row per distinct fullVisitorId; the id doubles as the key.
# Written to <schema_destination>.dim_visitor (overwrite).
dim_visitor_sql = f"""
    SELECT DISTINCT
        fullVisitorId AS visitor_key,
        fullVisitorId
    FROM {table_source}
"""
con.sql(dim_visitor_sql).write.mode("overwrite").saveAsTable(f'{schema_destination}.dim_visitor')

# Dim_Date: one row per calendar day from 2010-01-01 through 2030-12-31.
# date_key is the day rendered as YYYYMMDD and cast to INTEGER.
# The write uses mode("ignore"); judging by the run log ("table doesn't
# exist" before creation) this presumably skips the write when dim_date is
# already present. NOTE(review): if so, an existing dim_date must be dropped
# or rebuilt once to pick up the corrected is_weekend values.
con.sql("""
    WITH date_range AS (
        SELECT UNNEST(
            generate_series(
                DATE '2010-01-01',
                DATE '2030-12-31',
                INTERVAL 1 DAY
            )
        ) AS full_date
    )
    SELECT
        CAST(STRFTIME(full_date, '%Y%m%d') AS INTEGER) AS date_key,
        CAST(full_date AS DATE) AS full_date,
        YEAR(full_date) AS year,
        MONTH(full_date) AS month,
        DAY(full_date) AS day,
        -- DuckDB DAYOFWEEK numbering: Sunday = 0 ... Saturday = 6
        DAYOFWEEK(full_date) AS day_of_week,
        DAYNAME(full_date) AS day_name,
        MONTHNAME(full_date) AS month_name,
        QUARTER(full_date) AS quarter,
        -- ISODOW numbers Monday = 1 ... Sunday = 7, so (6, 7) is Saturday and
        -- Sunday. DAYOFWEEK never returns 7, so testing it against (6, 7)
        -- flagged Saturday only and left every Sunday as a weekday.
        ISODOW(full_date) IN (6, 7) AS is_weekend,
        WEEKOFYEAR(full_date) AS week_of_year
    FROM date_range
    ORDER BY date_key
""").write.mode("ignore").saveAsTable(f'{schema_destination}.dim_date')

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Writing to Delta table: ga.dim_traffic (mode=overwrite) (engine=pyarrow, optimized row groups, compression=ZSTD)


✅ Table ga.dim_traffic created/overwritten


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Writing to Delta table: ga.dim_device (mode=overwrite) (engine=pyarrow, optimized row groups, compression=ZSTD)
✅ Table ga.dim_device created/overwritten


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Writing to Delta table: ga.dim_geography (mode=overwrite) (engine=pyarrow, optimized row groups, compression=ZSTD)
✅ Table ga.dim_geography created/overwritten


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Writing to Delta table: ga.dim_visitor (mode=overwrite) (engine=pyarrow, optimized row groups, compression=ZSTD)


✅ Table ga.dim_visitor created/overwritten
Creating table ga.dim_date (mode='ignore', table doesn't exist)
Writing to Delta table: ga.dim_date (mode=overwrite) (engine=pyarrow, optimized row groups, compression=ZSTD)
✅ Table ga.dim_date created/overwritten


'dim_date'

In [None]:
# Fact_Sessions: one output row per source row, carrying the dimension keys
# plus the session measures. The key expressions repeat the ones used when
# building dim_traffic, dim_device, dim_geography and dim_visitor, so they
# must be kept in sync with those cells.
# revenue is totals.transactionRevenue divided by 1,000,000.
# Written to <schema_destination>.fact_sessions (overwrite).
fact_sessions_sql = f"""
    SELECT 
        -- Keys
        cast(date AS integer) as date_key,
        fullVisitorId AS visitor_key,
        trafficSource.source || '_' || trafficSource.medium AS traffic_key,
        device.deviceCategory || '_' || device.operatingSystem AS device_key,
        lower(geoNetwork.country) || '_' || lower(geoNetwork.city) AS geo_key,
        
        -- Measures
        totals.visits,
        totals.hits,
        totals.pageviews,
        totals.timeOnSite,
        totals.bounces,
        totals.transactions,
        totals.transactionRevenue / 1000000.0 AS revenue ,
        totals.newVisits,
        visitNumber
        
    FROM {table_source}
"""
con.sql(fact_sessions_sql).write.mode("overwrite").saveAsTable(f'{schema_destination}.fact_sessions')

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Writing to Delta table: ga.fact_sessions (mode=overwrite) (engine=pyarrow, optimized row groups, compression=ZSTD)


✅ Table ga.fact_sessions created/overwritten


'fact_sessions'

In [None]:
# Reconnect against the destination schema, then deploy the model definition
# (.bim) and the report (.pbix) from their hosted URLs.
# NOTE(review): this rebinds `con`; re-running an earlier query cell after
# this one would use the destination-schema connection instead of the source.
# NOTE(review): the "ga" arguments look like the dataset / report names shown
# in the run log — confirm their order against the duckrun documentation.
ga_model_url = "https://raw.githubusercontent.com/djouallah/Fabric_Notebooks_Demo/refs/heads/main/googleanalytics/ga.bim"
ga_report_url = "https://github.com/djouallah/Fabric_Notebooks_Demo/raw/refs/heads/main/googleanalytics/report.pbix"

con = duckrun.connect(f'{ws}/{lh}.lakehouse/{schema_destination}')
con.deploy(ga_model_url, "ga")
con.deploy_pbix(ga_report_url, "ga", "ga")

🔌 Attaching tables from schema: ga
🔍 Discovering tables via OneLake Delta Table API...
   Using identifier: external/external.Lakehouse
   Listing tables in schema: ga


   Found 6 tables


dim_date, dim_device, dim_geography, dim_traffic, dim_visitor, fact_sessions
Semantic Model Deployment (DirectLake)
✅ Using cached Fabric API token

[Step 1/6] Getting workspace information...
OK Found workspace: external

[Step 2/6] Checking if dataset 'ga' exists...


OK Dataset 'ga' already exists - skipping deployment
   Waiting 5 seconds...



[Step 3/3] Refreshing existing semantic model...
   Checking for in-progress refreshes...
   Step 1: Clearing values from memory...


   OK Clear values initiated, monitoring progress...


   OK Clear values completed
   Step 2: Full refresh to reframe data...


   OK Refresh initiated
   Monitoring refresh progress...


   Status: Unknown...


OK Refresh completed successfully

 SUCCESS: Refresh Completed!
Dataset: ga
PBIX Upload and Bind Operation
✅ Using cached Fabric API token

[Step 1/6] Getting workspace information...
OK Found workspace: external

[Step 2/6] Finding semantic model 'ga'...


OK Found semantic model (ID: 82886d2a-6b1b-4c7f-99ca-ef673e13c3f3)

[Step 3/6] Checking if report 'ga' already exists...
OK Report name is available

[Step 4/7] Downloading PBIX file from URL...
  URL: https://github.com/djouallah/Fabric_Notebooks_Demo/raw/refs/heads/main/googleanalytics/report.pbix
OK Downloaded PBIX file (25384 bytes)

[Step 5/7] Extracting PBIX contents...
  OK Found Report/Layout (929052 bytes)

[Step 6/7] Creating report in Fabric workspace...


  Report creation initiated, monitoring progress...


  OK Report creation completed
  Searching for created report...


  OK Found report by name
OK Report created successfully (ID: deed2abd-2c9d-4f46-b43e-a8259b590a4c)

[Step 7/7] Verifying report binding...
Report 'ga' is now bound to semantic model 'ga'

SUCCESS: Upload and Bind Completed!
Workspace: external
Report: ga
Semantic Model: ga


1