-- In [0]:
-- Metric view over test_drive.road_crash.vw_crash_details.
-- Only rows with Year >= 2010 and a non-null REPORT_ID are exposed (see `filter`).
-- Dimensions bucket raw columns into reporting categories; measures are
-- aggregated at query time with MEASURE(...).
--
-- NOTE(review): every crash count uses COUNT(DISTINCT REPORT_ID), which
-- suggests the source holds several rows per crash. If Total_Casualties,
-- Total_Fatalities and `Total Units` are crash-level values repeated on each
-- row, the SUM-based measures over-count -- confirm the grain of the source view.
CREATE OR REPLACE VIEW test_drive.road_crash.metrics_crash
WITH METRICS
LANGUAGE YAML
AS $$
version: 0.1
source: test_drive.road_crash.vw_crash_details
filter: Year >= 2010 AND REPORT_ID IS NOT NULL

# Column names containing spaces must be quoted with backticks. By default a
# double-quoted token is a string literal in Databricks SQL, so "Seat Belt"
# would compare the text 'Seat Belt' rather than the column.
# Crash Quarter lists all twelve month names explicitly so that a NULL or
# unrecognised Month is reported as 'Unknown' instead of being folded into Q4.
dimensions:
  - name: Crash Year
    expr: Year
  - name: Crash Month
    expr: Month
  - name: longitude
    expr: longitude
  - name: latitude
    expr: latitude
  - name: Crash Quarter
    expr: CASE
      WHEN Month IN ('January', 'February', 'March') THEN 'Q1'
      WHEN Month IN ('April', 'May', 'June') THEN 'Q2'
      WHEN Month IN ('July', 'August', 'September') THEN 'Q3'
      WHEN Month IN ('October', 'November', 'December') THEN 'Q4'
      ELSE 'Unknown'
      END
  - name: Time Period
    expr: CASE
      WHEN DayNight = 'Daylight' THEN 'Daylight Hours'
      WHEN DayNight = 'Night' THEN 'Night Hours'
      ELSE 'Unknown'
      END
  - name: LGA Region
    expr: "`LGA Name`"
  - name: Suburb Area
    expr: Suburb
  - name: Speed Zone Category
    expr: CASE
      WHEN `Area Speed` <= 50 THEN 'Urban (≤50 km/h)'
      WHEN `Area Speed` <= 80 THEN 'Semi-Urban (51-80 km/h)'
      WHEN `Area Speed` <= 100 THEN 'Rural (81-100 km/h)'
      WHEN `Area Speed` > 100 THEN 'Highway (>100 km/h)'
      ELSE 'Unknown Speed Limit'
      END
  - name: Road Condition
    expr: COALESCE(`Road Surface`, 'Unknown')
  - name: Weather Category
    expr: CASE
      WHEN `Weather Cond` = 'Not Raining' THEN 'Clear/Dry'
      WHEN `Weather Cond` LIKE '%Rain%' THEN 'Wet/Rainy'
      ELSE COALESCE(`Weather Cond`, 'Unknown')
      END
  - name: Road Surface Condition
    expr: COALESCE(`Moisture Cond`, 'Unknown')
  - name: Driver Gender
    expr: CASE
      WHEN Driver_Sex = 'Male' THEN 'Male'
      WHEN Driver_Sex = 'Female' THEN 'Female'
      ELSE 'Not Specified'
      END
  - name: Driver Age Group
    expr: CASE
      WHEN try_cast(Driver_Age AS INT) < 25 THEN 'Under 25'
      WHEN try_cast(Driver_Age AS INT) < 35 THEN '25-34'
      WHEN try_cast(Driver_Age AS INT) < 50 THEN '35-49'
      WHEN try_cast(Driver_Age AS INT) < 65 THEN '50-64'
      WHEN try_cast(Driver_Age AS INT) >= 65 THEN '65+'
      ELSE 'Unknown Age'
      END
  - name: Vehicle Age Category
    expr: CASE
      WHEN try_cast(`Veh Year` AS INT) >= 2020 THEN 'New (2020+)'
      WHEN try_cast(`Veh Year` AS INT) >= 2015 THEN 'Recent (2015-2019)'
      WHEN try_cast(`Veh Year` AS INT) >= 2010 THEN 'Older (2010-2014)'
      WHEN try_cast(`Veh Year` AS INT) >= 2000 THEN 'Old (2000-2009)'
      WHEN try_cast(`Veh Year` AS INT) < 2000 THEN 'Very Old (<2000)'
      ELSE 'Unknown Year'
      END
  - name: Safety Belt Usage
    expr: CASE
      WHEN `Seat Belt` = 'Fitted - Worn' THEN 'Seat Belt Used'
      WHEN `Seat Belt` = 'Fitted - Not Worn' THEN 'Seat Belt Not Used'
      WHEN `Seat Belt` IS NULL OR `Seat Belt` = 'null' THEN 'Not Applicable'
      ELSE 'Unknown'
      END
  - name: Injury Severity
    expr: COALESCE(`Injury Extent`, 'No Injury Recorded')
  - name: Casualty Category
    expr: COALESCE(`Casualty Type`, 'No Casualty')

# Rate measures cast their inputs the same way as the matching Total measures
# and guard the denominator with NULLIF so an empty result yields NULL.
# Wet Weather Crashes excludes 'Not Raining' explicitly, because that value
# also matches LIKE '%Rain%'.
# Unique Locations ignores rows with a missing coordinate instead of turning
# them into a phantom '0,0' location.
measures:
  - name: Total Crashes
    expr: COUNT(DISTINCT REPORT_ID)
  - name: Total Casualties
    expr: SUM(try_cast(COALESCE(Total_Casualties, 0) AS INT))
  - name: Total Fatalities
    expr: SUM(try_cast(COALESCE(Total_Fatalities, 0) AS INT))
  - name: Total Vehicles Involved
    expr: SUM(try_cast(COALESCE(`Total Units`, 1) AS INT))
  - name: Casualty Rate
    expr: try_cast(SUM(try_cast(COALESCE(Total_Casualties, 0) AS INT)) / NULLIF(COUNT(DISTINCT REPORT_ID), 0) AS DOUBLE)
  - name: Fatality Rate
    expr: try_cast(SUM(try_cast(COALESCE(Total_Fatalities, 0) AS INT)) / NULLIF(COUNT(DISTINCT REPORT_ID), 0) AS DOUBLE)
  - name: Multi Vehicle Crashes
    expr: COUNT(DISTINCT CASE WHEN `Total Units` > 1 THEN REPORT_ID END)
  - name: Single Vehicle Crashes
    expr: COUNT(DISTINCT CASE WHEN `Total Units` = 1 THEN REPORT_ID END)
  - name: Fatal Crashes
    expr: COUNT(DISTINCT CASE WHEN Total_Fatalities > 0 THEN REPORT_ID END)
  - name: Injury Crashes
    expr: COUNT(DISTINCT CASE WHEN Total_Casualties > 0 THEN REPORT_ID END)
  - name: Property Damage Only Crashes
    expr: COUNT(DISTINCT CASE WHEN COALESCE(Total_Casualties, 0) = 0 AND COALESCE(Total_Fatalities, 0) = 0 THEN REPORT_ID END)
  - name: Wet Weather Crashes
    expr: COUNT(DISTINCT CASE WHEN (`Weather Cond` <> 'Not Raining' AND `Weather Cond` LIKE '%Rain%') OR `Moisture Cond` = 'Wet' THEN REPORT_ID END)
  - name: Night Time Crashes
    expr: COUNT(DISTINCT CASE WHEN DayNight = 'Night' THEN REPORT_ID END)
  - name: High Speed Zone Crashes
    expr: COUNT(DISTINCT CASE WHEN `Area Speed` > 80 THEN REPORT_ID END)
  - name: Seat Belt Compliance Rate
    expr: COUNT(CASE WHEN `Seat Belt` = 'Fitted - Worn' THEN 1 END) / NULLIF(COUNT(CASE WHEN `Seat Belt` IN ('Fitted - Worn', 'Fitted - Not Worn') THEN 1 END), 0)
  - name: Hospital Admissions
    expr: COUNT(CASE WHEN Hospital IS NOT NULL AND Hospital != 'null' THEN 1 END)
  - name: Unique Locations
    expr: COUNT(DISTINCT CASE WHEN longitude IS NOT NULL AND latitude IS NOT NULL THEN CONCAT(longitude, ',', latitude) END)
  - name: Average Speed Limit
    expr: AVG(`Area Speed`)
$$;

-- Yearly overview: crash, casualty and fatality totals together with the
-- per-crash casualty and fatality rates, most recent year first.
SELECT
    `Crash Year`
  , MEASURE(`Total Crashes`)
  , MEASURE(`Total Casualties`)
  , MEASURE(`Total Fatalities`)
  , MEASURE(`Casualty Rate`)
  , MEASURE(`Fatality Rate`)
FROM
    test_drive.road_crash.metrics_crash
GROUP BY
    `Crash Year`
ORDER BY
    `Crash Year` DESC;

-- Month-by-month crash, casualty, fatal-crash and injury-crash counts for 2021.
-- `Crash Month` holds month names, so the ORDER BY maps each name to its
-- calendar position; a name that matches none of them maps to NULL.
SELECT
    `Crash Month`
  , MEASURE(`Total Crashes`)
  , MEASURE(`Total Casualties`)
  , MEASURE(`Fatal Crashes`)
  , MEASURE(`Injury Crashes`)
FROM
    test_drive.road_crash.metrics_crash
WHERE
    `Crash Year` = 2021
GROUP BY
    `Crash Month`
ORDER BY
    CASE
        WHEN `Crash Month` = 'January'   THEN 1
        WHEN `Crash Month` = 'February'  THEN 2
        WHEN `Crash Month` = 'March'     THEN 3
        WHEN `Crash Month` = 'April'     THEN 4
        WHEN `Crash Month` = 'May'       THEN 5
        WHEN `Crash Month` = 'June'      THEN 6
        WHEN `Crash Month` = 'July'      THEN 7
        WHEN `Crash Month` = 'August'    THEN 8
        WHEN `Crash Month` = 'September' THEN 9
        WHEN `Crash Month` = 'October'   THEN 10
        WHEN `Crash Month` = 'November'  THEN 11
        WHEN `Crash Month` = 'December'  THEN 12
    END;

-- Crash statistics by LGA region: the 20 regions with the most crashes.
-- Rows without an LGA name are excluded.
-- `LGA Region` is a secondary sort key so that regions tied on crash count
-- come back in a stable order and the LIMIT cut-off is deterministic.
SELECT
    `LGA Region`,
    MEASURE(`Total Crashes`),
    MEASURE(`Total Casualties`),
    MEASURE(`Fatal Crashes`),
    MEASURE(`Casualty Rate`),
    MEASURE(`Unique Locations`)
FROM test_drive.road_crash.metrics_crash
WHERE `LGA Region` IS NOT NULL
GROUP BY `LGA Region`
ORDER BY
    MEASURE(`Total Crashes`) DESC,
    `LGA Region`
LIMIT 20;

-- Suburbs ranked by fatality rate, then by crash count: top 15.
-- Only suburbs with at least 10 crashes are considered, so a single fatal
-- crash in a low-volume suburb cannot dominate the ranking.
-- The trailing `Suburb Area`, `LGA Region` sort keys break ties so the
-- LIMIT cut-off is deterministic.
SELECT
    `Suburb Area`,
    `LGA Region`,
    MEASURE(`Total Crashes`),
    MEASURE(`Fatal Crashes`),
    MEASURE(`Casualty Rate`),
    MEASURE(`Fatality Rate`)
FROM test_drive.road_crash.metrics_crash
WHERE `Suburb Area` IS NOT NULL
GROUP BY `Suburb Area`, `LGA Region`
HAVING MEASURE(`Total Crashes`) >= 10
ORDER BY
    MEASURE(`Fatality Rate`) DESC,
    MEASURE(`Total Crashes`) DESC,
    `Suburb Area`,
    `LGA Region`
LIMIT 15;

-- Daylight versus night: crash counts, per-crash rates, and the share of
-- crashes that were fatal (percentage rounded to two decimals).
SELECT
    `Time Period`
  , MEASURE(`Total Crashes`)
  , MEASURE(`Fatal Crashes`)
  , MEASURE(`Casualty Rate`)
  , MEASURE(`Fatality Rate`)
  , ROUND(MEASURE(`Fatal Crashes`) * 100.0 / MEASURE(`Total Crashes`), 2) AS `Fatal_Crash_Percentage`
FROM
    test_drive.road_crash.metrics_crash
GROUP BY
    `Time Period`
ORDER BY
    `Time Period` ASC;

-- Crash profile per driver age group, busiest group first.
-- Rows bucketed as 'Unknown Age' are left out.
SELECT
    `Driver Age Group`
  , MEASURE(`Total Crashes`)
  , MEASURE(`Fatal Crashes`)
  , MEASURE(`Multi Vehicle Crashes`)
  , MEASURE(`Single Vehicle Crashes`)
  , MEASURE(`Casualty Rate`)
  , MEASURE(`Seat Belt Compliance Rate`)
FROM
    test_drive.road_crash.metrics_crash
WHERE
    `Driver Age Group` <> 'Unknown Age'
GROUP BY
    `Driver Age Group`
ORDER BY
    MEASURE(`Total Crashes`) DESC;

-- Male versus female drivers: crash counts, rates, night-time crashes and
-- seat belt compliance. Rows with any other gender value are excluded.
SELECT
    `Driver Gender`
  , MEASURE(`Total Crashes`)
  , MEASURE(`Fatal Crashes`)
  , MEASURE(`Casualty Rate`)
  , MEASURE(`Fatality Rate`)
  , MEASURE(`Night Time Crashes`)
  , MEASURE(`Seat Belt Compliance Rate`)
FROM
    test_drive.road_crash.metrics_crash
WHERE
    `Driver Gender` = 'Male'
    OR `Driver Gender` = 'Female'
GROUP BY
    `Driver Gender`
ORDER BY
    MEASURE(`Total Crashes`) DESC;

-- Crash outcomes per weather category, highest fatality rate first.
-- Categories with fewer than 50 crashes are dropped after aggregation.
SELECT
    `Weather Category`
  , MEASURE(`Total Crashes`)
  , MEASURE(`Fatal Crashes`)
  , MEASURE(`Casualty Rate`)
  , MEASURE(`Fatality Rate`)
  , ROUND(MEASURE(`Fatal Crashes`) * 100.0 / MEASURE(`Total Crashes`), 2) AS `Fatal_Crash_Percentage`
FROM
    test_drive.road_crash.metrics_crash
GROUP BY
    `Weather Category`
HAVING
    MEASURE(`Total Crashes`) >= 50
ORDER BY
    MEASURE(`Fatality Rate`) DESC;

-- Crash outcomes split by whether a fitted seat belt was worn.
-- Only the 'Seat Belt Used' and 'Seat Belt Not Used' buckets are compared.
SELECT
    `Safety Belt Usage`
  , MEASURE(`Total Crashes`)
  , MEASURE(`Fatal Crashes`)
  , MEASURE(`Total Casualties`)
  , MEASURE(`Casualty Rate`)
  , MEASURE(`Fatality Rate`)
  , MEASURE(`Hospital Admissions`)
FROM
    test_drive.road_crash.metrics_crash
WHERE
    `Safety Belt Usage` = 'Seat Belt Used'
    OR `Safety Belt Usage` = 'Seat Belt Not Used'
GROUP BY
    `Safety Belt Usage`
ORDER BY
    `Safety Belt Usage` ASC;

-- Most dangerous driving scenarios: combinations of speed zone, time period,
-- weather and driver age group, ranked by fatality rate and then crash count.
-- Unknown time periods and unknown ages are excluded, and a combination needs
-- at least 20 crashes to be ranked.
-- The four grouping columns are appended to the ORDER BY so that tied
-- combinations sort stably and the LIMIT cut-off is deterministic.
SELECT
    `Speed Zone Category`,
    `Time Period`,
    `Weather Category`,
    `Driver Age Group`,
    MEASURE(`Total Crashes`),
    MEASURE(`Fatal Crashes`),
    MEASURE(`Fatality Rate`),
    ROUND((MEASURE(`Fatal Crashes`) * 100.0 / MEASURE(`Total Crashes`)), 2) AS `Fatal_Crash_Percentage`
FROM test_drive.road_crash.metrics_crash
WHERE `Time Period` != 'Unknown'
    AND `Driver Age Group` != 'Unknown Age'
GROUP BY `Speed Zone Category`, `Time Period`, `Weather Category`, `Driver Age Group`
HAVING MEASURE(`Total Crashes`) >= 20
ORDER BY
    MEASURE(`Fatality Rate`) DESC,
    MEASURE(`Total Crashes`) DESC,
    `Speed Zone Category`,
    `Time Period`,
    `Weather Category`,
    `Driver Age Group`
LIMIT 20;

