In [0]:
---CAMADA BRONZE

In [0]:
CREATE CATALOG IF NOT EXISTS bronze_cyber_breaches;
USE CATALOG bronze_cyber_breaches;
CREATE SCHEMA IF NOT EXISTS bronze_cyber_breaches;
USE SCHEMA bronze_cyber_breaches;

In [0]:
CREATE CATALOG IF NOT EXISTS main;

CREATE SCHEMA IF NOT EXISTS main.bronze;
CREATE SCHEMA IF NOT EXISTS main.silver;
CREATE SCHEMA IF NOT EXISTS main.gold;

In [0]:
USE CATALOG main;
USE SCHEMA bronze;

In [0]:
CREATE VOLUME IF NOT EXISTS main.bronze.raw_data;

In [0]:
USE CATALOG main;
USE SCHEMA bronze;

CREATE OR REPLACE TABLE bronze_cyber_breaches
AS SELECT * 
FROM csv.`/Volumes/main/bronze/raw_data/bronze_cyber_breaches.csv`;

num_affected_rows,num_inserted_rows


In [0]:
SELECT * FROM bronze_cyber_breaches LIMIT 20;

_c0,_c1,_c2,_c3,_c4,_c5,_c6
,Entity,Year,Records,Organization type,Method,Sources
0.0,21st Century Oncology,2016,2200000,healthcare,hacked,[5][6]
1.0,500px,2020,14870304,social networking,hacked,[7]
2.0,Accendo Insurance Co.,2020,175350,healthcare,poor security,[8][9]
3.0,Adobe Systems Incorporated,2013,152000000,tech,hacked,[10]
4.0,Adobe Inc.,2019,7500000,tech,poor security,[11][12]
5.0,Advocate Medical Group,2017,4000000,healthcare,lost / stolen media,[13][14]
6.0,AerServ (subsidiary of InMobi),2018,75000,advertising,hacked,[15]
7.0,"Affinity Health Plan, Inc.",2013,344579,healthcare,lost / stolen media,[16][17]
8.0,Airtel,2019,320000000,telecommunications,poor security,[18]


In [0]:
---CAMADA SILVER

In [0]:
USE CATALOG main;
USE SCHEMA bronze;

DESCRIBE TABLE bronze_cyber_breaches;

col_name,data_type,comment
_c0,string,
_c1,string,
_c2,string,
_c3,string,
_c4,string,
_c5,string,
_c6,string,


In [0]:
USE CATALOG main;
USE SCHEMA silver;

--- Ajustando os nomes das colunas

DROP TABLE IF EXISTS silver_cyber_breaches;

CREATE TABLE silver_cyber_breaches AS
SELECT
    -- ID
    TRY_CAST(_c0 AS INT) AS breach_id,

    -- Organização
    TRIM(_c1) AS organization,

    -- Ano 
    TRY_CAST(_c2 AS INT) AS year,

    -- Records Exposed 
    TRY_CAST(
        REGEXP_REPLACE(_c3, '[^0-9]', '') 
        AS BIGINT
    ) AS records_exposed,

    -- Tipo de organização (texto)
    INITCAP(TRIM(_c4)) AS organization_type,

    -- Método de violação
    INITCAP(TRIM(_c5)) AS breach_method,

    -- Metadados
    current_timestamp() AS silver_load_timestamp

FROM main.bronze.bronze_cyber_breaches;


num_affected_rows,num_inserted_rows


In [0]:
SELECT * FROM silver_cyber_breaches LIMIT 20;

breach_id,organization,year,records_exposed,organization_type,breach_method,silver_load_timestamp
,Entity,,,Organization Type,Method,2025-12-17T23:56:29.072Z
0.0,21st Century Oncology,2016.0,2200000.0,Healthcare,Hacked,2025-12-17T23:56:29.072Z
1.0,500px,2020.0,14870304.0,Social Networking,Hacked,2025-12-17T23:56:29.072Z
2.0,Accendo Insurance Co.,2020.0,175350.0,Healthcare,Poor Security,2025-12-17T23:56:29.072Z
3.0,Adobe Systems Incorporated,2013.0,152000000.0,Tech,Hacked,2025-12-17T23:56:29.072Z
4.0,Adobe Inc.,2019.0,7500000.0,Tech,Poor Security,2025-12-17T23:56:29.072Z
5.0,Advocate Medical Group,2017.0,4000000.0,Healthcare,Lost / Stolen Media,2025-12-17T23:56:29.072Z
6.0,AerServ (subsidiary of InMobi),2018.0,75000.0,Advertising,Hacked,2025-12-17T23:56:29.072Z
7.0,"Affinity Health Plan, Inc.",2013.0,344579.0,Healthcare,Lost / Stolen Media,2025-12-17T23:56:29.072Z
8.0,Airtel,2019.0,320000000.0,Telecommunications,Poor Security,2025-12-17T23:56:29.072Z


In [0]:
delete from silver_cyber_breaches where breach_id IS NULL

num_affected_rows
1


In [0]:
USE CATALOG main;
USE SCHEMA silver;

SELECT * FROM silver_cyber_breaches LIMIT 20;

breach_id,organization,year,records_exposed,organization_type,breach_method,silver_load_timestamp
0,21st Century Oncology,2016,2200000.0,Healthcare,Hacked,2025-12-17T23:56:29.072Z
1,500px,2020,14870304.0,Social Networking,Hacked,2025-12-17T23:56:29.072Z
2,Accendo Insurance Co.,2020,175350.0,Healthcare,Poor Security,2025-12-17T23:56:29.072Z
3,Adobe Systems Incorporated,2013,152000000.0,Tech,Hacked,2025-12-17T23:56:29.072Z
4,Adobe Inc.,2019,7500000.0,Tech,Poor Security,2025-12-17T23:56:29.072Z
5,Advocate Medical Group,2017,4000000.0,Healthcare,Lost / Stolen Media,2025-12-17T23:56:29.072Z
6,AerServ (subsidiary of InMobi),2018,75000.0,Advertising,Hacked,2025-12-17T23:56:29.072Z
7,"Affinity Health Plan, Inc.",2013,344579.0,Healthcare,Lost / Stolen Media,2025-12-17T23:56:29.072Z
8,Airtel,2019,320000000.0,Telecommunications,Poor Security,2025-12-17T23:56:29.072Z
9,Air Canada,2018,20000.0,Transport,Hacked,2025-12-17T23:56:29.072Z


In [0]:
DESCRIBE TABLE main.silver.silver_cyber_breaches;

col_name,data_type,comment
breach_id,int,
organization,string,
year,int,
records_exposed,bigint,
organization_type,string,
breach_method,string,
silver_load_timestamp,timestamp,


In [0]:
SELECT MAX(TRY_CAST(records_exposed AS BIGINT)) 
FROM main.silver.silver_cyber_breaches;

max(TRY_CAST(records_exposed AS BIGINT))
90000002208


In [0]:
---- CAMADA GOLD

In [0]:
USE CATALOG main;
USE SCHEMA gold;

In [0]:
---Criando a tabela Dimensão de Organização

DROP TABLE IF EXISTS dim_organization;

CREATE TABLE dim_organization AS
SELECT 
    ROW_NUMBER() OVER (ORDER BY organization) AS organization_key,
    organization
FROM (
    SELECT DISTINCT organization
    FROM main.silver.silver_cyber_breaches
    WHERE organization IS NOT NULL
) sub;

num_affected_rows,num_inserted_rows


In [0]:
---Criando a tabela Dimensão de Tipo de Organização

DROP TABLE IF EXISTS dim_organization_type;

CREATE TABLE dim_organization_type AS
SELECT
  ROW_NUMBER() OVER (ORDER BY organization_type) AS organization_type_key,
  organization_type
FROM (
  SELECT DISTINCT organization_type
  FROM main.silver.silver_cyber_breaches
  WHERE organization_type IS NOT NULL
) t;

num_affected_rows,num_inserted_rows


In [0]:
---Criando a tabela Dimensão de Tipo de Ataque

DROP TABLE IF EXISTS dim_breach_method;

CREATE TABLE dim_breach_method AS
SELECT
  ROW_NUMBER() OVER (ORDER BY breach_method) AS breach_method_key,
  breach_method
FROM (
  SELECT DISTINCT breach_method
  FROM main.silver.silver_cyber_breaches
  WHERE breach_method IS NOT NULL
) t;

num_affected_rows,num_inserted_rows


In [0]:
---Criando a tabela Dimensão Ano

CREATE OR REPLACE TABLE main.gold.dim_year AS
SELECT
    ROW_NUMBER() OVER (ORDER BY year) AS year_key,
    year
FROM (
    SELECT DISTINCT year
    FROM main.silver.silver_cyber_breaches
    WHERE year IS NOT NULL
);

num_affected_rows,num_inserted_rows


In [0]:
---Criando a Tabela Fato: Cyber Breaches referenciando todas as dimensões

CREATE OR REPLACE TABLE main.gold.fact_cyber_breaches AS
SELECT
    s.breach_id,
    o.organization_key,
    ot.organization_type_key,
    bm.breach_method_key,
    y.year_key,
    s.records_exposed,
    s.silver_load_timestamp AS fact_load_timestamp
FROM main.silver.silver_cyber_breaches s

LEFT JOIN main.gold.dim_organization o
    ON s.organization = o.organization

LEFT JOIN main.gold.dim_organization_type ot
    ON s.organization_type = ot.organization_type

LEFT JOIN main.gold.dim_breach_method bm
    ON s.breach_method = bm.breach_method

LEFT JOIN main.gold.dim_year y
    ON s.year = y.year;

num_affected_rows,num_inserted_rows


In [0]:
SHOW TABLES IN main.gold;

database,tableName,isTemporary
gold,dim_breach_method,False
gold,dim_organization,False
gold,dim_organization_type,False
gold,dim_year,False
gold,fact_cyber_breaches,False


In [0]:
----- Consultas nas dimensões

SELECT COUNT(*) AS total_dim_organization
FROM main.gold.dim_organization;

SELECT COUNT(*) AS total_dim_organization_type
FROM main.gold.dim_organization_type;

SELECT COUNT(*) AS total_dim_breach_method
FROM main.gold.dim_breach_method;

SELECT COUNT(*) AS total_dim_year
FROM main.gold.dim_year;



total_dim_organization
331


total_dim_organization_type
70


total_dim_breach_method
24


total_dim_year
19


In [0]:
SELECT COUNT(*) AS total_dim_breach_method
FROM main.gold.dim_breach_method;






total_dim_breach_method
24


In [0]:
----- Consulta na tabela fato


SELECT COUNT(*) AS total_fact
FROM main.gold.fact_cyber_breaches;

total_fact
352


In [0]:

----- Comparativo entre tabelas (Silver e Gold)

SELECT 
    SUM(records_exposed) AS total_records_exposed
FROM main.gold.fact_cyber_breaches;

SELECT 
    SUM(records_exposed)
FROM main.silver.silver_cyber_breaches;


total_records_exposed
103363335836


sum(records_exposed)
103363335836


In [0]:
---Ver uma amostra dos dados

SELECT *
FROM main.gold.fact_cyber_breaches
LIMIT 10;



breach_id,organization_key,organization_type_key,breach_method_key,year_key,records_exposed,fact_load_timestamp
0,3,26,5,13,2200000,2025-12-17T23:56:29.072Z
1,5,53,5,17,14870304,2025-12-17T23:56:29.072Z
2,8,26,14,17,175350,2025-12-17T23:56:29.072Z
3,10,56,5,10,152000000,2025-12-17T23:56:29.072Z
4,9,56,14,16,7500000,2025-12-17T23:56:29.072Z
5,11,26,12,14,4000000,2025-12-17T23:56:29.072Z
6,12,2,5,15,75000,2025-12-17T23:56:29.072Z
7,13,26,12,10,344579,2025-12-17T23:56:29.072Z
8,15,60,14,16,320000000,2025-12-17T23:56:29.072Z
9,14,64,5,15,20000,2025-12-17T23:56:29.072Z


In [0]:
----Teste de integridade (PK e FK)

SELECT COUNT(*) AS fk_organization_null
FROM main.gold.fact_cyber_breaches
WHERE organization_key IS NULL;

SELECT COUNT(*) AS fk_org_type_null
FROM main.gold.fact_cyber_breaches
WHERE organization_type_key IS NULL;

SELECT COUNT(*) AS fk_method_null
FROM main.gold.fact_cyber_breaches
WHERE breach_method_key IS NULL;

SELECT COUNT(*) AS fk_year_null
FROM main.gold.fact_cyber_breaches
WHERE year_key IS NULL;



fk_organization_null
0


fk_org_type_null
0


fk_method_null
1


fk_year_null
3


In [0]:
SELECT organization_key, COUNT(*)
FROM main.gold.dim_organization
GROUP BY organization_key
HAVING COUNT(*) > 1;


organization_key,count(1)


In [0]:

---- Como retornou '3' na dim_year e dim_breach_method foi aplicado a correção 
---- Criar registro “Desconhecido” na dimensão 

---  Inserir linha técnica na dim_year
INSERT INTO main.gold.dim_year (year_key, year)
VALUES (-1, -1);

--- Atualizar a fato para apontar para esse registro
UPDATE main.gold.fact_cyber_breaches
SET year_key = -1
WHERE year_key IS NULL;

--- Aplicar constraint
ALTER TABLE main.gold.fact_cyber_breaches
ADD CONSTRAINT chk_year_key_not_null
CHECK (year_key IS NOT NULL);

num_affected_rows,num_inserted_rows
1,1


num_affected_rows
3


In [0]:
---- Como retornou '1' na dim_year e dim_breach_method foi aplicado a correção 
---- Criar registro “Desconhecido” na dimensão 

---  Inserir linha técnica na dim_breach_method
INSERT INTO main.gold.dim_breach_method (breach_method_key, breach_method)
VALUES (-1, 'Unknown');

--- Atualizar a tabela fato
UPDATE main.gold.fact_cyber_breaches
SET breach_method_key = -1
WHERE breach_method_key IS NULL;

--- Aplicar constraint
ALTER TABLE main.gold.fact_cyber_breaches
ADD CONSTRAINT chk_breach_method_key_not_null
CHECK (breach_method_key IS NOT NULL);


num_affected_rows,num_inserted_rows
1,1


num_affected_rows
1


In [0]:

--- Consultas ---

---- Total de registros comprometidos por ano
SELECT 
    y.year,
    SUM(f.records_exposed) AS total_records
FROM main.gold.fact_cyber_breaches f
JOIN main.gold.dim_year y
    ON f.year_key = y.year_key
GROUP BY y.year
ORDER BY y.year;

---- Total de registros comprometidos método de ataque
SELECT 
    m.breach_method,
    SUM(f.records_exposed) AS total_records
FROM main.gold.fact_cyber_breaches f
JOIN main.gold.dim_breach_method m
    ON f.breach_method_key = m.breach_method_key
GROUP BY m.breach_method
ORDER BY total_records DESC;



year,total_records
-1,90002365208
2004,92510000
2005,46825000
2006,71260000
2007,153286405
2008,69066500
2009,255467987
2010,15980476
2011,227788137
2012,428839698


breach_method,total_records
Hacked,97409783853.0
Poor Security,3811143378.0
Unknown,448253141.0
Poor Security / Hacked,412214295.0
Accidentally Published,269917505.0
Data Exposed By Misconfiguration,250000000.0
Lost / Stolen Media,170434561.0
Unsecured S3 Bucket,106000000.0
Misconfiguration/poor Security,100000000.0
Unprotected Api,100000000.0


In [0]:
SELECT COUNT(*) AS total_nulls
FROM main.gold.fact_cyber_breaches
WHERE records_exposed IS NULL;


total_nulls
30


In [0]:
--- Tratativas

ALTER TABLE main.gold.fact_cyber_breaches
ADD CONSTRAINT chk_records_exposed_positive
CHECK (records_exposed >= 0 OR records_exposed IS NULL);

In [0]:
---Garantindo que as abelas não estão vazias

SELECT 'fact' AS tabela, COUNT(*) AS total FROM main.gold.fact_cyber_breaches
UNION ALL
SELECT 'dim_organization', COUNT(*) FROM main.gold.dim_organization
UNION ALL
SELECT 'dim_organization_type', COUNT(*) FROM main.gold.dim_organization_type
UNION ALL
SELECT 'dim_breach_method', COUNT(*) FROM main.gold.dim_breach_method
UNION ALL
SELECT 'dim_year', COUNT(*) FROM main.gold.dim_year;

tabela,total
fact,352
dim_organization,331
dim_organization_type,70
dim_breach_method,25
dim_year,20


In [0]:
---Consulta FKs não nulas

SELECT
  SUM(CASE WHEN organization_key IS NULL THEN 1 ELSE 0 END) AS org_null,
  SUM(CASE WHEN organization_type_key IS NULL THEN 1 ELSE 0 END) AS type_null,
  SUM(CASE WHEN breach_method_key IS NULL THEN 1 ELSE 0 END) AS method_null,
  SUM(CASE WHEN year_key IS NULL THEN 1 ELSE 0 END) AS year_null
FROM main.gold.fact_cyber_breaches;

org_null,type_null,method_null,year_null
0,0,0,0


In [0]:
SELECT COUNT(*) AS fk_orfa
FROM main.gold.fact_cyber_breaches f
LEFT JOIN main.gold.dim_year d
  ON f.year_key = d.year_key
WHERE d.year_key IS NULL;

fk_orfa
0


In [0]:
--- Teste de consulta analítica

SELECT
  y.year,
  bm.breach_method,
  SUM(f.records_exposed) AS total_records
FROM main.gold.fact_cyber_breaches f
JOIN main.gold.dim_year y ON f.year_key = y.year_key
JOIN main.gold.dim_breach_method bm ON f.breach_method_key = bm.breach_method_key
GROUP BY y.year, bm.breach_method
ORDER BY y.year;

year,breach_method,total_records
-1,Hacked,90002365208.0
2004,"Inside Job, Hacked",92000000.0
2004,Inside Job,510000.0
2005,Hacked,41400000.0
2005,Lost / Stolen Media,5300000.0
2005,Poor Security,125000.0
2006,Accidentally Published,20000000.0
2006,Intentionally Lost,960000.0
2006,Lost / Stolen Media,17200000.0
2006,Hacked,4000000.0


In [0]:

----- ANÁLISE DE DADOS- RESPONDER AS QUESTÕES: 

--1.	Quais são os tipos de ataques mais comuns?
--2.	Por que os ataques às empresas estão aumentando?
--3.	Quais tipos de empresas são mais visadas de ataques?
--4.	Para cada tipo de ataque, qual é a forma mais eficiente de prevenção?
--5.	As análises permitem prever cenários futuros de segurança cibernética? Quais são as perspectivas?



In [0]:
--1.	Quais são os tipos de ataques mais comuns?

SELECT
  bm.breach_method,
  COUNT(*) AS total_incidentes
FROM main.gold.fact_cyber_breaches f
JOIN main.gold.dim_breach_method bm
  ON f.breach_method_key = bm.breach_method_key
GROUP BY bm.breach_method
ORDER BY total_incidentes DESC;


--Frequência absoluta de cada método de ataque
--Identifica os vetores mais recorrentes
--Base para priorização de controles de segurança

-- Interpretação esperada

--Ataques como Hacking, Misconfiguration, Accidentally Published e Insider tendem a aparecer com maior frequência, indicando falhas técnicas e humanas como principais vetores.

breach_method,total_incidentes
Hacked,192
Poor Security,44
Lost / Stolen Media,33
Accidentally Published,21
Inside Job,19
Lost / Stolen Computer,16
Unknown,8
"Improper Setting, Hacked",2
Poor Security/inside Job,2
"Inside Job, Hacked",1


Databricks visualization. Run in Databricks to view.

In [0]:
--2.	Por que os ataques às empresas estão aumentando?

SELECT
  y.year,
  COUNT(*) AS total_incidentes
FROM main.gold.fact_cyber_breaches f
JOIN main.gold.dim_year y
  ON f.year_key = y.year_key
WHERE y.year <> -1
GROUP BY y.year
ORDER BY y.year;

--Esta é uma pergunta interpretativa, não é totalmente estatística.

-- O aumento pode ser associado a:

-- maior volume de dados sensíveis armazenados
-- maior exposição pública
-- maior interesse dos atacantes


year,total_incidentes
2004,2
2005,6
2006,7
2007,12
2008,16
2009,13
2010,19
2011,34
2012,23
2013,28


Databricks visualization. Run in Databricks to view.

In [0]:
--3.	Quais tipos de empresas são mais visadas de ataques?

SELECT
  ot.organization_type,
  COUNT(*) AS total_incidentes
FROM main.gold.fact_cyber_breaches f
JOIN main.gold.dim_organization_type ot
  ON f.organization_type_key = ot.organization_type_key
GROUP BY ot.organization_type
ORDER BY total_incidentes DESC
LIMIT 10;


--Interpretação

--Setores como:

--Government
--Healthcare
--Financial
--Technology
--tendem a concentrar mais incidentes, refletindo:
--valor dos dados
--criticidade dos serviços
--maior visibilidade pública

organization_type,total_incidentes
Web,53
Healthcare,47
Financial,38
Government,30
Retail,27
Tech,19
Academic,13
Gaming,12
Telecoms,12
Hotel,8


Databricks visualization. Run in Databricks to view.

In [0]:
--4.	Para cada tipo de ataque, qual é a forma mais eficiente de prevenção?

SELECT
  bm.breach_method,
  COUNT(*) AS total_incidentes
FROM main.gold.fact_cyber_breaches f
JOIN main.gold.dim_breach_method bm
  ON f.breach_method_key = bm.breach_method_key
GROUP BY bm.breach_method;

--Associação técnica (exemplo)
--Tipo de ataque	Medida de prevenção
--Hacking	Hardening, MFA, IDS/IPS
--Misconfiguration	IaC, revisão de configs
--Accidentally Published	DLP, controle de acesso
--Insider	IAM, segregação de funções
--Lost / Stolen	Criptografia, MDM
-- Observação acadêmica:
--As medidas não vêm do dataset, mas de boas práticas de cibersegurança.

breach_method,total_incidentes
"Inside Job, Hacked",1
Misconfiguration/poor Security,1
"Improper Setting, Hacked",2
Data Exposed By Misconfiguration,1
Unknown,8
Publicly Accessible Amazon Web Services (aws) Server,1
Hacked,192
Accidentally Uploaded,1
Accidentally Exposed,1
Zero-day Vulnerabilities,1


In [0]:
--4.	Para cada tipo de ataque, qual é a forma mais eficiente de prevenção? (COMPLEMENTO)
--Tipo organização X Método de ataque

SELECT
  ot.organization_type,
  bm.breach_method,
  COUNT(*) AS total_incidentes
FROM main.gold.fact_cyber_breaches f
JOIN main.gold.dim_organization_type ot
  ON f.organization_type_key = ot.organization_type_key
JOIN main.gold.dim_breach_method bm
  ON f.breach_method_key = bm.breach_method_key
GROUP BY
  ot.organization_type,
  bm.breach_method
  LIMIT 25;

organization_type,breach_method,total_incidentes
Government,Hacked,8
Tech,Lost / Stolen Computer,1
Energy,Inside Job,2
"Tech, Web",Hacked,1
"Tech, Retail",Accidentally Published,1
Academic,Hacked,9
Hosting Provider,Hacked,1
Military,Hacked,1
Government,Poor Security,4
Social Network,Poor Security,5


Databricks visualization. Run in Databricks to view.

In [0]:

--5.	As análises permitem prever cenários futuros de segurança cibernética? Quais são as perspectivas?


SELECT
  y.year,
  COUNT(*) AS total_incidentes
FROM main.gold.fact_cyber_breaches f
JOIN main.gold.dim_year y
  ON f.year_key = y.year_key
WHERE y.year <> -1
GROUP BY y.year
ORDER BY y.year;

--conclusão técnica
--O dataset permite identificar tendências históricas
--Não permite previsão determinística
--Pode apoiar análises exploratórias e projeções simples
--Modelos preditivos exigiriam:
--dados contínuos
--mais variáveis
--séries temporais completas


year,total_incidentes
2004,2
2005,6
2006,7
2007,12
2008,16
2009,13
2010,19
2011,34
2012,23
2013,28


Databricks visualization. Run in Databricks to view.

In [0]:
--Complemento da pergunta 5 - "As análises permitem prever cenários futuros de segurança cibernética? Quais são as perspectivas? " 
---Evolução dos métodos de ataque ao longo do tempo

SELECT
  y.year,
  bm.breach_method,
  COUNT(*) AS total_incidentes
FROM main.gold.fact_cyber_breaches f
JOIN main.gold.dim_year y
  ON f.year_key = y.year_key
JOIN main.gold.dim_breach_method bm
  ON f.breach_method_key = bm.breach_method_key
WHERE y.year <> -1
GROUP BY y.year, bm.breach_method
ORDER BY y.year, total_incidentes DESC
LIMIT 20;




year,breach_method,total_incidentes
2004,"Inside Job, Hacked",1
2004,Inside Job,1
2005,Lost / Stolen Media,3
2005,Hacked,2
2005,Poor Security,1
2006,Lost / Stolen Media,2
2006,Intentionally Lost,1
2006,Inside Job,1
2006,Lost / Stolen Computer,1
2006,Accidentally Published,1


Databricks visualization. Run in Databricks to view.

In [0]:
SELECT
  y.year,
  COUNT(*) AS total_incidentes
FROM main.gold.fact_cyber_breaches f
JOIN main.gold.dim_year y
  ON f.year_key = y.year_key
WHERE y.year >= 2018
GROUP BY y.year
ORDER BY y.year;


---teste simples

year,total_incidentes
2018,26
2019,30
2020,31
2021,13
2022,5
