In [1]:
# importando pandas e logging

import pandas as pd
import logging

In [2]:
# Logging setup: emit records at INFO level and above, each one prefixed
# with its timestamp and severity level.

logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s", level=logging.INFO)

In [3]:
# Locations of the generated files, relative to this notebook.

# Files written under ../data/output
output_cleaned = "../data/output/cleaned_data.csv"
quality_report_file = "../data/output/quality_report.csv"
avg_risk_score_by_region_file = "../data/output/avg_risk_score_by_region.csv"
top_receiving_addresses_file = "../data/output/top_receiving_addresses.csv"

# Files written under ../logs
output_invalid = "../logs/invalid_mandatory_data.csv"
output_optional_issues = "../logs/optional_issues.csv"

In [4]:
# Cleaned data: read the CSV at `output_cleaned` and preview its first rows.
# A missing file is logged as an error rather than raised.
logging.info("Carregando dados limpos...")
try:
    valid_data = pd.read_csv(output_cleaned)
except FileNotFoundError:
    logging.error(f"Arquivo {output_cleaned} não encontrado.")
else:
    display(valid_data.head())

2025-01-03 00:22:41,629 - INFO - Carregando dados limpos...


Unnamed: 0,timestamp,sending_address,receiving_address,amount,transaction_type,location_region,ip_prefix,login_frequency,session_duration,purchase_pattern,age_group,risk_score,anomaly
0,1618185002,0x9d32d0bf2c00f41ce7ca01b66e174cc4dcb0c1da,0x39f82e1c09bc6d7baccc1e79e5621ff812f50572,67435.0,transfer,Europe,192.0,3,48,focused,established,18.75,low_risk
1,1698642474,0xd6e251c23cbf52dbd472f079147873e655d8096f,0x51e8fbe24f124e0e30a614e14401b9bbfed5384c,1.0,purchase,South America,172.0,5,61,focused,established,25.0,low_risk
2,1619180066,0x2e0925b922fed01f6a85d213ae2718f54b8ca305,0x52c7911879f783d590af45bda0c0ef2b8536706f,66211.0,purchase,Asia,192.168,3,74,focused,established,31.25,low_risk
3,1591413882,0x93efefc25fcaf31d7695f28018d7a11ece55457f,0x8ac3b7bd531b3a833032f07d4e47c7af6ea7bace,14998.0,transfer,South America,172.0,8,111,high_value,veteran,36.75,low_risk
4,1611257295,0xad3b8de45d63f5cce28aef9a82cf30c397c6ceb9,0x6fdc047c2391615b3facd79b4588c7e9106e49f2,66002.0,sale,Africa,172.16,6,100,high_value,veteran,62.5,moderate_risk


In [5]:
# Invalid data: read the CSV at `output_invalid` and preview its first rows.
# A missing file is logged as an error rather than raised.
logging.info("Carregando dados inválidos...")
try:
    invalid_data = pd.read_csv(output_invalid)
except FileNotFoundError:
    logging.error(f"Arquivo {output_invalid} não encontrado.")
else:
    display(invalid_data.head())

2025-01-03 00:23:24,114 - INFO - Carregando dados inválidos...


Unnamed: 0,timestamp,sending_address,receiving_address,amount,transaction_type,location_region,ip_prefix,login_frequency,session_duration,purchase_pattern,age_group,risk_score,anomaly
0,1590044893,0xfcf9a3467d3d93688fb17b4ebb6b681b1fa29f94,0x6d4f7db757a1bd6bb43029ee13c037651ca20c74,65319.0,purchase,North America,172.16,4,62,focused,established,none,low_risk
1,1661550384,0xd2cff98e8e707049db92500414fec6f0bb5c895c,0x2e0925b922fed01f6a85d213ae2718f54b8ca305,67174.0,purchase,South America,192.0,2,38,random,new,none,low_risk
2,1670045880,0xddbe1291b454f9b8699220f1e8724aa641ef4b42,0x51c0d24ccc5d9b0dd793052cb2d41efde2568056,15462.0,sale,Africa,172.0,6,144,high_value,veteran,none,low_risk
3,1669402947,0xffa670245089044b1e355508a7843692a25a5e52,0xdce371e2762ac4250469f709c5b766e754e2c9a2,59980.0,purchase,Asia,10.0,1,38,random,new,none,low_risk
4,1606819329,0xc3b34de3e302dc5c357c9647b1bd223eea847ff5,0xd08041262c12eb441ffb76d90aac6bbd4ead64f9,19323.0,transfer,0,172.16,8,99,high_value,veteran,36.75,low_risk


In [6]:
# Optional issues: read the CSV at `output_optional_issues` and preview its
# first rows. A missing file is logged as an error rather than raised.
logging.info("Carregando problemas opcionais...")
try:
    optional_issues = pd.read_csv(output_optional_issues)
except FileNotFoundError:
    logging.error(f"Arquivo {output_optional_issues} não encontrado.")
else:
    display(optional_issues.head())

2025-01-03 00:23:53,606 - INFO - Carregando problemas opcionais...


Unnamed: 0,timestamp,sending_address,receiving_address,amount,transaction_type,location_region,ip_prefix,login_frequency,session_duration,purchase_pattern,age_group,risk_score,anomaly


In [7]:
# Quality report: read the CSV at `quality_report_file` and display it in
# full. A missing file is logged as an error rather than raised.
logging.info("Carregando relatório de qualidade...")
try:
    report = pd.read_csv(quality_report_file)
except FileNotFoundError:
    logging.error(f"Arquivo {quality_report_file} não encontrado.")
else:
    display(report)

2025-01-03 00:24:20,602 - INFO - Carregando relatório de qualidade...


Unnamed: 0,Column,Total Records,Filled Values,Completeness (%),Valid Values,Conformity (%),Unique Values
0,timestamp,9291894,9291894,100.0,9291894,100.0,8958709
1,amount,9291894,9291894,100.0,9241894,99.46,76772
2,sending_address,9291894,9291894,100.0,9291894,100.0,1161
3,receiving_address,9291894,9291894,100.0,9291894,100.0,1166
4,transaction_type,9291894,9291894,100.0,9291894,100.0,5
5,location_region,9291894,9291894,100.0,9241894,99.46,6
6,risk_score,9291894,9291894,100.0,9241668,99.46,33
7,ip_prefix,9291894,9291894,100.0,9291894,100.0,5
8,login_frequency,9291894,9291894,100.0,9291894,100.0,8
9,session_duration,9291894,9291894,100.0,9291894,100.0,140


In [8]:
# Average risk_score by region: read the CSV at
# `avg_risk_score_by_region_file` and display it in full.
# A missing file is logged as an error rather than raised.
logging.info("Carregando média de risk_score por região...")
try:
    avg_risk_score_by_region = pd.read_csv(avg_risk_score_by_region_file)
except FileNotFoundError:
    logging.error(f"Arquivo {avg_risk_score_by_region_file} não encontrado.")
else:
    display(avg_risk_score_by_region)

2025-01-03 00:27:08,236 - INFO - Carregando média de risk_score por região...


Unnamed: 0,location_region,risk_score
0,North America,45.154834
1,South America,45.139408
2,Asia,44.994572
3,Africa,44.902219
4,Europe,44.598708


In [9]:
# Top receiving addresses: read the CSV at `top_receiving_addresses_file`
# and display it in full. A missing file is logged as an error rather than
# raised.
logging.info("Carregando top receiving addresses...")
try:
    top_receiving_addresses = pd.read_csv(top_receiving_addresses_file)
except FileNotFoundError:
    logging.error(f"Arquivo {top_receiving_addresses_file} não encontrado.")
else:
    display(top_receiving_addresses)

2025-01-03 00:27:46,154 - INFO - Carregando top receiving addresses...


Unnamed: 0,receiving_address,max_amount,latest_timestamp
0,0xdce371e2762ac4250469f709c5b766e754e2c9a2,76771.0,1704166510
1,0x11f40ae67f6b648e8b4bbc2d1a04c665214f7d25,76769.0,1704121865
2,0xbcbcacf1f2e151263e3890da058f6fa2b7b881d9,76767.0,1704188314
