# Domain Risk Training and Evaluation

This notebook trains the domain risk model and evaluates its precision-recall (PR) and confusion-matrix metrics.

In [1]:
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import PrecisionRecallDisplay, confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split

from sentineldns.features.domain_features import build_domain_feature_matrix
from sentineldns.models.domain_risk import train_domain_risk_model

# Labeled-domain dataset, addressed relative to the notebook's working directory.
csv_path = Path('../data/processed/labeled_domains.csv')

# Load every labeled row into memory; later cells slice this frame by label.
df = pd.read_csv(csv_path)

# Class balance: number of rows per value of the `label` column.
label_counts = df['label'].value_counts()
label_counts

label
0    99990
1     2597
Name: count, dtype: int64

In [2]:
# Train the domain risk model from the labeled CSV. Only the path is passed,
# so the helper reads the file itself (the `df` loaded above is not used here).
# The returned metrics object is kept in `metrics` and shown as the cell output.
# NOTE(review): in the saved output the threshold is ~1.0 and the confusion
# matrix is [[24998, 0], [648, 1]]. If rows are true classes (row sums
# 24998 / 649 match the ~2.5% positive rate of the dataset), only 1 of 649
# positives is flagged at this threshold -- confirm this is intended.
metrics = train_domain_risk_model(csv_path)
metrics

{'model_version': '20260217T194848Z',
 'train_rows': 76940,
 'test_rows': 25647,
 'threshold': 0.9999996930372802,
 'target_fpr': 0.01,
 'scalar_feature_names': ['length',
  'num_labels',
  'tld_hash',
  'digit_ratio',
  'hyphen_count',
  'vowel_ratio',
  'entropy',
  'punycode_flag',
  'has_suspicious_words',
  'brand_edit_distance_min'],
 'precision_curve_points': 25648,
 'recall_curve_points': 25648,
 'confusion_matrix': [[24998, 0], [648, 1]]}

In [3]:
# Eyeball the first 20 rows of each ground-truth class from the labeled CSV.
# These slices are selected by the `label` column only; no model predictions
# are involved, so this is a data preview rather than an FP/TP error review.
df_mal = df.loc[df['label'] == 1].head(20)  # rows labeled 1
df_ben = df.loc[df['label'] == 0].head(20)  # rows labeled 0

# Render both previews, label-1 rows first.
display(df_mal, df_ben)

Unnamed: 0,domain,label,source,raw_value
99990,0022a601.pphost.net,1,urlhaus_or_phishtank,0022a601.pphost.net
99991,1.2.185.219,1,urlhaus_or_phishtank,1.2.185.219
99992,1.222.192.112,1,urlhaus_or_phishtank,1.222.192.112
99993,1.244.47.211,1,urlhaus_or_phishtank,1.244.47.211
99994,1.55.243.196,1,urlhaus_or_phishtank,1.55.243.196
99995,1.64.40.207,1,urlhaus_or_phishtank,1.64.40.207
99996,1.94.183.238,1,urlhaus_or_phishtank,1.94.183.238
99997,1.94.184.17,1,urlhaus_or_phishtank,1.94.184.17
99998,1.off3.ru,1,urlhaus_or_phishtank,1.off3.ru
99999,101.126.11.168,1,urlhaus_or_phishtank,101.126.11.168


Unnamed: 0,domain,label,source,raw_value
0,000cheapdomains.com,0,tranco,000cheapdomains.com
1,000webhost.com,0,tranco,000webhost.com
2,000webhostapp.com,0,tranco,000webhostapp.com
3,003ms.ru,0,tranco,003ms.ru
4,007.com,0,tranco,007.com
5,007names.net,0,tranco,007names.net
6,0088dns.com,0,tranco,0088dns.com
7,009.am,0,tranco,009.am
8,009.xn--p1ai,0,tranco,009.xn--p1ai
9,00gate.com,0,tranco,00gate.com
