# Industrial Security: Illegal Connection Detection (AI + Aerial Mapping)
Workflow: 1) Load data, 2) Exploratory data analysis (EDA), 3) Train model, 4) Prioritize inspections


In [None]:
import pandas as pd, numpy as np, matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_auc_score
from sklearn.ensemble import RandomForestClassifier
# Read both CSV exports in one pass: the rollup table (used for plotting and
# modelling in the following cells) and the time-series table.
# NOTE(review): `ts` is not referenced by the cells visible here — confirm it is
# needed before keeping the load.
rollup, ts = [
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/meters_illegal_connections_rollup.csv',
        '../data/meters_illegal_connections_timeseries.csv',
    )
]

# Preview the first rows of the rollup table as the cell output.
rollup.head()

In [None]:
# Quick spatial sanity check: plot every meter at its (lon, lat) position.
# Small, semi-transparent markers keep dense areas readable.
fig, ax = plt.subplots(figsize=(6, 6))
ax.scatter(rollup['lon'], rollup['lat'], s=6, alpha=0.5)
ax.set_title('Meters - Rough Geo Scatter')
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
plt.show()

In [None]:
# Column to predict, plus the columns that are never used as model inputs:
# the meter identifier and both label columns.
target = 'label_illegal_connection'
non_feature_cols = ('meter_id', 'label_illegal_connection', 'label_faulty_meter')

# Every remaining column is a feature; the original column order is kept.
feature_cols = [col for col in rollup.columns if col not in non_feature_cols]
X, y = rollup[feature_cols], rollup[target]

# 75/25 hold-out split, stratified on the label and seeded for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
    stratify=y,
)

# Random forest with class weighting enabled; fit() returns the fitted estimator.
clf = RandomForestClassifier(
    n_estimators=200,
    random_state=42,
    class_weight='balanced',
).fit(X_train, y_train)

# Second predict_proba column is used as the illegal-connection score
# (assumes the positive class sorts last in clf.classes_ — TODO confirm).
proba = clf.predict_proba(X_test)[:, 1]

# Hard predictions at a 0.5 cut-off, then report on the hold-out set.
pred = (proba >= 0.5).astype(int)
report_text = classification_report(y_test, pred)
auc_score = roc_auc_score(y_test, proba)
print(report_text)
print('ROC AUC:', auc_score)

In [None]:
# Attach the meter id (looked up in `rollup` by the hold-out row index) and the
# predicted probability to a copy of the hold-out features.
test_with_scores = X_test.assign(
    meter_id=rollup.loc[X_test.index, 'meter_id'],
    prob_illegal=proba,
)

# Highest predicted probability first; ties are broken by the aerial anomaly score.
toplist = test_with_scores.sort_values(
    by=['prob_illegal', 'aerial_anomaly_score'],
    ascending=False,
)

# Columns shown in the inspection shortlist.
shortlist_cols = [
    'meter_id',
    'prob_illegal',
    'aerial_anomaly_score',
    'tamper_events_60d',
    'kwh_mean_60d',
    'phase_imbalance_pct',
]

# Display the 20 highest-ranked meters as the cell output.
toplist[shortlist_cols].head(20)

Next steps: replace the aerial anomaly score with computer-vision (CV) results, deploy the model to Vertex AI, and schedule batch predictions with Vertex Pipelines.