In [2]:
# Notebook: Secret scanner + matplotlib plot
import os, re, pandas as pd, matplotlib.pyplot as plt

# --------- Config ---------
SCAN_ROOT = "/mnt/data"  # change this to any path you uploaded
IGNORED_EXT = {'.png', '.jpg', '.jpeg', '.gif', '.class', '.dex', '.so'}

PATTERNS = {
    "Google_API_KEY": re.compile(r"AIza[0-9A-Za-z-_]{35}"),
    "Branch_key_live": re.compile(r"key_live_[0-9A-Za-z]{8,}"),
    "Branch_key_test": re.compile(r"key_test_[0-9A-Za-z]{8,}"),
    "OneSignal_APP_ID": re.compile(r"[0-9a-fA-F\\-]{36,}"),
    "Generic_Bearer": re.compile(r"(?:Bearer|bearer)\\s+[A-Za-z0-9\\-\._~\+\/]+=*"),
    "Generic_API_Key": re.compile(r"(?:api[_-]?key|apikey|secret|client[_-]?id)[\"'\\s:=]{1,4}([A-Za-z0-9\-_\.]{16,})", re.IGNORECASE),
    "Firebase_Project_Number": re.compile(r"\\b\\d{9,12}\\b"),
    "Possible_URL": re.compile(r"https?://[^\\s'\"<>]+"),
}

# --------- Scanner ---------
def scan_file(path):
    results = []
    try:
        with open(path, "r", errors="ignore") as f:
            text = f.read()
    except Exception:
        return results
    for name, pattern in PATTERNS.items():
        for m in pattern.finditer(text):
            start = max(0, m.start()-60)
            end = min(len(text), m.end()+60)
            snippet = text[start:end].replace("\n", " ")
            if len(snippet) > 1000:
                snippet = snippet[:1000] + " ..."
            results.append({
                "file": path.replace(SCAN_ROOT, "./").lstrip("./"),
                "pattern": name,
                "match": m.group(0),
                "context": snippet
            })
    return results

def scan_directory(root):
    findings = []
    for dirpath, dirs, files in os.walk(root):
        for fname in files:
            _, ext = os.path.splitext(fname)
            if ext.lower() in IGNORED_EXT:
                continue
            full = os.path.join(dirpath, fname)
            findings.extend(scan_file(full))
    return findings

# Run scan
print(f"Scanning directory: {SCAN_ROOT} ... (this may take a moment)")
findings = scan_directory(SCAN_ROOT)
df = pd.DataFrame(findings)

if df.empty:
    print("No likely secrets found in the scanned path with current heuristics.")
else:
    counts = df['pattern'].value_counts().rename_axis('pattern').reset_index(name='hits')
    display(df)
    display(counts)
    print(f"\nScan summary: {len(df)} potential hits found. Treat all as compromised until rotated/restricted.")

    plt.figure(figsize=(8,4))
    plt.bar(counts['pattern'], counts['hits'])
    plt.xlabel('Pattern')
    plt.ylabel('Hits')
    plt.title('Secret Scanner: Hits by Pattern')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    out_png = '/mnt/data/secret_scan_pattern_counts.png'
    plt.savefig(out_png)
    plt.show()
    print(f'Saved plot to: {out_png}')

print('\nNext steps (recommended):')
print('1) Rotate & restrict any keys found.')
print('2) Fix build/config to avoid embedding secrets.')
print('3) Rebuild and republish cleaned app.')

df.head(10)

error: bad character range \\-\. at position 31