In [2]:
#!/usr/bin/env python3
"""
proxy_validator_500.py

Builds NUM_PROXIES proxies (500 for a full run) using the credential/host
pattern in BASE_PROXY_TEMPLATE (taken from your uploaded file), starting at
START_PORT (10457 for a full run) and incrementing the port by 1. For each
proxy it:

 - exports HTTP_PROXY and HTTPS_PROXY
 - calls a single nba_api endpoint variant (picked at random from a small list)
 - records OK vs FAIL
 - prints an immediate success/failure line for each proxy
 - (once, after the whole run) saves results to results.csv + valid_proxies.txt + invalid_proxies.txt

Requires: nba_api installed and network access.
"""

import os
import time
import csv
import random
from datetime import datetime
from nba_api.stats.endpoints import LeagueGameLog, LeagueDashTeamStats

# ---------- CONFIG ----------
# First port to test; proxy i (0-based) uses port START_PORT + i.
# NOTE(review): the module docstring gives 10457 as the first port of a full
# run, so the active value below looks like the resume point of a partial run
# and the "restart" wording on the commented-out line may be inverted — confirm.
#START_PORT = 10457 # UNCOMMENT THIS FOR A RESTART RUN IF YOU WANT
START_PORT = 10564
# Number of consecutive ports to test.
NUM_PROXIES = 483
#NUM_PROXIES = 500
TIMEOUT = 12.0               # seconds for the nba_api call
SUCCESS_PAUSE = (0.10, 0.35) # (min, max) seconds; a random pause in this range follows each success
FAILURE_COOLDOWN_SEC = 2     # short cooldown after failure (you can increase)
# Proxy URL pattern; "{port}" is filled in per proxy.
# NOTE(review): SECURITY — this literal embeds a proxy username and password
# in source (and they are written verbatim to results.csv / valid_proxies.txt /
# invalid_proxies.txt). Consider loading the template from the environment or a
# git-ignored file and rotating the credential.
BASE_PROXY_TEMPLATE = "http://spbi6ee2j0:jD7Pk~5fMlcV4ty2bt@dc.decodo.com:{port}"
# END CONFIG

# small set of safe endpoints to use (one call per proxy)
# Each entry is (label, zero-argument callable). The callable performs the
# request with timeout=TIMEOUT and returns the first data frame of the response.
VARIANTS = [
    ("LeagueGameLog:T", lambda: LeagueGameLog(season="2024-25", player_or_team_abbreviation="T", timeout=TIMEOUT).get_data_frames()[0]),
    ("LeagueGameLog:P", lambda: LeagueGameLog(season="2024-25", player_or_team_abbreviation="P", timeout=TIMEOUT).get_data_frames()[0]),
    ("LeagueDashTeamStats", lambda: LeagueDashTeamStats(season="2024-25", timeout=TIMEOUT).get_data_frames()[0]),
]

# Keep a copy of original env so we can restore
# (captured once, when this module/cell is executed; None means "was not set")
BASE_HTTP = os.environ.get("HTTP_PROXY")
BASE_HTTPS = os.environ.get("HTTPS_PROXY")

def _mask(proxy_url):
    """Return a copy of *proxy_url* that is safe to print.

    Credentials (everything before the last "@") are replaced with
    "***:***" when they contain a ":" (user:password) or "***" otherwise
    (username/token only). The scheme, if any, and the host part are kept.

    Args:
        proxy_url: Proxy URL such as "http://user:pass@host:port", or a
            falsy value meaning "no proxy".

    Returns:
        "(DIRECT)" for a falsy value; the masked URL when credentials are
        present; the input unchanged when it has no "@" or is not a string.
    """
    if not proxy_url:
        return "(DIRECT)"
    if not isinstance(proxy_url, str):
        # Nothing sensible to mask; hand the value back untouched.
        return proxy_url

    scheme, sep, rest = proxy_url.partition("://")
    if sep:
        prefix = f"{scheme}://"
    else:
        # No scheme present: the whole string is "[creds@]host".
        prefix, rest = "", proxy_url

    # Split on the LAST "@": a password may itself contain "@", and splitting
    # on the first one would leave the tail of the password in the label.
    creds, at, host = rest.rpartition("@")
    if not at:
        return proxy_url  # no credentials in the URL

    masked = "***:***" if ":" in creds else "***"
    return f"{prefix}{masked}@{host}"

def _set_proxy_env(proxy_url):
    """Point HTTP_PROXY and HTTPS_PROXY at *proxy_url*, or clear both.

    A truthy *proxy_url* is written to both variables; a falsy one removes
    both variables from the process environment (missing ones are ignored).
    """
    for var_name in ("HTTP_PROXY", "HTTPS_PROXY"):
        if proxy_url:
            os.environ[var_name] = proxy_url
        else:
            os.environ.pop(var_name, None)

def _classify_failure(msg):
    """Map an exception message to a coarse status label by substring match."""
    if "Read timed out" in msg or "timeout" in msg.lower():
        return "timeout"
    if "429" in msg:
        return "rate_limited"
    if "403" in msg or "Forbidden" in msg:
        return "blocked"
    return "error"

def _restore_proxy_env():
    """Reset HTTP_PROXY / HTTPS_PROXY to the values saved in BASE_HTTP / BASE_HTTPS."""
    for var_name, saved in (("HTTP_PROXY", BASE_HTTP), ("HTTPS_PROXY", BASE_HTTPS)):
        if saved is None:
            os.environ.pop(var_name, None)
        else:
            os.environ[var_name] = saved

def validate_proxies(start_port, count):
    """Test *count* proxies on consecutive ports, one endpoint call each.

    For every port from *start_port* upward the proxy URL is built from
    BASE_PROXY_TEMPLATE, exported via HTTP_PROXY/HTTPS_PROXY, and one
    randomly chosen entry of VARIANTS is called. A line is printed per proxy.

    Args:
        start_port: Port of the first proxy.
        count: Number of consecutive ports to test.

    Returns:
        (results, valid, invalid) where results is a list of tuples
        (index, port, proxy, status, rows, elapsed_s, error, timestamp),
        and valid / invalid are lists of proxy URLs.

    Any exception that is not an ``Exception`` subclass (e.g.
    KeyboardInterrupt) propagates, but the proxy environment variables are
    restored first.
    """
    results = []  # rows: (index, port, proxy, status, rows, elapsed, error_message, timestamp)
    valid = []
    invalid = []

    print(f"Proxy validation start: {datetime.now().isoformat()}  testing {count} proxies from port {start_port}")

    try:
        for i in range(count):
            port = start_port + i
            proxy = BASE_PROXY_TEMPLATE.format(port=port)
            proxy_label = _mask(proxy)
            _set_proxy_env(proxy)

            # choose a variant to call (picked at random)
            name, fn = random.choice(VARIANTS)

            t0 = time.perf_counter()
            try:
                df = fn()  # call the endpoint
                elapsed = time.perf_counter() - t0
                rows = len(df) if df is not None else 0
                status = "OK"
                results.append((i+1, port, proxy, status, rows, round(elapsed, 2), "", datetime.now().isoformat()))
                valid.append(proxy)
                # Immediate print update per proxy (success)
                print(f"[{i+1:03d}] ✅ {name:<20} via {proxy_label:40s} rows={rows:<5} {elapsed:.2f}s")

                # small random pause to avoid hammering
                time.sleep(random.uniform(*SUCCESS_PAUSE))

            except Exception as e:
                elapsed = time.perf_counter() - t0
                msg = str(e)
                status = _classify_failure(msg)

                results.append((i+1, port, proxy, status, 0, round(elapsed, 2), msg, datetime.now().isoformat()))
                invalid.append(proxy)
                # Immediate print update per proxy (failure)
                print(f"[{i+1:03d}] ❌ {name:<20} via {proxy_label:40s} {status} — {elapsed:.2f}s  {msg}")

                # cooldown so behavior is easier to observe (and avoid instant repeats)
                time.sleep(FAILURE_COOLDOWN_SEC)
    finally:
        # Restore even when the run is interrupted (Ctrl-C / kernel interrupt);
        # otherwise the last credentialed proxy stays exported for the rest of
        # the process or notebook session.
        _restore_proxy_env()

    return results, valid, invalid

def save_results_csv(results, filename="results.csv"):
    """Write *results* to *filename* as CSV, overwriting any existing file.

    The first line is a fixed header; each element of *results* becomes one
    row in the order given.
    """
    header = ["index", "port", "proxy", "status", "rows", "elapsed_s", "error", "timestamp"]
    with open(filename, "w", newline="") as handle:
        out = csv.writer(handle)
        out.writerow(header)
        out.writerows(results)

def save_list(lst, path):
    """Write every entry of *lst* to *path*, one per line, replacing the file."""
    with open(path, "w") as out:
        out.writelines(entry + "\n" for entry in lst)

if __name__ == "__main__":
    # Run the whole sweep, then persist the outcome in three files.
    results, valid, invalid = validate_proxies(START_PORT, NUM_PROXIES)

    save_results_csv(results, "results.csv")
    for entries, path in ((valid, "valid_proxies.txt"), (invalid, "invalid_proxies.txt")):
        save_list(entries, path)

    # Summary for the console.
    ok, fail = len(valid), len(invalid)
    print(f"\nFinished: {ok} valid / {fail} invalid")
    print("Saved: results.csv, valid_proxies.txt, invalid_proxies.txt")


Proxy validation start: 2025-09-27T17:53:45.996489  testing 483 proxies from port 10564
[001] ❌ LeagueGameLog:P      via http://***:***@dc.decodo.com:10564       timeout — 12.31s  HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=12.0)
[002] ❌ LeagueGameLog:P      via http://***:***@dc.decodo.com:10565       timeout — 12.30s  HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=12.0)
[003] ❌ LeagueDashTeamStats  via http://***:***@dc.decodo.com:10566       timeout — 12.26s  HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=12.0)
[004] ❌ LeagueDashTeamStats  via http://***:***@dc.decodo.com:10567       timeout — 12.52s  HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=12.0)
[005] ❌ LeagueGameLog:T      via http://***:***@dc.decodo.com:10568       timeout — 12.28s  HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=12.0)
[006] ❌

KeyboardInterrupt: 