<a href="https://colab.research.google.com/github/farzadmohseni-ir/ig-request-cleaner/blob/main/ig_request_cleaner.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ============================================
#  Instagram Login (with 2FA / Challenge) in Colab
#  - Uses instagrapi (private API) -> Risk of account restriction exists.
#  - Saves session to avoid frequent logins.
# ============================================

!pip -q install instagrapi

import os, json, getpass, time
from instagrapi import Client
from instagrapi.exceptions import (
    TwoFactorRequired, ChallengeRequired, LoginRequired,
    PleaseWaitFewMinutes, RateLimitError
)

# JSON file that save_session() writes the client settings to and
# load_session() reads them back from.
SESSION_FILE = "/content/ig_session.json"

def save_session(cl: Client, path: str = SESSION_FILE):
    """Dump the client's current settings to ``path`` as JSON.

    Best effort: any error while collecting or writing the settings is
    printed and swallowed, so a failed save never aborts the caller.
    """
    try:
        # Collect the settings first so the file is only opened (and
        # truncated) once there is something to write.
        snapshot = cl.get_settings()
        with open(path, "w") as handle:
            json.dump(snapshot, handle)
        print(f"✅ Session saved to: {path}")
    except Exception as err:
        print("⚠️ Could not save session:", err)

def load_session(cl: Client, path: str = SESSION_FILE) -> bool:
    """Apply previously saved settings from ``path`` to the client.

    Returns True when the file existed, parsed as JSON and was handed to
    ``cl.set_settings``; returns False when the file is missing or anything
    goes wrong while reading/applying it (the error is printed).
    """
    if os.path.exists(path):
        try:
            with open(path, "r") as handle:
                stored = json.load(handle)
            cl.set_settings(stored)
            print("ℹ️ Session settings loaded from file.")
            return True
        except Exception as err:
            print("⚠️ Could not load session:", err)
    return False

def prompt_creds():
    """Interactively ask for credentials and return ``(username, password)``.

    The username is read with ``input`` and stripped of surrounding
    whitespace; the password is read with ``getpass`` so it is not echoed.
    """
    user = input("👤 Instagram username: ").strip()
    secret = getpass.getpass("🔒 Instagram password: ")
    return user, secret

def login_with_flow(cl: Client):
    """
    Interactively log ``cl`` in, reusing a saved session when possible.

    Login flow:
      1) Load saved settings and verify them with one authenticated request
         (no credentials are needed when the session is still valid).
      2) If there is no usable session -> prompt for credentials and log in.
      3) If TwoFactorRequired -> ask for the code and retry.
      4) If ChallengeRequired -> resolve via the code handler (email/sms).

    Returns:
        True when the client ends up logged in, False otherwise. Every
        failure is printed; nothing is raised to the caller for errors
        coming from the login requests themselves.
    """
    cl.set_locale("en_US")
    cl.delay_range = [2, 4]  # be gentle with requests

    # Handler instagrapi calls when it needs a verification code.
    def code_handler(username_select: str, choice: str):
        # 'choice' could be "email" or "sms" depending on IG prompt.
        print(f"📨 Instagram will send a code via: {choice}")
        return input("Enter the verification code you received: ").strip()

    # Install the handler BEFORE any login request: instagrapi tries to
    # resolve challenges while the login call is still running, so a handler
    # installed afterwards would never be used for that attempt.
    cl.challenge_code_handler = code_handler

    stale_session = False
    if load_session(cl):
        try:
            # Verify the loaded session with a lightweight authenticated call.
            # (Calling login() with empty credentials and relogin=True would
            # wipe the stored cookies/authorization and then fail.)
            cl.get_timeline_feed()
            print("✅ Logged in using saved session.")
            return True
        except LoginRequired:
            print("ℹ️ Saved session exists but login required again.")
        except Exception as e:
            print("⚠️ Re-login with saved session failed:", e)
        # The loaded settings still carry the old authorization; force the
        # fresh login below to discard it instead of treating the client as
        # already logged in.
        stale_session = True

    # Fresh login
    username, password = prompt_creds()

    try:
        cl.login(username, password, relogin=stale_session)
        print("✅ Logged in without 2FA.")
        save_session(cl)
        return True

    except TwoFactorRequired:
        print("🔐 2FA is enabled on your account.")
        # If you use an authenticator app (TOTP), enter that code here.
        # If you got an SMS / email code, enter it here too.
        verification_code = input("Enter your 2FA code: ").strip()
        try:
            cl.login(username, password, verification_code=verification_code)
            print("✅ Logged in with 2FA.")
            save_session(cl)
            return True
        except Exception as e2:
            print("❌ 2FA login failed:", e2)
            return False

    except ChallengeRequired:
        print("🟡 Challenge required by Instagram.")
        print("   You may need to choose where to receive a code (SMS/Email) and enter it.")

        try:
            # challenge_resolve() expects the JSON body of the response that
            # announced the challenge (kept by the client as last_json),
            # not a username.
            ok = cl.challenge_resolve(cl.last_json)
            if ok:
                print("✅ Challenge resolved. Logged in.")
                save_session(cl)
                return True
            else:
                print("❌ Challenge could not be resolved automatically.")
                return False
        except Exception as e3:
            print("❌ Challenge resolution failed:", e3)
            return False

    except PleaseWaitFewMinutes as e:
        print("⏳ Instagram rate-limited you. Try later.", e)
        return False
    except RateLimitError as e:
        print("⏳ Rate limit error. Slow down / wait a while.", e)
        return False
    except Exception as e:
        print("❌ Login failed:", e)
        return False


# ============= Run =============
# Create the shared client and run the interactive login; on success fetch
# the account info once as a sanity check that the session really works.
cl = Client()
if not login_with_flow(cl):
    print("🚫 Not logged in.")
else:
    try:
        me = cl.account_info()
        print(f"👋 Logged in as: @{me.username} (id={me.pk})")
    except Exception as err:
        print("Logged in but could not fetch account info:", err)

In [None]:
# ============================================
# Resume-safe parallel cancel of pending follow requests
#  - Skips users that are ALREADY canceled (from existing CSV log)
#  - Retries transient failures with exponential backoff + jitter
#  - Random human-like delays and small thread pool
# ============================================

import os, csv, time, random, threading
from concurrent.futures import ThreadPoolExecutor, as_completed
from bs4 import BeautifulSoup
import pandas as pd

# -------- Config --------
# Tunables for this cell. Both paths point into Colab's /content directory.
HTML_PATH     = "/content/pending_follow_requests.html"  # HTML file scanned for instagram.com links
EXCLUDE_USERS = {"reza.frxond"}      # accounts to keep (never processed)
LIMIT         = None                  # e.g., 400 for batch processing; None (or 0) = all
DRY_RUN       = False                 # True = skip the unfollow call and log "dry_run" (user ids are still looked up)
MAX_WORKERS   = 3                     # thread pool size; keep this small to reduce risk
DELAY_MIN     = 1.8                   # random delay lower bound (seconds)
DELAY_MAX     = 4.6                   # random delay upper bound (seconds)
LOG_CSV       = "/content/cancel_follow_requests_log.csv"  # append-only result log, also read for resume
MAX_RETRIES   = 3                     # attempts per API step (id lookup, unfollow)
BACKOFF_BASE  = 2.0                   # backoff sleeps BACKOFF_BASE ** attempt seconds, times a random jitter

# -------- Pre-check: must be logged in --------
# The client object is created by the login cell; stop right away with a
# readable message when it has not been defined in this session.
if "cl" not in globals():
    raise SystemExit("❌ Instagram client `cl` not found. Run the login cell first.")

# -------- Parse HTML and collect usernames --------
if not os.path.exists(HTML_PATH):
    raise FileNotFoundError(f"HTML not found: {HTML_PATH}")

with open(HTML_PATH, "r", encoding="utf-8") as html_file:
    soup = BeautifulSoup(html_file.read(), "html.parser")

# Link text of every <a href=...> whose target mentions instagram.com.
link_texts = (
    anchor.text.strip()
    for anchor in soup.find_all("a", href=True)
    if "instagram.com" in anchor["href"]
)

# dict.fromkeys drops duplicates while keeping first-seen order; blank link
# texts are discarded, then the explicitly excluded accounts are removed.
unique_names = dict.fromkeys(text for text in link_texts if text)
usernames = [name for name in unique_names if name not in EXCLUDE_USERS]

# -------- Read existing log and build resume state --------
# Only skip users that are already 'canceled' successfully.
already_canceled = set()
if os.path.exists(LOG_CSV):
    try:
        # Read every column as plain text and disable NA detection. With
        # pandas' default inference a username such as "007" is parsed as the
        # number 7 and one such as "null" or "NA" becomes a missing value, so
        # those users would never be recognized as already canceled.
        prev = pd.read_csv(LOG_CSV, dtype=str, keep_default_na=False)
        # Older/foreign logs may lack the expected columns; then nothing is skipped.
        if "username" in prev.columns and "status" in prev.columns:
            canceled_names = prev.loc[prev["status"] == "canceled", "username"]
            already_canceled = set(canceled_names.dropna().astype(str))
    except Exception as e:
        print("⚠️ Could not read existing log. Resume will not skip:", e)

# Filter out already canceled users
to_process = [u for u in usernames if u not in already_canceled]

# Apply LIMIT for batching if desired (None or 0 means "no limit")
if LIMIT:
    to_process = to_process[:LIMIT]

print(f"📋 Total in HTML: {len(usernames)} | To process now: {len(to_process)}")
print(f"🚫 Excluded: {', '.join(EXCLUDE_USERS) if EXCLUDE_USERS else '-'}")
if already_canceled:
    print(f"↩️  Already canceled (skipped): {len(already_canceled)}")

# -------- Prepare CSV (create header if missing) --------
CSV_FIELDS = ["username", "user_id", "status", "message", "timestamp"]

# One lock serializes writes to the CSV log, the other keeps console output
# from different worker threads from interleaving.
csv_lock, print_lock = threading.Lock(), threading.Lock()

log_missing = not os.path.exists(LOG_CSV)
if log_missing:
    # Fresh log: write only the header row.
    with open(LOG_CSV, "w", newline="", encoding="utf-8") as header_file:
        header_writer = csv.writer(header_file)
        header_writer.writerow(CSV_FIELDS)

def log_row(row_list):
    """Append one row to the CSV log while holding ``csv_lock``.

    The file is reopened in append mode for every row, so each result is
    written out as soon as the call returns.
    """
    with csv_lock, open(LOG_CSV, "a", newline="", encoding="utf-8") as log_file:
        writer = csv.writer(log_file)
        writer.writerow(row_list)

def human_delay():
    """Sleep for a random time drawn uniformly from DELAY_MIN..DELAY_MAX seconds."""
    pause = random.uniform(DELAY_MIN, DELAY_MAX)
    time.sleep(pause)

def backoff_sleep(attempt: int):
    """Sleep ``BACKOFF_BASE ** attempt`` seconds scaled by a random factor.

    The factor is drawn uniformly from 0.6..1.4 so that concurrent workers
    do not all wake up at the same moment.
    """
    wait = BACKOFF_BASE ** attempt
    wait *= random.uniform(0.6, 1.4)
    time.sleep(wait)

def cancel_for_username(uname: str):
    """Resolve ``uname`` to a user id and cancel the pending request, with retries.

    Each API step (id lookup, unfollow) is tried up to MAX_RETRIES times, and
    always at least once, sleeping with exponential backoff between tries.
    The outcome is appended to the CSV log, printed, and followed by a
    randomized delay.

    Args:
        uname: Instagram username to process.

    Returns:
        A ``(status, uname)`` tuple where status is one of
        "canceled", "dry_run", "resolve_failed", "api_false" or "exception".
        A tuple is returned on every path so the caller can always unpack it.
    """
    # Captured once at the start; every log row for this user carries it.
    ts = time.strftime("%Y-%m-%d %H:%M:%S")

    # Guarantee one attempt even if MAX_RETRIES is configured as 0 or less;
    # otherwise both loops would be skipped and the function would fall off
    # the end returning None.
    attempts = max(1, MAX_RETRIES)

    def finish(status, logged_uid, message, console_line):
        """Log the outcome, print it, pause, and build the return value."""
        log_row([uname, logged_uid, status, message, ts])
        with print_lock:
            print(console_line)
        human_delay()
        return (status, uname)

    # Resolve user id with small retry (in case of transient lookup errors)
    uid = None
    for attempt in range(attempts):
        try:
            uid = cl.user_id_from_username(uname)
            break
        except Exception as e_uid:
            if attempt < attempts - 1:
                backoff_sleep(attempt + 1)
                continue
            return finish(
                "resolve_failed", "", str(e_uid),
                f"  ❌ @{uname} -> resolve_failed: {e_uid}",
            )

    if DRY_RUN:
        return finish("dry_run", uid, "", f"  🔎 DRY-RUN @{uname} (id={uid})")

    # Try to cancel with retries
    for attempt in range(attempts):
        last_try = attempt == attempts - 1
        # Only the API call sits inside the try: a logging/printing error
        # after a successful cancel must not trigger another unfollow request.
        try:
            ok = cl.user_unfollow(uid)  # for pending requests this cancels them
        except Exception as e_unf:
            # Rate limits / transient server errors will land here.
            if last_try:
                return finish(
                    "exception", uid, str(e_unf),
                    f"  ❌ Exception @{uname}: {e_unf}",
                )
            with print_lock:
                print(f"  ⏳ Retry @{uname} due to: {e_unf}")
            backoff_sleep(attempt + 1)
            continue

        if ok:
            return finish("canceled", uid, "", f"  ✅ Canceled @{uname} (id={uid})")

        # API returned False; retry a couple of times, then give up.
        if last_try:
            return finish("api_false", uid, "", f"  ⚠️ API False @{uname} (id={uid})")
        backoff_sleep(attempt + 1)

# -------- Parallel execution --------
success = fail = 0
status_counts = {}

with ThreadPoolExecutor(max_workers=MAX_WORKERS) as pool:
    # Submit everything up front; results are consumed as workers finish.
    futures = {pool.submit(cancel_for_username, name): name for name in to_process}
    for idx, fut in enumerate(as_completed(futures), 1):
        status, uname = fut.result()

        # Per-status tally for the summary below.
        status_counts.setdefault(status, 0)
        status_counts[status] += 1

        # Dry-run entries count as neither success nor failure.
        if status == "canceled":
            success += 1
        elif status != "dry_run":
            fail += 1

        with print_lock:
            print(f"[{idx}/{len(futures)}] done -> @{uname} ({status})")

# -------- Summary & preview --------
print("\n================ SUMMARY ================")
print(f"✅ Canceled in this run: {success}")
print(f"❌ Failed in this run : {fail}")
print(f"🗂 Log CSV            : {LOG_CSV}")
print("📊 Status breakdown   :", status_counts)

# Show the most recent rows of the full log (includes earlier runs).
df = pd.read_csv(LOG_CSV)
display(df.tail(20))