In [None]:
!pip install transformers

In [None]:
!pip install torch

In [None]:
!pip install pillow 

In [None]:
!pip install numpy 

In [None]:
!pip install requests 

In [None]:
!pip install undetected-chromedriver

In [None]:
import time
import os
import shutil
import torch
from PIL import Image
from transformers import CLIPProcessor, CLIPModel
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException


In [None]:
# --- CONFIGURATION ---
# Hugging Face model identifier used to download CLIP when no local copy exists.
MODEL_ID = "openai/clip-vit-base-patch16" 
# Directory where the downloaded model and processor are saved and later reloaded from.
LOCAL_MODEL_PATH = "./local_clip_model_optimized"
# Root folder for captured screenshots; it is deleted and recreated by clean_data_folder().
BASE_DATA_FOLDER = "image_data"

In [None]:
# --- PROMPT ENGINEERING ---
# Per-target prompt sets. Each entry holds:
#   pos    - text prompts describing the wanted object
#   neg    - text prompts for look-alikes and background (e.g. Bus vs Truck)
#   thresh - minimum summed probability of the "pos" prompts to accept a tile
CAPTCHA_CONFIG = {
    # Crosswalks are blurry, so they get a lower threshold.
    "crosswalk": dict(
        pos=["a pedestrian crosswalk", "white zebra crossing lines", "crosswalk markings on road"],
        neg=["plain asphalt road", "sidewalk", "grass", "white line on side of road"],
        thresh=0.55,
    ),
    "traffic light": dict(
        pos=["a traffic light", "traffic signal", "red light", "green light", "yellow light"],
        neg=["street light pole", "plain sky", "tree", "building window", "street sign"],
        thresh=0.65,
    ),
    # Hydrants are distinct, so they get a high threshold.
    "fire hydrant": dict(
        pos=["a fire hydrant", "red fireplug", "yellow fire hydrant"],
        neg=["mailbox", "trash can", "sidewalk", "red bucket", "grass"],
        thresh=0.70,
    ),
    "bus": dict(
        pos=["a city bus", "a transit bus", "double decker bus", "school bus"],
        neg=["a truck", "a van", "a car", "train", "plain street"],
        thresh=0.65,
    ),
    "bicycle": dict(
        pos=["a bicycle", "bike wheel", "cyclist", "handlebars"],
        neg=["motorcycle", "scooter", "car", "wheelchair"],
        thresh=0.65,
    ),
    "motorcycle": dict(
        pos=["a motorcycle", "motorbike", "rider on motorcycle"],
        neg=["bicycle", "scooter", "car"],
        thresh=0.60,
    ),
    "stairs": dict(
        pos=["outdoor stairs", "concrete steps", "staircase"],
        neg=["ladder", "striped shirt", "window blinds", "building texture"],
        thresh=0.65,
    ),
    "chimney": dict(
        pos=["a roof chimney", "brick chimney", "smoke stack"],
        neg=["tree trunk", "antenna", "cloud"],
        thresh=0.65,
    ),
    "bridge": dict(
        pos=["a bridge", "overpass", "viaduct", "suspension bridge"],
        neg=["road level", "river water", "sky"],
        thresh=0.60,
    ),
    # Fallback for targets without a dedicated entry. "pos" is intentionally
    # empty: the positive prompt is generated from the target name at
    # prediction time rather than stored here.
    "default": dict(
        pos=[],
        neg=["something else", "blurry background", "plain wall"],
        thresh=0.60,
    ),
}

In [None]:
# --- LOAD MODEL ---
# Prefer the copy cached on disk; otherwise download from the hub and cache it
# so that later runs can load locally.
_from_cache = os.path.exists(LOCAL_MODEL_PATH)
if _from_cache:
    print(f"Loading optimized model from {LOCAL_MODEL_PATH}...")
else:
    print(f"Downloading optimized model ({MODEL_ID})...")

_model_source = LOCAL_MODEL_PATH if _from_cache else MODEL_ID
model = CLIPModel.from_pretrained(_model_source)
processor = CLIPProcessor.from_pretrained(_model_source)

if not _from_cache:
    # First run: persist both artefacts next to each other.
    model.save_pretrained(LOCAL_MODEL_PATH)
    processor.save_pretrained(LOCAL_MODEL_PATH)

In [None]:
def clean_data_folder():
    """Reset ``BASE_DATA_FOLDER`` to an empty directory.

    Anything already stored under the folder is removed before the folder is
    created again.
    """
    leftover_present = os.path.exists(BASE_DATA_FOLDER)
    if leftover_present:
        # Discard everything captured by an earlier run.
        shutil.rmtree(BASE_DATA_FOLDER)
    os.makedirs(BASE_DATA_FOLDER)

def create_round_folder(round_num):
    """Ensure the output directory for one round exists and return its path.

    Args:
        round_num: Round number used to build the ``round_<n>`` folder name.

    Returns:
        The path ``<BASE_DATA_FOLDER>/round_<round_num>``.

    Raises:
        FileExistsError: If the path already exists but is not a directory.
    """
    folder = os.path.join(BASE_DATA_FOLDER, f"round_{round_num}")
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(folder, exist_ok=True)
    return folder

In [None]:
def get_smart_prediction(image, target_name, tile_index):
    """Decide whether a tile image shows the requested target.

    The tile is scored against the target's positive and negative text prompts
    in a single forward pass. The probabilities of all positive prompts are
    summed and compared with the target's threshold.

    Args:
        image: Tile image handed to the module-level ``processor``.
        target_name: Key into ``CAPTCHA_CONFIG``. Names without an entry use
            the ``"default"`` entry.
        tile_index: Zero-based position of the tile in the grid. Not used in
            the decision; kept so callers and debug logging can identify the
            tile.

    Returns:
        True when the summed positive probability is strictly greater than the
        configured threshold, otherwise False.
    """
    # 1. Select config (unknown targets share the "default" entry).
    config = CAPTCHA_CONFIG.get(target_name, CAPTCHA_CONFIG["default"])

    # 2. Build prompts. An empty positive list (unknown target, or the literal
    #    name "default") gets a prompt generated from the target name, so the
    #    positive slice below is never empty.
    positive_prompts = config["pos"] or [f"a photo of a {target_name}"]
    negative_prompts = config["neg"]
    all_prompts = positive_prompts + negative_prompts

    # 3. Run the model without tracking gradients.
    inputs = processor(text=all_prompts, images=image, return_tensors="pt", padding=True)
    with torch.no_grad():
        outputs = model(**inputs)

    # 4. Softmax across prompts for the single image; the positive prompts
    #    occupy the leading positions of that vector.
    probs = outputs.logits_per_image.softmax(dim=1)[0]
    pos_score = probs[:len(positive_prompts)].sum().item()

    return pos_score > config["thresh"]

In [23]:
# Ordered (keywords, canonical name) rules used to map a challenge prompt onto
# a CAPTCHA_CONFIG key. The first rule with any keyword contained in the prompt
# wins; "motorcycle" is listed before "bicycle" because "motorbike" contains "bike".
_TARGET_ALIASES = (
    (("traffic light",), "traffic light"),
    (("fire",), "fire hydrant"),
    (("crosswalk",), "crosswalk"),
    (("stair",), "stairs"),
    (("motorcycle", "motorbike"), "motorcycle"),
    (("bicycle", "bike"), "bicycle"),
    (("bus",), "bus"),
    (("chimney",), "chimney"),
    (("bridge",), "bridge"),
)


def _normalize_target(prompt_text):
    """Return the canonical target name for a prompt, or the lower-cased prompt."""
    lowered = prompt_text.lower()
    for keywords, canonical in _TARGET_ALIASES:
        if any(keyword in lowered for keyword in keywords):
            return canonical
    return lowered


def solve_optimized(max_rounds=15):
    """Run the image-challenge loop against the reCAPTCHA demo page.

    Each round reads the challenge prompt, screenshots the tile grid, cuts it
    into tiles, scores every tile with ``get_smart_prediction``, clicks the
    matching tiles and presses the verify button. Screenshots and tiles are
    written under a per-round folder. The browser is always closed on exit.

    Args:
        max_rounds: Upper bound on the number of rounds before giving up.
            Defaults to 15.
    """
    clean_data_folder()

    driver = uc.Chrome()
    try:
        # Navigation happens inside the try so that a failed page load still
        # reaches the finally block and closes the browser.
        driver.get("https://patrickhlauke.github.io/recaptcha/")
        wait = WebDriverWait(driver, 10)

        # --- OPEN CAPTCHA ---
        print("Clicking checkbox...")
        # Wait for the anchor iframe instead of indexing an immediate lookup,
        # which fails with IndexError when the frame has not been added yet.
        wait.until(EC.frame_to_be_available_and_switch_to_it(
            (By.XPATH, "//iframe[contains(@src, 'recaptcha/api2/anchor')]")
        ))
        driver.find_element(By.ID, "recaptcha-anchor").click()
        driver.switch_to.default_content()
        time.sleep(4)

        round_count = 1

        while True:
            print(f"\n--- ROUND {round_count} ---")

            # 1. Check if Solved
            # NOTE(review): a timeout here is reported as success. Confirm the
            # challenge iframe really leaves the DOM once solved; if it is only
            # hidden, a visibility-based condition would be needed instead.
            try:
                challenge_frame = wait.until(EC.presence_of_element_located(
                    (By.XPATH, "//iframe[contains(@title, 'recaptcha challenge')]")
                ))
                driver.switch_to.frame(challenge_frame)
            except TimeoutException:
                print(">>> SOLVED! <<<")
                break

            # 2. Get Prompt
            try:
                prompt_text = wait.until(
                    EC.presence_of_element_located((By.CSS_SELECTOR, "strong"))
                ).text

                # Map plural / partial wording onto the configured target names.
                clean_target = _normalize_target(prompt_text)
                print(f"Goal: {clean_target}")

                # 3. Detect Grid
                tiles = driver.find_elements(By.CSS_SELECTOR, "td.rc-imageselect-tile")
                is_4x4 = (len(tiles) == 16)
                grid_dim = 4 if is_4x4 else 3
                print(f"Detected {grid_dim}x{grid_dim} Grid.")

            except Exception as exc:
                print("Error reading prompt. Exiting.")
                # Surface the cause instead of discarding it.
                print(f"Error: {exc}")
                break

            # 4. Capture
            current_folder = create_round_folder(round_count)
            grid_path = os.path.join(current_folder, "full_grid.png")
            img_wrapper = driver.find_element(By.ID, "rc-imageselect-target")
            img_wrapper.screenshot(grid_path)

            matches = []

            # The context manager closes the screenshot file once the tiles are
            # cut, so handles do not accumulate across rounds.
            with Image.open(grid_path) as full_image:
                width, height = full_image.size
                tile_w = width // grid_dim
                tile_h = height // grid_dim

                # 5. Analyze with Smart Logic
                for i, tile in enumerate(tiles):
                    row, col = divmod(i, grid_dim)
                    tile_img = full_image.crop(
                        (col * tile_w, row * tile_h, (col + 1) * tile_w, (row + 1) * tile_h)
                    )
                    tile_img.save(os.path.join(current_folder, f"tile_{i+1}.png"))

                    if get_smart_prediction(tile_img, clean_target, i):
                        matches.append(tile)

            # 6. Click & Verify
            print(f"Clicking {len(matches)} tiles...")
            for tile in matches:
                tile.click()
                time.sleep(0.05)  # Fast clicks

            print("Clicking Verify/Next...")
            driver.find_element(By.ID, "recaptcha-verify-button").click()

            driver.switch_to.default_content()
            time.sleep(3)
            round_count += 1

            if round_count > max_rounds:
                print("Safety Break.")
                break

    except Exception as e:
        print(f"Error: {e}")
    finally:
        driver.quit()

In [None]:
# Entry point: start the solver when this code runs as the main module.
# NOTE(review): in a notebook kernel __name__ is normally "__main__", so
# executing this cell launches the browser immediately - confirm that is intended.
if __name__ == "__main__":
    solve_optimized()