In [3]:
import os
import json
import time
import random
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException

# === [0] Uyku Modunu Engelle ===
import ctypes

ES_CONTINUOUS = 0x80000000
ES_SYSTEM_REQUIRED = 0x00000001

# Kod çalıştığı sürece bilgisayarın uyku moduna geçmesini engelle
ctypes.windll.kernel32.SetThreadExecutionState(ES_CONTINUOUS | ES_SYSTEM_REQUIRED)


# === [A] ChromeDriver Ayarları ===
chrome_driver_path = r"C:\Users\emirhan.gul\Downloads\chromedriver-win64\chromedriver-win64\chromedriver.exe"  # <- kendi yolun
options = Options()
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
options.add_argument("--disable-blink-features=AutomationControlled")

options.add_argument("--disable-blink-features=AutomationControlled")
options.add_argument("start-maximized")
options.add_argument("disable-infobars")
options.add_argument("--disable-extensions")
options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.5735.110 Safari/537.36")



service = Service(chrome_driver_path)
driver = webdriver.Chrome(service=service, options=options)
driver.set_page_load_timeout(50)


driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
    "source": "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"
})



# === [B] Yardımcı Fonksiyonlar ===
def scroll_page_randomly():
    y = random.randint(300, 1500)
    driver.execute_script(f"window.scrollBy(0, {y});")

def accept_cookies_if_present():
    try:
        WebDriverWait(driver, 1).until(
            EC.element_to_be_clickable((By.ID, "onetrust-accept-btn-handler"))
        ).click()
        print("🍪 Çerezler kabul edildi.")
    except:
        pass  # Çıkmadıysa geç


def wait_random(min_sec=1.8, max_sec=3.0):
    time.sleep(random.uniform(min_sec, max_sec))

def extract_text_safe(by, identifier):
    try:
        return driver.find_element(by, identifier).text.strip()
    except:
        return None

def extract_parca_boya_bolunmus():
    try:
        categories = {
            "Orjinal": [],
            "Lokal boyalı": [],
            "Boyalı": [],
            "Değişmiş": [],
            "Belirtilmemiş": []
        }

        section = driver.find_element(By.CLASS_NAME, "car-damage-info")
        items = section.find_elements(By.CLASS_NAME, "car-damage-info-item")

        for item in items:
            try:
                label = item.find_element(By.TAG_NAME, "p").text.strip()
                ul = item.find_element(By.TAG_NAME, "ul")
                lis = ul.find_elements(By.TAG_NAME, "li")
                for li in lis:
                    parca = li.text.strip()
                    if parca:
                        if label in categories:
                            categories[label].append(parca)
                        else:
                            print(f"⚠️ Parça işlenemedi: '{label}'")
            except Exception as inner_e:
                print(f"⚠️ Parça işlenemedi: {inner_e}")
                continue

        return {
            "orjinal": ", ".join(categories["Orjinal"]),
            "lokal_boyalı": ", ".join(categories["Lokal boyalı"]),
            "boyalı": ", ".join(categories["Boyalı"]),
            "değişmiş": ", ".join(categories["Değişmiş"]),
            "belirtilmemiş": ", ".join(categories["Belirtilmemiş"])
        }

    except Exception as e:
        print(f"❌ extract_parca_boya_bolunmus başarısız: {e}")
        return {
            "orjinal": None,
            "lokal_boyalı": None,
            "boyalı": None,
            "değişmiş": None,
            "belirtilmemiş": None
        }




def get_property_value(key):
    try:
        rows = driver.find_elements(By.CSS_SELECTOR, '.property-item')
        for row in rows:
            if key in row.text:
                return row.find_element(By.CLASS_NAME, 'property-value').text.strip()
    except:
        pass
    return None

# === [C] Daha Önce Kaydedilen İlanları Yükle ===
if os.path.exists("results.csv"):
    df_existing = pd.read_csv("results.csv")
    done_urls = set(df_existing["url"].tolist())
else:
    df_existing = pd.DataFrame()
    done_urls = set()

if os.path.exists("failed_ads.json"):
    with open("failed_ads.json", "r", encoding="utf-8") as f:
        failed_ads = json.load(f)
else:
    failed_ads = []

# === [D] Çekilecek Tüm Linkleri Yükle ===
all_links = pd.read_csv("volkswagen_passat-1-6-tdi-bluemotion_2018_43_sayfa_ilan_linkleri.csv")["Ilan_Linkleri"].tolist()
total_ads = len(all_links)
print(f"Toplam {total_ads} ilan yüklendi. Daha önce tamamlanan: {len(done_urls)}")

# === [E] Scraping Başlat ===
results = []
processed = 0

for idx, url in enumerate(all_links, start=1):
    if url in done_urls:
        continue

    try:
        driver.get(url)
        """accept_cookies_if_present()"""
        wait_random()
        scroll_page_randomly()
        
        boya_dict = extract_parca_boya_bolunmus()

        data = {
            "url": url,
            "fiyat": extract_text_safe(By.CSS_SELECTOR, ".product-price"),
            "ilan_no": get_property_value("İlan No"),
            "ilan_tarihi": get_property_value("İlan Tarihi"),
            "marka": get_property_value("Marka"),
            "seri": get_property_value("Seri"),
            "model": get_property_value("Model"),
            "yıl": get_property_value("Yıl"),
            "km": get_property_value("Kilometre"),
            "vites": get_property_value("Vites Tipi"),
            "yakıt": get_property_value("Yakıt Tipi"),
            "kasa": get_property_value("Kasa Tipi"),
            "renk": get_property_value("Renk"),
            "motor_hacmi": get_property_value("Motor Hacmi"),
            "motor_gücü": get_property_value("Motor Gücü"),
            "çekiş": get_property_value("Çekiş"),
            "boya_durumu": get_property_value("Durumu"),
            "takasa_uygunluk": get_property_value("Takas"),
            "kimden": get_property_value("Kimden"),
            "orjinal": boya_dict["orjinal"],
            "lokal_boyalı": boya_dict["lokal_boyalı"],
            "boyalı": boya_dict["boyalı"],
            "değişmiş": boya_dict["değişmiş"],
            "belirtilmemiş": boya_dict["belirtilmemiş"]
        }

        results.append(data)
        processed += 1
        print(f"[{processed}/{total_ads}] {url} ✔")

        # Her 5 ilanda bir CSV’ye yaz
        if processed % 5 == 0:
            df_new = pd.DataFrame(results)
            df_existing = pd.concat([df_existing, df_new], ignore_index=True)
            df_existing.to_csv("results.csv", index=False, encoding="utf-8")  # önceki satırlarla birlikte yaz
            print(f"Ara kayıt yapıldı ({len(df_existing)} ilan kayıtlı) ✅")
            results = []

    except Exception as e:
        print(f"[X] Hata: {url} | {e}")
        failed_ads.append({"url": url, "error": str(e)})

# === [F] Kalan verileri kaydet ===
if results:
    df_new = pd.DataFrame(results)
    df_combined = pd.concat([df_existing, df_new], ignore_index=True)
    df_combined.to_csv("results.csv", index=False, encoding="utf-8")
    print("Final kayıt yapıldı ✅")

# === [G] Hataları JSON olarak kaydet ===
with open("failed_ads.json", "w", encoding="utf-8") as f:
    json.dump(failed_ads, f, ensure_ascii=False, indent=2)

driver.quit()
print("Tüm işlem tamamlandı. Tarayıcı kapatıldı. ✅")


Toplam 2120 ilan yüklendi. Daha önce tamamlanan: 1050
❌ extract_parca_boya_bolunmus başarısız: Message: no such window: target window already closed
from unknown error: web view not found
  (Session info: chrome=138.0.7204.96)
Stacktrace:
	GetHandleVerifier [0x0x7ff6127e6f65+76917]
	GetHandleVerifier [0x0x7ff6127e6fc0+77008]
	(No symbol) [0x0x7ff612599dea]
	(No symbol) [0x0x7ff612572191]
	(No symbol) [0x0x7ff61261f83e]
	(No symbol) [0x0x7ff61263ff72]
	(No symbol) [0x0x7ff612618243]
	(No symbol) [0x0x7ff6125e1431]
	(No symbol) [0x0x7ff6125e21c3]
	GetHandleVerifier [0x0x7ff612abd29d+3051437]
	GetHandleVerifier [0x0x7ff612ab78f3+3028483]
	GetHandleVerifier [0x0x7ff612ad588d+3151261]
	GetHandleVerifier [0x0x7ff61280182e+185662]
	GetHandleVerifier [0x0x7ff6128096ef+218111]
	GetHandleVerifier [0x0x7ff6127efae4+112628]
	GetHandleVerifier [0x0x7ff6127efc99+113065]
	GetHandleVerifier [0x0x7ff6127d6c68+10616]
	BaseThreadInitThunk [0x0x7ff81a5de8d7+23]
	RtlUserThreadStart [0x0x7ff81b23c34c+44]

[

  df_combined = pd.concat([df_existing, df_new], ignore_index=True)


KeyboardInterrupt: 