In [None]:
import os
import json
import time
import random
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException


# ========== [1] ChromeDriver Ayarları ==========
chrome_driver_path = r"C:\Users\emirhan.gul\Downloads\chromedriver-win64\chromedriver-win64\chromedriver.exe"  # ← BURAYA kendi chromedriver yolunu yaz
options = Options()
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
options.add_argument("--disable-blink-features=AutomationControlled")
options.add_argument("start-maximized")
options.add_argument("disable-infobars")
options.add_argument("--disable-extensions")

service = Service(chrome_driver_path)
driver = webdriver.Chrome(service=service, options=options)
driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
    "source": "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"
})
driver.set_page_load_timeout(20)

# ========== [2] Yardımcı Fonksiyonlar ==========

def wait_random(min_sec=1.0, max_sec=1.3):
    time.sleep(random.uniform(min_sec, max_sec))

def scroll_random():
    y = random.randint(300, 1200)
    driver.execute_script(f"window.scrollBy(0, {y});")

def extract_text_safe(by, selector):
    try:
        return driver.find_element(by, selector).text.strip()
    except:
        return None

def get_property_value(key):
    try:
        rows = driver.find_elements(By.CSS_SELECTOR, ".property-item")
        for row in rows:
            if key in row.text:
                return row.find_element(By.CLASS_NAME, "property-value").text.strip()
    except:
        return None
def get_with_timeout(driver, url, timeout=2):
    try:
        driver.set_page_load_timeout(timeout)
        driver.get(url)
    except TimeoutException:
        print(f"⚠️ Sayfa zaman aşımı, stop() uygulandı: {url}")
        driver.execute_script("window.stop()")  # manuel durdurma

def extract_parca_boya():
    try:
        categories = {
            "Orjinal": [],
            "Lokal boyalı": [],
            "Boyalı": [],
            "Değişmiş": [],
            "Belirtilmemiş": []
        }

        section = driver.find_element(By.CLASS_NAME, "car-damage-info")
        items = section.find_elements(By.CLASS_NAME, "car-damage-info-item")

        for item in items:
            try:
                label = item.find_element(By.TAG_NAME, "p").text.strip()
                ul = item.find_element(By.TAG_NAME, "ul")
                for li in ul.find_elements(By.TAG_NAME, "li"):
                    text = li.text.strip()
                    if text:
                        categories.get(label, []).append(text)
            except:
                continue

        return {k.lower().replace(" ", "_"): ", ".join(v) for k, v in categories.items()}

    except Exception as e:
        print(f"❌ Boya bilgisi alınamadı: {e}")
        return {k.lower().replace(" ", "_"): None for k in [
            "Orjinal", "Lokal boyalı", "Boyalı", "Değişmiş", "Belirtilmemiş"
        ]}

# === [C] Daha Önce Kaydedilen İlanları Yükle ===
if os.path.exists("retry_results_partial.csv"):
    df_existing = pd.read_csv("retry_results_partial.csv")
    done_urls = set(df_existing["url"].tolist())
else:
    df_existing = pd.DataFrame()
    done_urls = set()

# ========== [3] Hatalı Linkleri Yükle ==========
df_retry = pd.read_csv("retry_links.csv")
retry_links = df_retry["Ilan_Linkleri"].tolist()

results = []
errors = []

# ========== [4] Tekrar Scrape Başlasın ==========
for i, url in enumerate(retry_links, 1):
    if url in done_urls:
        continue
    try:
        """driver.get(url)"""
        get_with_timeout(driver, url, timeout=2)
        wait_random()
        scroll_random()

        boya = extract_parca_boya()

        data = {
            "url": url,
            "fiyat": extract_text_safe(By.CSS_SELECTOR, ".product-price"),
            "ilan_no": get_property_value("İlan No"),
            "ilan_tarihi": get_property_value("İlan Tarihi"),
            "marka": get_property_value("Marka"),
            "seri": get_property_value("Seri"),
            "model": get_property_value("Model"),
            "yıl": get_property_value("Yıl"),
            "km": get_property_value("Kilometre"),
            "vites": get_property_value("Vites Tipi"),
            "yakıt": get_property_value("Yakıt Tipi"),
            "kasa": get_property_value("Kasa Tipi"),
            "renk": get_property_value("Renk"),
            "motor_hacmi": get_property_value("Motor Hacmi"),
            "motor_gücü": get_property_value("Motor Gücü"),
            "çekiş": get_property_value("Çekiş"),
            "boya_durumu": get_property_value("Durumu"),
            "takasa_uygunluk": get_property_value("Takas"),
            "kimden": get_property_value("Kimden"),
            **boya
        }

        results.append(data)
        print(f"[{i}/{len(retry_links)}] ✔ {url}")

        if i % 5 == 0:
            df_new = pd.DataFrame(results)
            df_existing = pd.concat([df_existing, df_new], ignore_index=True)
            df_existing.to_csv("retry_results_partial.csv", index=False, encoding="utf-8")
            print(f"📝 Ara kayıt yapıldı. Toplam kayıt: {len(df_existing)}")
            results = []  # listeyi temizle

    except Exception as e:
        print(f"[{i}/{len(retry_links)}] ❌ Hata: {url} | {e}")
        errors.append({"url": url, "error": str(e)})

# ========== [5] Son Kayıtlar ==========
if results:
    df = pd.DataFrame(results)
    df.to_csv("retry_results_final.csv", index=False, encoding="utf-8")
    print("✅ Final kayıt yapıldı.")

if errors:
    with open("retry_failed_final.json", "w", encoding="utf-8") as f:
        json.dump(errors, f, ensure_ascii=False, indent=2)
    print(f"⚠️ Hatalı {len(errors)} ilan ayrı kaydedildi: retry_failed_final.json")

driver.quit()
print("🎉 Tüm işlem tamamlandı.")


In [6]:
import os
import json
import time
import random
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException


# ========== [1] ChromeDriver Ayarları ==========
chrome_driver_path = r"C:\Users\emirhan.gul\Downloads\chromedriver-win64\chromedriver-win64\chromedriver.exe"  # ← BURAYA kendi chromedriver yolunu yaz
options = Options()
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
options.add_argument("--disable-blink-features=AutomationControlled")
options.add_argument("start-maximized")
options.add_argument("disable-infobars")
options.add_argument("--disable-extensions")

service = Service(chrome_driver_path)
driver = webdriver.Chrome(service=service, options=options)
driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
    "source": "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"
})
driver.set_page_load_timeout(20)

# ========== [2] Yardımcı Fonksiyonlar ==========

def wait_random(min_sec=1.0, max_sec=1.3):
    time.sleep(random.uniform(min_sec, max_sec))

def scroll_random():
    y = random.randint(300, 1200)
    driver.execute_script(f"window.scrollBy(0, {y});")

def extract_text_safe(by, selector):
    try:
        return driver.find_element(by, selector).text.strip()
    except:
        return None

def get_property_value(key):
    try:
        rows = driver.find_elements(By.CSS_SELECTOR, ".property-item")
        for row in rows:
            if key in row.text:
                return row.find_element(By.CLASS_NAME, "property-value").text.strip()
    except:
        return None
def get_with_timeout(driver, url, timeout=2):
    try:
        driver.set_page_load_timeout(timeout)
        driver.get(url)
    except TimeoutException:
        print(f"⚠️ Sayfa zaman aşımı, stop() uygulandı: {url}")
        driver.execute_script("window.stop()")  # manuel durdurma

def extract_parca_boya():
    try:
        categories = {
            "Orjinal": [],
            "Lokal boyalı": [],
            "Boyalı": [],
            "Değişmiş": [],
            "Belirtilmemiş": []
        }

        section = driver.find_element(By.CLASS_NAME, "car-damage-info")
        items = section.find_elements(By.CLASS_NAME, "car-damage-info-item")

        for item in items:
            try:
                label = item.find_element(By.TAG_NAME, "p").text.strip()
                ul = item.find_element(By.TAG_NAME, "ul")
                for li in ul.find_elements(By.TAG_NAME, "li"):
                    text = li.text.strip()
                    if text:
                        categories.get(label, []).append(text)
            except:
                continue

        return {k.lower().replace(" ", "_"): ", ".join(v) for k, v in categories.items()}

    except Exception as e:
        print(f"❌ Boya bilgisi alınamadı: {e}")
        return {k.lower().replace(" ", "_"): None for k in [
            "Orjinal", "Lokal boyalı", "Boyalı", "Değişmiş", "Belirtilmemiş"
        ]}





# ========== [4] Tekrar Scrape Başlasın ==========

results = []

"""driver.get(url)"""
get_with_timeout(driver, "https://www.arabam.com/ilan/sahibinden-satilik-volkswagen-passat-1-6-tdi-bluemotion-comfortline/sahibinden-volkswagen-passat-1-6-tdi-bluemotion-comfortline-2015-model/29768546", timeout=2)
wait_random()
scroll_random()

boya = extract_parca_boya()

data = {
    "url": url,
    "fiyat": extract_text_safe(By.CSS_SELECTOR, ".product-price"),
    "ilan_no": get_property_value("İlan No"),
    "ilan_tarihi": get_property_value("İlan Tarihi"),
    "marka": get_property_value("Marka"),
    "seri": get_property_value("Seri"),
    "model": get_property_value("Model"),
    "yıl": get_property_value("Yıl"),
    "km": get_property_value("Kilometre"),
    "vites": get_property_value("Vites Tipi"),
    "yakıt": get_property_value("Yakıt Tipi"),
    "kasa": get_property_value("Kasa Tipi"),
    "renk": get_property_value("Renk"),
    "motor_hacmi": get_property_value("Motor Hacmi"),
    "motor_gücü": get_property_value("Motor Gücü"),
    "çekiş": get_property_value("Çekiş"),
    "boya_durumu": get_property_value("Durumu"),
    "takasa_uygunluk": get_property_value("Takas"),
    "kimden": get_property_value("Kimden"),
    **boya
}

results.append(data)
print(f"[{i}/{len(retry_links)}] ✔ {url}")


    

    
df_new = pd.DataFrame(results)            
df_new.to_csv("sonbasbalink.csv", index=False, encoding="utf-8")
print(f"📝 Ara kayıt yapıldı. ")
results = []  # listeyi temizle
driver.quit()
print("🎉 Tüm işlem tamamlandı.")


⚠️ Sayfa zaman aşımı, stop() uygulandı: https://www.arabam.com/ilan/sahibinden-satilik-volkswagen-passat-1-6-tdi-bluemotion-comfortline/sahibinden-volkswagen-passat-1-6-tdi-bluemotion-comfortline-2015-model/29768546
[175/175] ✔ 6
📝 Ara kayıt yapıldı. 
🎉 Tüm işlem tamamlandı.
