In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import time


# HTTP headers passed to requests.get; the User-Agent string identifies the
# client as Chrome on Windows.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/130.0.0.0 Safari/537.36"
    ),
}


# Crawl settings.
LAST_PAGE = 900               # last result page to visit (inclusive)
PAGE_DELAY_SECONDS = 60       # pause before each result-page request
REQUEST_TIMEOUT_SECONDS = 30  # give up on a request that hangs this long
SITE_ROOT = "https://www.hepsiemlak.com"


def _fetch(url):
    """Return the response for *url*, or None when the request itself fails.

    Network-level failures (connection errors, timeouts, ...) are reported and
    swallowed so that one bad request cannot abort the whole crawl and discard
    the rows collected so far. HTTP status codes are left for the caller to
    inspect.
    """
    try:
        return requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS)
    except requests.RequestException as exc:
        print(f"Request to {url} failed: {exc}")
        return None


def _parse_listing(listing_soup):
    """Build the row dict for one listing detail page.

    The row holds every two-span ``li.spec-item`` entry as ``name -> value``,
    plus the fixed keys "Şehir", "İlçe", "Fiyat" and "Metre_Kare". Location
    fields that cannot be found are set to NaN.

    Raises:
        AttributeError: if the page has no ``div.right`` price element.
    """
    price = listing_soup.find("div", attrs={"class": "right"}).text.strip()

    # Every location field starts as NaN so a listing with missing data never
    # inherits a value from a previously parsed listing.
    city = np.nan
    district = np.nan
    square_metres = np.nan

    location = listing_soup.find("ul", attrs={"class": "short-property"})
    if location is not None:
        # NOTE(review): "data-v-f9b3c830" looks like a build-generated
        # attribute; confirm it is stable across site deployments.
        li_elements = location.find_all("li", attrs={"data-v-f9b3c830": True})
        if len(li_elements) >= 2:
            city = li_elements[0].get_text(strip=True)
            district = li_elements[1].get_text(strip=True)
        if len(li_elements) >= 7:
            square_metres = li_elements[6].get_text(strip=True)

    row = {}
    for spec in listing_soup.find_all("li", attrs={"class": "spec-item"}):
        spans = spec.find_all("span")
        # Only plain "name / value" pairs are kept.
        if len(spans) == 2:
            row[spans[0].get_text(strip=True)] = spans[1].get_text(strip=True)

    row["Şehir"] = city
    row["İlçe"] = district
    row["Fiyat"] = price
    row["Metre_Kare"] = square_metres
    return row


product_list = []
a = 1

while a <= LAST_PAGE:
    print(f"Page {a} is being processed...")

    time.sleep(PAGE_DELAY_SECONDS)

    r = _fetch(f"{SITE_ROOT}/satilik?page={a}")
    if r is None:
        print(f"Page {a}: request failed. This page is skipped.")
        a += 1
        continue
    if r.status_code == 429:
        print(f"Page {a}: 429 Too Many Requests. This page is skipped.")
        a += 1
        continue
    if r.status_code != 200:
        print(f"Page {a}: {r.status_code} error. Moving forward")
        a += 1
        continue

    soup = BeautifulSoup(r.content, "lxml")
    page = soup.find("ul", attrs={"class": "list-items-container"})

    if page is None:
        print(f"Page {a} not found or content not available. Moving forward.")
    else:
        for i in page.find_all("div", attrs={"class": "links"}):
            # A card without an anchor or href cannot be followed; skip it
            # instead of crashing the crawl.
            anchor = i.a
            link_end = anchor.get("href") if anchor is not None else None
            if not link_end:
                print("Listing card without a link. This product is skipped.")
                continue

            link = SITE_ROOT + link_end
            print(link)

            r1 = _fetch(link)
            if r1 is None:
                continue
            if r1.status_code == 429:
                print("Error 429 in the product link. This product is skipped.")
                continue
            if r1.status_code != 200:
                print(f"Error {r1.status_code} in product link. This product is skipped.")
                continue

            sp1 = BeautifulSoup(r1.content, "lxml")

            try:
                feature_dict = _parse_listing(sp1)
            except Exception as e:
                # Deliberate best-effort boundary: a listing that cannot be
                # parsed still contributes an all-NaN placeholder row.
                feature_dict = {
                    "Şehir": np.nan,
                    "İlçe": np.nan,
                    "Fiyat": np.nan,
                    "Metre_Kare": np.nan,
                }
                print(f"Error: {e}")
            product_list.append(feature_dict)

    a += 1
    print(f"Moving to page {a}...")


# One row per scraped listing; keys absent from a listing's dict become NaN
# in the resulting frame.
df = pd.DataFrame(product_list)


# Columns written to the spreadsheet, in output order.
selected_features = [
    'Şehir', 'İlçe', 'Fiyat', 'Metre_Kare', 'Konut Tipi', 'Oda + Salon Sayısı',
    'Bulunduğu Kat', 'Bina Yaşı', 'Isınma Tipi', 'Kat Sayısı',
    'Eşya Durumu', 'Banyo Sayısı', 'Kira Getirisi'
]
# reindex rather than df[selected_features]: plain selection raises KeyError
# when a column was never scraped (or when no listing was collected at all),
# whereas reindex creates the missing columns filled with NaN.
df = df.reindex(columns=selected_features)


df.to_excel("Hepsi_Emlak.xlsx", index=False)
