In [1]:
from typing import List
from dataclasses import dataclass
import pandas as pd
import requests


def get_olx(page: int) -> list:
    """Fetch one page of search results from the OLX Indonesia relevance API.

    Args:
        page: Page index sent as the ``page`` query parameter.

    Returns:
        The ``"data"`` entry of the JSON response, or an empty list when the
        server answers with any status other than 200.

    Raises:
        requests.exceptions.RequestException: On connection failures or when
            the server does not answer within the timeout.
        KeyError: If a 200 response carries no ``"data"`` entry.
    """
    url = 'https://www.olx.co.id/api/relevance/v2/search'

    # Fixed search filters; only ``page`` varies between calls.
    # NOTE(review): category/location/user are opaque API identifiers --
    # confirm their meaning against the site before changing them.
    params = (
        ('category', '5158'),
        ('facet_limit', '100'),
        ('location', '2000032'),
        ('location_facet_limit', '20'),
        ('page', page),
        ('platform', 'web-desktop'),
        ('spellcheck', 'true'),
        ('user', '17a332855b1x1e446ba'),
    )

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0',
        'Accept': 'application/json',
        'Accept-Language': 'en-US,en;q=0.5',
        # "br" is intentionally not advertised: requests can only decode
        # Brotli bodies when an optional brotli package is installed.
        'Accept-Encoding': 'gzip, deflate',
        'Connection': 'keep-alive',
    }

    # Without a timeout a stalled connection would block the scrape forever.
    response = requests.get(url, params=params, headers=headers, timeout=30)

    if response.status_code != 200:
        return []

    return response.json()["data"]


@dataclass
class Home:
    """One scraped listing; ``get_data`` builds one instance per ad.

    The field order determines the column order of the resulting DataFrame.
    Type annotations are not enforced at runtime by ``dataclass``.
    """

    id_: str  # ad["id"] from the API response
    url: str  # item page URL built from the ad id
    price: int  # int(ad["price"]["value"]["raw"]); currency/unit not shown here
    title: str  # ad["title"]
    description: str  # ad["description"]
    district: str  # locations_resolved["ADMIN_LEVEL_3_name"]
    sub_district: str  # locations_resolved["SUBLOCALITY_LEVEL_1_name"]
    user_id: int  # int(ad["user_id"])
    images: List[str]  # "url" of every entry in ad["images"]
    # The three fields below hold the "value" of the matching ad parameter
    # exactly as returned by the API (get_data does not convert them).
    # NOTE(review): presumably numeric areas / room counts -- confirm type and unit.
    sqr_building: int  # parameter "p_sqr_building"
    sqr_land: int  # parameter "p_sqr_land"
    bedroom: int  # parameter "p_bedroom"


def _param_value(parameters, key):
    """Return the "value" of the first parameter whose "key" equals ``key``, or None."""
    for param in parameters:
        if param["key"] == key:
            return param["value"]
    return None


def get_data() -> pd.DataFrame:
    """Scrape every result page from ``get_olx`` into a single DataFrame.

    Pages are requested starting at 0 and increasing by one until ``get_olx``
    returns an empty result. ``get_olx`` also returns an empty list for a
    non-200 response, so an HTTP error ends the scrape early instead of
    raising.

    Returns:
        A DataFrame with one row per ad and one column per ``Home`` field.
        Optional attributes an ad does not provide (district, sub-district,
        images, building/land size, bedrooms) are left as ``None`` / an empty
        list instead of aborting the whole scrape. When nothing was scraped
        the frame is empty but still carries the ``Home`` columns.

    Raises:
        KeyError, TypeError, ValueError: If an ad lacks one of the mandatory
            fields (id, price, title, description, user_id, locations_resolved)
            or the price / user id is not convertible to ``int``.
    """
    # Local import: only needed to name the columns of an empty result.
    from dataclasses import fields

    homes = []

    page = 0
    while True:
        ads_raw = get_olx(page)
        if not ads_raw:
            break

        for ad in ads_raw:
            locations = ad["locations_resolved"]
            # Tolerate ads that omit the list entirely or carry an explicit null.
            parameters = ad.get("parameters") or []
            images = ad.get("images") or []

            homes.append(
                Home(
                    id_=ad["id"],
                    url=f'https://www.olx.co.id/item/{ad["id"]}',
                    price=int(ad["price"]["value"]["raw"]),
                    title=ad["title"],
                    description=ad["description"],
                    district=locations.get("ADMIN_LEVEL_3_name"),
                    sub_district=locations.get("SUBLOCALITY_LEVEL_1_name"),
                    user_id=int(ad["user_id"]),
                    images=[image["url"] for image in images],
                    # Parameter values are stored exactly as the API returns them.
                    sqr_building=_param_value(parameters, "p_sqr_building"),
                    sqr_land=_param_value(parameters, "p_sqr_land"),
                    bedroom=_param_value(parameters, "p_bedroom"),
                )
            )

        page += 1

    if not homes:
        # Keep the schema stable so downstream cells still see the expected columns.
        return pd.DataFrame(columns=[field.name for field in fields(Home)])

    return pd.DataFrame(homes)


# Run the full scrape (one HTTP request per result page) and keep the result
# in `df`, which the next cell writes to disk.
df = get_data()
# Bare trailing expression: the notebook renders the first five rows.
df.head()

Unnamed: 0,id_,url,price,title,description,district,sub_district,user_id,images,sqr_building,sqr_land,bedroom
0,861566104,https://www.olx.co.id/item/861566104,325000000,Rumah murah berkualitas,Rumah siap bangun.lokasi di perkampungan padat...,Sleman Kab.,Depok,8235968,[https://apollo-singapore.akamaized.net:443/v1...,45,91,2
1,863964536,https://www.olx.co.id/item/863964536,7000000000,Jual cepat rumah besar di Pandega Asih,Jual cepat\nRumah besar dan nyaman di Pandega ...,Sleman Kab.,Depok,28759832,[https://apollo-singapore.akamaized.net:443/v1...,800,624,5
2,868786628,https://www.olx.co.id/item/868786628,3000000000,Dijual Cepat Rumah Murah Lokasi Strategis,Dijual Cepat Rumah Murah sangat bagus untuk In...,Sleman Kab.,Depok,39954330,[https://apollo-singapore.akamaized.net:443/v1...,266,278,7
3,867739657,https://www.olx.co.id/item/867739657,445000000,Dijual Rumah 400jt an 3 kmr tidur dalam perum ...,Perumahan Selo Permata Asri\n\nLingkungan Asri...,Sleman Kab.,Prambanan,117741327,[https://apollo-singapore.akamaized.net:443/v1...,95,101,3
4,774615563,https://www.olx.co.id/item/774615563,1250000000,Dijual Rumah Dekat Kampus UGM Jalan Sulaesi Ut...,RUMAH DI JUAL BELAKANG INUL FIESTA KARAOKE\r\n...,Sleman Kab.,Mlati,58188085,[https://apollo-singapore.akamaized.net:443/v1...,130,134,3


In [2]:
# Persist the scraped listings to "rumah.csv" in the working directory;
# index=False leaves the DataFrame's row index out of the file.
# Note: the `images` column holds Python lists, which are written as their
# string representation.
df.to_csv("rumah.csv", index=False)