In [4]:
from typing import List
from dataclasses import dataclass
import pandas as pd
import requests


def get_olx(page: int, timeout: float = 30.0) -> list:
    """Fetch one page of search results from the OLX Indonesia search API.

    Args:
        page: Page number sent as the ``page`` query parameter.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a stalled connection would block forever.

    Returns:
        The value stored under ``"data"`` in the JSON response, or an empty
        list when the status code is not 200, the body is not valid JSON,
        or the payload has no (or an empty) ``"data"`` entry.

    Raises:
        requests.exceptions.RequestException: On connection errors or when
            the timeout expires; these are deliberately not swallowed.
    """
    # NOTE(review): the category/location/user values are hard-coded search
    # filters; their meaning is not visible here -- confirm against the OLX
    # site before changing them.
    params = (
        ('category', '5158'),
        ('facet_limit', '100'),
        ('location', '2000032'),
        ('location_facet_limit', '20'),
        ('page', page),
        ('platform', 'web-desktop'),
        ('spellcheck', 'true'),
        ('user', '17a332855b1x1e446ba'),
    )

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0',
        'Accept': 'application/json',
        # Fixed: the original value 'en-USen;q=0.5' was missing the comma
        # between the two language ranges.
        'Accept-Language': 'en-US,en;q=0.5',
        # NOTE(review): this value is malformed (no comma before "br"). It is
        # left unchanged on purpose: advertising brotli properly would
        # presumably require a brotli decoder to be installed -- confirm.
        'Accept-Encoding': 'gzip,deflate br',
        'Connection': 'keep-alive',
        # NOTE(review): the two x-nba-* headers look like leftovers from a
        # different scraper; kept so the request is unchanged -- confirm
        # before removing.
        'x-nba-stats-origin': 'stats',
        'x-nba-stats-token': 'true',
    }

    response = requests.get(
        'https://www.olx.co.id/api/relevance/v2/search',
        params=params,
        headers=headers,
        timeout=timeout,
    )

    if response.status_code != 200:
        return []

    # A 200 response with a non-JSON body is treated like any other bad
    # response: the caller gets an empty page.
    try:
        payload = response.json()
    except ValueError:
        return []

    if not isinstance(payload, dict):
        return []

    return payload.get("data") or []


@dataclass
class Home:
    """One house listing assembled by ``get_data`` from a raw search-API ad."""

    # Listing identifier, taken from the ad's "id" field.
    id_: str
    # Item page URL, built as https://www.olx.co.id/item/<id>.
    url: str
    # Price from ad["price"]["value"]["raw"], converted with int().
    # Currency is presumably IDR -- TODO confirm.
    price: int
    # Ad title and free-text description, copied from the ad.
    title: str
    description: str
    # ad["locations_resolved"]["ADMIN_LEVEL_3_name"]
    district: str
    # ad["locations_resolved"]["SUBLOCALITY_LEVEL_1_name"]
    sub_district: str
    # Identifier of the posting user, converted with int().
    user_id: int
    # The "url" of every entry in the ad's "images" list.
    images: List[str]
    # The three fields below come from the ad's "parameters" list (keys
    # "p_sqr_building", "p_sqr_land", "p_bedroom").
    # NOTE(review): get_data does not convert these values to int, so the
    # runtime type is whatever the API returns -- confirm.
    # Areas are presumably in square metres -- TODO confirm.
    sqr_building: int
    sqr_land: int
    bedroom: int


def _param_value(parameters: list, key: str):
    """Return the ``"value"`` of the first parameter whose ``"key"`` equals *key*, or None."""
    return next((p["value"] for p in parameters if p.get("key") == key), None)


def _parse_ad(ad: dict) -> "Home":
    """Convert one raw ad dict from the search API into a ``Home`` record."""
    locations = ad["locations_resolved"]
    parameters = ad.get("parameters") or []

    return Home(
        id_=ad["id"],
        url=f'https://www.olx.co.id/item/{ad["id"]}',
        price=int(ad["price"]["value"]["raw"]),
        title=ad["title"],
        description=ad["description"],
        district=locations["ADMIN_LEVEL_3_name"],
        sub_district=locations["SUBLOCALITY_LEVEL_1_name"],
        user_id=int(ad["user_id"]),
        images=[image["url"] for image in ad["images"]],
        # Parameter values are stored exactly as the API returns them (no
        # numeric conversion). A missing parameter becomes None so that one
        # incomplete ad does not abort the whole scrape.
        sqr_building=_param_value(parameters, "p_sqr_building"),
        sqr_land=_param_value(parameters, "p_sqr_land"),
        bedroom=_param_value(parameters, "p_bedroom"),
    )


def get_data() -> pd.DataFrame:
    """Download every result page and return the listings as a DataFrame.

    Pages are requested from ``get_olx`` starting at page 0 and increasing
    by one until a page comes back empty. ``get_olx`` also returns an empty
    page for a failed request, so a failure ends the scrape early.

    Returns:
        A DataFrame with one row per ad and one column per ``Home`` field.
        Ads that lack the building-area, land-area or bedroom parameter get
        ``None`` in that column instead of raising ``IndexError``.
    """
    homes = []

    page = 0
    while True:
        ads_raw = get_olx(page)
        if not ads_raw:
            break

        homes.extend(_parse_ad(ad) for ad in ads_raw)
        page += 1

    return pd.DataFrame(homes)


# Run the scrape; this issues one HTTP request per result page.
df = get_data()
# Notebook preview of the first rows of the scraped table.
df.head()

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25


Unnamed: 0,id_,url,price,title,description,district,sub_district,user_id,images,sqr_building,sqr_land,bedroom
0,867924477,https://www.olx.co.id/item/867924477,2500000000,Jual Rumah Ilir-Ilir,Rumah dengan lokasi di lingkungan kampus UGM d...,Sleman Kab.,Depok,1441245,[https://apollo-singapore.akamaized.net:443/v1...,164,221,7
1,859227780,https://www.olx.co.id/item/859227780,4000000000,Dijual rumah nyaman dan lokasi strategis,"Lokasi strategis, jalan dua arah, furnished, g...",Sleman Kab.,Ngaglik,111109904,[https://apollo-singapore.akamaized.net:443/v1...,347,415,7
2,825483212,https://www.olx.co.id/item/825483212,690000000,Rumah dijual jalan godean dalam ring road,DIJUAL CEPAT!! HARGA YG TERTERA TIDAK UNTUK PE...,Sleman Kab.,Gamping,115007344,[https://apollo-singapore.akamaized.net:443/v1...,95,109,3
3,867879586,https://www.olx.co.id/item/867879586,3800000000,Jual rumah secepatnya. Bisa nego dulu.,Tanah dan bangunan serta dijual secepatnya. Fa...,Sleman Kab.,Ngaglik,90595802,[https://apollo-singapore.akamaized.net:443/v1...,307,307,5
4,869005448,https://www.olx.co.id/item/869005448,424000038,"Harga 400 Jutaan, Rumah Siap Huni Dekat Ringro...",Rumah Minimalis Berlokasi di Bantul Yogyakarta...,Yogyakarta Kota,Umbulharjo,92483130,[https://apollo-singapore.akamaized.net:443/v1...,45,75,2


In [5]:
# Save the scraped listings to rumah.csv, omitting the DataFrame index column.
df.to_csv("rumah.csv", index=False)