# Imports

In [None]:
import requests

from bs4 import BeautifulSoup
import json
import csv

# Define Query

In [None]:
# Search filters for the listing query. Values stay strings because they are
# concatenated straight into the request URL below.
query = dict(
    AdTypeID="1",
    PrTypeID="1",
    cities="1",
    districts="106.28.29.30.38.39.40.41.42.43.46.47.48.101",
    regions="4",
    WithPhoto="1",
)

# The request URL is assembled as url_before + <page number> + url_after.
url_before = (
    "https://www.myhome.ge/ka/s/iyideba-bina-Tbilisi/"
    "?Keyword=%E1%83%95%E1%83%90%E1%83%99%E1%83%94-%E1%83%A1%E1%83%90%E1%83%91%E1%83%A3%E1%83%A0%E1%83%97%E1%83%90%E1%83%9A%E1%83%9D"
    f"&AdTypeID={query['AdTypeID']}"
    f"&PrTypeID={query['PrTypeID']}"
    f"&cities={query['cities']}"
    f"&districts={query['districts']}"
    f"&regions={query['regions']}"
    "&CardView=1&OwnerTypeID=1&Page="
)
url_after = f"&WithPhoto={query['WithPhoto']}"

# Scrape

In [None]:
# Accumulates the listing records gathered from every scraped results page.
apartments = []

In [None]:
def get_query_response(page, timeout=30):
    """Fetch one results page and return the cached query that matches our filters.

    The page HTML embeds a JSON document in a ``<script type="application/json">``
    tag. Its ``props.pageProps.dehydratedState.queries`` list holds cached
    queries; the entry whose parameters equal every value in ``query`` and
    whose ``Page`` equals ``page`` is the response to the request we sent.

    Args:
        page: Page number to request. It is converted with ``str`` both for
            the URL and for the comparison against the ``Page`` parameter.
        timeout: Timeout in seconds passed to ``requests.get``.

    Returns:
        The matching query entry, or ``None`` when the page has no JSON
        script tag, the JSON lacks the expected structure, or no entry matches.
    """
    url = url_before + str(page) + url_after

    # Without a timeout a stalled connection would block the scrape forever.
    html = requests.get(url, timeout=timeout).content
    soup = BeautifulSoup(html, 'html.parser')

    state_tag = soup.find('script', type='application/json')
    if state_tag is None:
        # E.g. an error page: there is nothing to parse.
        return None

    try:
        queries = json.loads(state_tag.text)["props"]["pageProps"]["dehydratedState"]["queries"]
    except (ValueError, KeyError, TypeError):
        # Malformed JSON or an unexpected document layout.
        return None

    # Ensure the query found is the query we sent: every filter plus the page.
    expected = dict(query, Page=str(page))

    for saved_query in queries:
        key = saved_query.get("queryKey") or ()
        if len(key) < 3:
            continue

        # Entries of a different shape are skipped instead of raising.
        params = key[2].get("query") if isinstance(key[2], dict) else None
        if not isinstance(params, dict):
            continue

        if all(params.get(name) == value for name, value in expected.items()):
            return saved_query
    return None

In [None]:
print("URL =", url_before + "1" + url_after)

# Page 1 is fetched first because its payload carries the last page number.
information = get_query_response(1)

if not information:
    print("Unable to find query response")
    # quit() is an interactive-shell helper installed by the site module;
    # raising SystemExit is the dependable way to stop, and the non-zero
    # status marks the run as failed.
    raise SystemExit(1)
from_page = 1
to_page = int(information["state"]["data"]["data"]["last_page"])

for page in range(from_page, to_page + 1):
    # Reuse the page-1 payload downloaded above rather than requesting it again.
    response = information if page == 1 else get_query_response(page)

    # Pages without a matching response are skipped, but still reported below.
    if response:
        apartments.extend(response["state"]["data"]["data"]["children"])
    print("Page =", str(page) + "/" + str(to_page), "| Apartments =", len(apartments))

# Clean Data

In [None]:
# Keep only listings whose total USD price does not exceed 300,000.
apartments = list(filter(
    lambda listing: listing["price"]["total_price"]["usd"] <= 300_000,
    apartments,
))

# Collapse each title to the first known keyword it contains; titles that
# contain none of the keywords are left as they are.
for apartment in apartments:
    apartment["title"] = next(
        (
            keyword
            for keyword in ("ძველი", "ახალი", "მშენებარე")
            if keyword in apartment["title"]
        ),
        apartment["title"],
    )

# Write to CSV

In [None]:
# Write a header row followed by one row per listing to apartments.csv.
# newline='' lets the csv module control line endings itself.
with open('apartments.csv', 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)

    field = ["id", "neighborhood", "street", "title", "price", "floor", "total_floors", "rooms", "bedrooms", "sqm"]
    writer.writerow(field)

    for apartment in apartments:
        # Named listing_id rather than id so the builtin id() is not shadowed
        # for the rest of the session.
        listing_id = apartment["id"]
        neighborhood = apartment["place"]
        street = apartment["desc_text"]
        title = apartment["title"]
        price = apartment["price"]["total_price"]["usd"]

        # Facility labels are read by position: floor info, rooms, bedrooms, area.
        facilities = apartment["facilities"]
        # The first label is split on "/" into the floor and the total floors.
        floors = facilities[0]["label"].split("/")
        floor = floors[0]
        total_floors = floors[1]
        # Going through float first accepts labels such as "3.0" —
        # presumably the room count can arrive in that form.
        rooms = int(float(facilities[1]["label"]))
        bedrooms = facilities[2]["label"]
        sqm = float(facilities[3]["label"])

        writer.writerow([listing_id, neighborhood, street, title, price, floor, total_floors, rooms, bedrooms, sqm])