In [12]:
import json
import requests
from copy import deepcopy
import os
import sys

# Put the parent of the working directory on sys.path so that the
# `import utils` that follows can be resolved (presumably utils lives there).
CURR_DIR = os.getcwd()
# os.path.dirname honours the platform's path separator; splitting on "/"
# yields an empty string on Windows and for directories directly under root.
PARENT_DIR = os.path.dirname(CURR_DIR)
# Re-running this cell in the same kernel must not add duplicate entries.
if PARENT_DIR not in sys.path:
    sys.path.append(PARENT_DIR)
import utils

In [13]:
# Rental listing sites considered as scrape targets.
target_domains = [
    "https://www.room.nl/en/",
    "https://kamernet.nl/en",
    "https://www.funda.nl/en/",
    "https://www.pararius.com/english"
]

# Only the first entry is used in the visible cells: it is stored on every
# record as its "domain" value.
domain = target_domains[0]

In [14]:
# Page size ("limit") of the first search request; in the visible cells that
# response is only used to read the total number of results.
n_results = 25

In [15]:
# First request against the listings endpoint: a small page, newest
# publications first.
search_domain = "https://roomapi.hexia.io/api/v1/actueel-aanbod?"
search_params = "limit=%s&locale=en_GB&page=0&sort=-publicationDate" % n_results

search_url = search_domain + search_params

# requests applies no timeout by default; without one a stalled connection
# would block this cell indefinitely.
resp = requests.post(search_url, timeout=30)
# Surface HTTP errors directly instead of failing later while trying to parse
# an error page as JSON.
resp.raise_for_status()
resp_dict = resp.json()

In [6]:
# The first response reports how many listings exist in total; request them
# all in a single page by using that count as the limit.
n_total_results = resp_dict['_metadata']['total_search_count']
updated_search_params = "limit=%s&locale=en_GB&page=0&sort=-publicationDate" % n_total_results

updated_search_url = search_domain + updated_search_params

# requests applies no timeout by default; this response can be large, so allow
# a generous but finite wait.
updated_resp = requests.post(updated_search_url, timeout=60)
# Surface HTTP errors directly instead of failing later while trying to parse
# an error page as JSON.
updated_resp.raise_for_status()
updated_resp_dict = updated_resp.json()

In [7]:
# The listing objects are read from the "data" key of the response payload.
result_rentals = updated_resp_dict['data']

In [8]:
# Top-level fields of a listing object that are copied into the result as-is.
L1_TARGET_RENTAL_KEYS = [
    'ID', 'postalcode', 'street', 'houseNumber', 'houseNumberAddition',
    'gemeenteGeoLocatieNaam', 'rentBuy', 'availableFromDate', 'areaDwelling',
    'totalRent', 'netRent', 'calculationRent', 'serviceCosts', 'heatingCosts',
    'additionalCosts', 'numberOfReactions', 'publicationDate', 'closingDate',
    'isWoningruil', 'urlKey', 'infoveld', 'specifiekeVoorzieningen'
]

# Key paths into nested objects of a listing; each path is followed key by key
# and the value found is stored under the path joined with "_".
Ln_TARGET_RENTAL_KEYS = [
    ['quarter', 'name'], ['corporation', 'name'], ['dwellingType', 'localizedName'],
    ['sleepingRoom', 'amountOfRooms'], ['sleepingRoom', 'naam'], ['kitchen', 'localizedName'],
    ['floor', 'verdieping'], ['woningsoort', 'localizedNaam']
]

def retrieve_rental_data(obj_data,
                         layer1_target_keys = L1_TARGET_RENTAL_KEYS.copy(),
                         layern_target_keys = Ln_TARGET_RENTAL_KEYS.copy()):
    """Extract a flat dict of selected values from a nested listing object.

    Args:
        obj_data: Mapping to read from (may contain nested mappings).
        layer1_target_keys: Keys read directly from ``obj_data`` and stored
            under the same name. ``None`` skips this step.
        layern_target_keys: Key paths (sequences of keys) followed into nested
            mappings. The value at the end of a path is stored under the path's
            keys joined with ``"_"``, e.g. ``['quarter', 'name']`` becomes
            ``'quarter_name'``. ``None`` skips this step.

    Returns:
        dict containing the top-level values followed by the nested values.

    Raises:
        KeyError: A requested key is missing.
        TypeError: An intermediate value on a key path is not subscriptable
            (for example ``None``).
        ValueError: A key path is empty.

    The key sequences passed in are never modified, so the module-level key
    lists (and the defaults derived from them) can be reused across calls.
    """
    results = {}

    # Top-level values are copied under their own key.
    if layer1_target_keys is not None:
        for key in layer1_target_keys:
            results[key] = obj_data[key]

    if layern_target_keys is None:
        return results

    for key_path in layern_target_keys:
        if len(key_path) == 0:
            raise ValueError("nested key path must contain at least one key")
        # Walk the path without consuming it; popping from the caller's list
        # would corrupt the shared key definitions for every later call.
        value = obj_data
        for key in key_path:
            value = value[key]
        results["_".join(str(key) for key in key_path)] = value

    return results

In [9]:
# Load the key-renaming table that is applied to every extracted record before
# it is pushed to the DB. The context manager closes the file handle once it
# has been parsed; the encoding is pinned so the result does not depend on the
# platform default.
with open("db_mappings/room_mapping.json", encoding="utf-8") as mapping_f:
    db_mapping_dict = json.load(mapping_f)

In [10]:
# Flatten each usable listing into a record whose keys are renamed through
# db_mapping_dict.
short_results = []
for rental_obj in result_rentals:
    # Work on per-listing copies: the key lists are adjusted below and must not
    # leak changes into the module-level definitions.
    tmp_l1_keys = deepcopy(L1_TARGET_RENTAL_KEYS)
    tmp_ln_keys = deepcopy(Ln_TARGET_RENTAL_KEYS)

    # Listings without "postalcode" may carry "postcode" instead; request
    # whichever one is actually present.
    if "postalcode" in tmp_l1_keys and "postalcode" not in rental_obj:
        tmp_l1_keys.remove("postalcode")
        if "postcode" in rental_obj:
            tmp_l1_keys.append("postcode")

    # Skip listings that lack a street, a house number, or a (non-null)
    # municipality name.
    if not (
        "street" in rental_obj
        and "houseNumber" in rental_obj
        and "gemeenteGeoLocatieNaam" in rental_obj
        and rental_obj["gemeenteGeoLocatieNaam"] is not None
    ):
        continue

    truncated_info_dict = retrieve_rental_data(rental_obj, tmp_l1_keys, tmp_ln_keys)

    # Collapse the list of amenity objects into one comma-separated string of
    # their localized names.
    if "specifiekeVoorzieningen" in truncated_info_dict:
        amenity_details = truncated_info_dict["specifiekeVoorzieningen"]
        truncated_info_dict['specifiekeVoorzieningen'] = ", ".join(
            amenity['localizedName'] for amenity in amenity_details
        )

    truncated_info_dict['domain'] = domain

    # Rename every key via the mapping table before collecting the record.
    short_results.append(
        {db_mapping_dict[field]: value for field, value in truncated_info_dict.items()}
    )

In [10]:
# Hand the flattened, renamed records to the project's DB helper (defined in
# the local `utils` module, which is not shown in this notebook).
utils.push_to_db(short_results)