In [20]:
from dotenv import load_dotenv
load_dotenv()

import pandas as pd
import os
import requests
import base64
import json
from datetime import datetime
from pathlib import Path

In [None]:
def get_project_root() -> Path:
    """Return the directory four levels above the current working directory.

    The result is derived from ``os.getcwd()``, so it depends on where the
    kernel was started rather than on where this notebook file lives.
    """
    root = Path(os.getcwd())
    # Climb one directory per iteration; ``.parent`` of a filesystem root is
    # the root itself, so shallow working directories clamp instead of failing.
    for _ in range(4):
        root = root.parent
    return root

In [None]:
def get_oauth_token(key: str, sec: str, timeout: float = 30) -> str:
    """Request a client-credentials bearer token from the Idealista OAuth endpoint.

    Args:
        key: API key, used as the HTTP Basic user name.
        sec: API secret, used as the HTTP Basic password.
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        The value of the ``access_token`` field of the JSON response.

    Raises:
        requests.HTTPError: If the endpoint answers with a 4xx/5xx status
            (for example, rejected credentials).
        requests.Timeout: If the server does not answer within ``timeout``.
        KeyError: If a successful response carries no ``access_token`` field.
    """
    url = "https://api.idealista.com/oauth/token"

    # HTTP Basic credentials are base64("<key>:<secret>").
    auth = base64.b64encode(f'{key}:{sec}'.encode('utf-8')).decode('utf-8')

    headers = {
        'Authorization': f"Basic {auth}",
        'Content-Type': 'application/x-www-form-urlencoded',
    }

    # The field name is 'scope'; the previous 'score' key was a typo, so the
    # requested scope was never actually transmitted.
    payload = {
        'grant_type': 'client_credentials',
        'scope': 'read',
    }

    # Send the fields as the form-encoded body announced by Content-Type
    # (``data=``) instead of appending them to the URL (``params=``).
    response = requests.post(url, headers=headers, data=payload, timeout=timeout)

    # Fail with the HTTP status rather than an opaque KeyError further down.
    response.raise_for_status()

    return json.loads(response.text)['access_token']

In [None]:
def search_api(
    token: str,
    page: int,
    operation: str = 'sale',
    property_type: str = 'homes',
    max_items: str = '50',
    location_id: str = '0-EU-ES-29-02-001-067',  # Malaga
    distance: str = '6000',
    timeout: float = 30,
):
    """Fetch one page of results from the Idealista search endpoint (Spain, v3.5).

    With only ``token`` and ``page`` supplied the request is the same as
    before the filters were parameterised; the keyword arguments merely
    expose the values that used to be hard-coded.

    Args:
        token: OAuth bearer token.
        page: Page number, sent as ``numPage``.
        operation: Sent as ``operation``.
        property_type: Sent as ``propertyType``.
        max_items: Sent as ``maxItems``.
        location_id: Sent as ``locationId``; the default is the id used for
            Malaga.
        distance: Sent as ``distance``.
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        The decoded JSON body of the response.

    Raises:
        ValueError: If the response body is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass). The
            download loop in this notebook catches this to switch credentials,
            so the HTTP status is intentionally not checked here.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    url = "https://api.idealista.com/3.5/es/search"

    headers = {
        'Authorization': f"Bearer {token}",
        # NOTE(review): this value repeats the header name and no multipart
        # body is actually sent (the filters travel in the query string).
        # Left untouched because the live API is known to accept it; confirm
        # against the service before correcting or dropping the header.
        'Content-Type': 'Content-Type: multipart/form-data'
    }

    params = {
        'numPage': page,
        'operation': operation,
        'propertyType': property_type,
        'maxItems': max_items,
        'locationId': location_id,
        'distance': distance
    }

    response = requests.post(url, headers=headers, params=params, timeout=timeout)

    return json.loads(response.text)

In [16]:
def persist_data(df_idealista: pd.DataFrame) -> None:
    """Append ``df_idealista`` to the pickled dataset under ``data/raw``.

    The target file is ``<project root>/data/raw/idealista_data.pickle``. When
    it already exists, its rows are loaded, the new rows are appended with a
    fresh integer index, and the file is overwritten. When it does not exist,
    a message is printed and the file is created from ``df_idealista`` alone.

    Args:
        df_idealista: Newly collected rows to store.
    """
    filename = "idealista_data.pickle"

    file_path = Path(get_project_root()) / 'data' / 'raw' / filename

    # Create data/raw on demand: previously a missing directory made the
    # first-run branch itself fail while writing the new file.
    file_path.parent.mkdir(parents=True, exist_ok=True)

    try:
        # NOTE(review): unpickling can execute arbitrary code; this is only
        # safe as long as the file is one this notebook wrote itself.
        df_prev = pd.read_pickle(file_path)
    except FileNotFoundError:
        print("No previous data collected.")
        df_idealista.to_pickle(file_path)
        return

    pd.concat([df_prev, df_idealista], ignore_index=True).to_pickle(file_path)

In [5]:
# Download every result page, moving on to the next IDEALISTA_APIKEY_<n> /
# IDEALISTA_SECRET_<n> pair from the environment whenever the current pair
# stops returning usable results.
credentials_index = 1
num_page = 1

results = []
access_token = None  # token of the credential pair currently in use

while True:
    apikey = os.environ.get(f"IDEALISTA_APIKEY_{credentials_index}")
    secret = os.environ.get(f"IDEALISTA_SECRET_{credentials_index}")

    if apikey is None or secret is None:
        print("No more credentials. Finishing execution...")
        break

    # Request a token once per credential pair instead of once per page.
    if access_token is None:
        access_token = get_oauth_token(apikey, secret)

    try:
        response = search_api(access_token, num_page)
    except ValueError:
        # search_api raises ValueError when the body is not JSON.
        print(f"Can't do more requests with credentials {credentials_index}. Trying with others.")
        credentials_index += 1
        access_token = None

        continue

    if 'elementList' not in response:
        # JSON without results (e.g. an error payload). Retrying the very same
        # request would loop forever, so move on to the next credential pair.
        print(f"Unexpected response with credentials {credentials_index}. Trying with others.")
        credentials_index += 1
        access_token = None

        continue

    print(f"Credentials: {credentials_index}\tPage number: {num_page}")

    listings = response['elementList']
    if listings:
        partial_df = pd.DataFrame.from_dict(listings)
        partial_df['DATE_RETRIEVED'] = datetime.now()
        results.append(partial_df)

    num_page += 1

    # num_page now holds the NEXT page to request. Stop only once it lies past
    # the last page: the previous `==` check ran after the increment, so it
    # skipped the final page and never matched for single-page results. An
    # empty page also ends the run, which bounds the loop if 'totalPages' is
    # absent from the response.
    total_pages = response.get('totalPages')
    if not listings or (total_pages is not None and num_page > total_pages):
        print("Finishing execution...")
        break

Page number: 1
Page number: 2
Page number: 3
Page number: 4
Page number: 5
Page number: 6
Page number: 7
Page number: 8
Page number: 9
Page number: 10
Page number: 11
Page number: 12
Page number: 13
Page number: 14
Page number: 15
Page number: 16
Page number: 17
Page number: 18
Page number: 19
Page number: 20
Page number: 21
Page number: 22
Page number: 23
Page number: 24
Page number: 25
Page number: 26
Page number: 27
Page number: 28
Page number: 29
Page number: 30
Page number: 31
Page number: 32
Page number: 33
Page number: 34
Page number: 35
Page number: 36
Page number: 37
Page number: 38
Page number: 39
Page number: 40
Page number: 41
Page number: 42
Page number: 43
Page number: 44
Page number: 45
Page number: 46
Page number: 47
Page number: 48
Page number: 49
Page number: 50
Page number: 51
Page number: 52
Page number: 53
Page number: 54
Page number: 55
Page number: 56
Page number: 57
Page number: 58
Page number: 59
Page number: 60
Page number: 61
Page number: 62
Page number: 63
P

In [22]:
# Merge the per-page frames into one table, store it, and display it.
if results:
    df = pd.concat(results, ignore_index=True)
    persist_data(df_idealista=df)
else:
    # pd.concat raises ValueError on an empty list; this happens when the
    # download loop collected nothing (for example, no credentials were set).
    print("No listings were collected; nothing to persist.")
    df = pd.DataFrame()
df

No previous data collected.


Unnamed: 0,propertyCode,thumbnail,externalReference,numPhotos,floor,price,propertyType,operation,size,exterior,...,hasPlan,has3DTour,has360,hasStaging,labels,superTopHighlight,topNewDevelopment,parkingSpace,newDevelopmentFinished,DATE_RETRIEVED
0,97416882,https://img3.idealista.com/blur/WEB_LISTING/0/...,BS171961,32.0,5,1074000.0,flat,sale,149.0,True,...,True,True,False,False,"[{'name': 'luxuryType', 'text': 'Lujo'}]",False,False,,,2022-10-08 17:22:16.562386
1,98401330,https://img3.idealista.com/blur/WEB_LISTING/0/...,JR176312,38.0,2,329500.0,flat,sale,132.0,True,...,True,True,False,False,,False,False,"{'hasParkingSpace': True, 'isParkingSpaceInclu...",,2022-10-08 17:22:16.562386
2,91845863,https://img3.idealista.com/blur/WEB_LISTING/0/...,BS151978,29.0,5,850000.0,penthouse,sale,174.0,True,...,True,True,False,False,"[{'name': 'luxuryType', 'text': 'Lujo'}]",False,False,,,2022-10-08 17:22:16.562386
3,98082302,https://img3.idealista.com/blur/WEB_LISTING/0/...,151703JC,35.0,,2495000.0,chalet,sale,629.0,False,...,True,True,False,False,"[{'name': 'villaType', 'text': 'Villa'}, {'nam...",False,False,"{'hasParkingSpace': True, 'isParkingSpaceInclu...",,2022-10-08 17:22:16.562386
4,96888896,https://img3.idealista.com/blur/WEB_LISTING/0/...,JR171245,32.0,4,1950000.0,duplex,sale,238.0,True,...,True,True,False,False,,False,False,"{'hasParkingSpace': True, 'isParkingSpaceInclu...",,2022-10-08 17:22:16.562386
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4673,98070201,,654,0.0,bj,175000.0,studio,sale,53.0,True,...,False,False,False,False,,False,False,,,2022-10-08 17:23:13.061836
4674,98366406,,222307,0.0,7,135000.0,flat,sale,95.0,True,...,False,False,False,False,,False,False,,,2022-10-08 17:23:13.061836
4675,98986313,,CAN0000189899,0.0,,207400.0,flat,sale,71.0,False,...,False,False,False,False,,False,False,,,2022-10-08 17:23:13.061836
4676,96610607,,3122,0.0,2,110000.0,flat,sale,36.0,True,...,False,False,False,False,"[{'name': 'apartamentoType', 'text': 'Apartame...",False,False,,,2022-10-08 17:23:13.061836
