# Extract the Data

In [None]:
# Required libraries
import os
from dotenv import load_dotenv, find_dotenv
import base64
import urllib
import requests as rq
import json
import pandas as pd
import time

In [None]:
# Get authentication token
def get_oauth_token():
    """Request an OAuth bearer token from the Idealista API.

    Loads ``creds.env`` with python-dotenv, reads ``API_KEY`` and ``SECRET``
    from the environment, and posts them as HTTP Basic credentials to the
    token endpoint using the ``client_credentials`` grant.

    Returns:
        str: The ``access_token`` field of the JSON token response.

    Raises:
        RuntimeError: If ``API_KEY`` or ``SECRET`` is not set.
        requests.HTTPError: If the token endpoint answers with an error status.
    """
    url = "https://api.idealista.com/oauth/token"

    load_dotenv(find_dotenv('creds.env'))  # Load .env file
    apikey = os.environ.get("API_KEY")
    secret = os.environ.get("SECRET")
    if not apikey or not secret:
        # Fail with a clear message instead of a TypeError on `None + ':'`.
        raise RuntimeError(
            "API_KEY and SECRET must be defined (e.g. in creds.env)"
        )

    # Basic auth value: base64("apikey:secret"), decoded to text directly
    # rather than slicing the repr of a bytes object.
    apikey_secret = f"{apikey}:{secret}"
    auth = base64.b64encode(apikey_secret.encode('utf-8')).decode('ascii')

    headers = {
        'Authorization': 'Basic ' + auth,
        'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8',
    }
    # requests encodes the dict into the query string itself, so there is no
    # need to rely on urllib.parse having been imported as a side effect.
    params = {'grant_type': 'client_credentials'}  # ,'scope':'read'
    content = rq.post(url, headers=headers, params=params, timeout=30)  # Get response
    # Surface authentication/HTTP failures here rather than as a KeyError below.
    content.raise_for_status()
    bearer_token = json.loads(content.text)['access_token']  # Get access token

    return bearer_token

In [None]:
# Get list of properties
def search_api(token, params):
    """Send a property search request to the Idealista API.

    Args:
        token: Bearer token, as returned by ``get_oauth_token``.
        params: Search filters, sent as query-string parameters.

    Returns:
        requests.Response: The raw response. It is also printed, which shows
        the HTTP status code. The status is not checked here.
    """
    # NOTE(review): the country segment of the URL is fixed to 'es' regardless
    # of any "country" entry in params — confirm whether that is intended.
    url = "https://api.idealista.com/3.5/es/search"

    headers = {
        # The header value must not repeat the header name.
        'Content-Type': 'multipart/form-data;',
        'Authorization': 'Bearer ' + token,
    }
    # A timeout prevents a stalled connection from hanging the notebook.
    content = rq.post(url, headers=headers, params=params, timeout=30)  # Get response

    print(content)
    return content

**Note**: *locationId* goes from 0-EU-ES-01 to 0-EU-ES-56 for Spain.

**REQUIREMENTS**:
- country = ['es', 'it', 'pt']
- operation = ['sale', 'rent']
- propertyType = ['homes', 'offices', 'premises', 'garages', 'bedrooms']
- each request must specify either a center + distance or a locationId

In [None]:
# Query parameters for the search request
params = dict(
    country='es',
    operation="rent",
    propertyType="homes",
    locationId="0-EU-ES-46",
    maxItems=50,
)

# Test connection: obtain a token, then run one search with it
token = get_oauth_token()
result = search_api(token=token, params=params)

In [None]:
# Show the raw response body as text (rendered as the cell output)
result.text

In [None]:
# Read the 'totalPages' field from the JSON response body
totalPages = json.loads(result.text)['totalPages']
print(totalPages)

In [None]:
# Read the 'actualPage' field from the JSON response body
actualPage = json.loads(result.text)['actualPage']
print(actualPage)

In [None]:
# Build the initial DataFrame from the 'elementList' entries of the response
df_tot = pd.DataFrame(json.loads(result.text)["elementList"])
df_tot

In [None]:
# Fetch the remaining pages and append them to df_tot.
# Assumes the first request returned page 1 and that totalPages is the number
# of the last page (verify against the API docs); the range therefore has to
# end at totalPages + 1, otherwise the last page is never requested.
for i in range(2, totalPages + 1):
    try:
        params['numPage'] = i
        result = search_api(token, params)
        df = pd.DataFrame(json.loads(result.text)["elementList"])
        df_tot = pd.concat([df_tot, df])
        time.sleep(5)  # pause between requests
    except Exception as e:
        # Best effort: report the failing page and continue with the next one.
        print(e, "Page: ", i)

In [None]:
# Replace the index with a fresh 0..n-1 range, discarding the old one
# (pages were concatenated without ignore_index, so index labels repeat)
df_tot.reset_index(drop=True, inplace=True)

In [None]:
# Are all propertyCode values unique? This shows the number of distinct
# codes; compare it with the number of rows in df_tot.
df_tot['propertyCode'].nunique()

In [None]:
# Display the combined DataFrame
df_tot