In [None]:
import base64
import requests as rq
import json
import pandas as pd

In [None]:
# Define a function in order to obtain our personalised token

def get_oauth_token():
    '''
    Request an OAuth2 bearer token from the Idealista token endpoint and return it.

    The API key and secret are read from the IDEALISTA_API_KEY and IDEALISTA_SECRET
    environment variables, so the credentials are never stored in the notebook.

    Returns:
        The value of the 'access_token' field of the token response.

    Raises:
        RuntimeError: if either environment variable is missing or empty.
        requests.HTTPError: if the token endpoint answers with an error status.
    '''
    import os   # Local import: only needed here to read the credentials

    api_key = os.environ.get('IDEALISTA_API_KEY')   # Your API key provided by Idealista
    secret = os.environ.get('IDEALISTA_SECRET')   # Your secret code provided by Idealista
    if not api_key or not secret:
        raise RuntimeError('Set the IDEALISTA_API_KEY and IDEALISTA_SECRET environment variables '
                           'before requesting a token')

    message = api_key + ":" + secret   # "key:secret" pair used for HTTP Basic authentication

    auth = "Basic " + base64.b64encode(message.encode("ascii")).decode("ascii")   # Base64-encode the pair

    headers_dic = {"Authorization" : auth,
                   "Content-Type" : "application/x-www-form-urlencoded;charset=UTF-8"}   # Request headers

    params_dic = {"grant_type" : "client_credentials",   # Request params
                  "scope" : "read"}

    r = rq.post("https://api.idealista.com/oauth/token",   # Perform the request with the api url, headers and params
                headers = headers_dic,
                params = params_dic,
                timeout = 30)   # Do not hang forever if the endpoint is unreachable

    r.raise_for_status()   # Fail with the HTTP status instead of an opaque JSON/Key error

    return r.json()['access_token']   # Extract the token from the JSON response

In [None]:
# These are the params we will use to filter our search

# Base search url
base_url = 'https://api.idealista.com/3.5/'
# Search country (es, it, pt)
country = 'es'
# Search language (es, it, pt, en, ca)
language = 'es'
# Max items per call, the maximum set by Idealista is 50
max_items = '50'
# Kind of operation (sale, rent)
operation = 'sale'
# Type of property (homes, offices, premises, garages, bedrooms)
property_type = 'homes'
# Order of the listings, consult documentation for all the available orders
order = 'distance'
# Coordinates of the search center
center = '37.391285,-5.962270'
# Max distance from the center
distance = '60000'
# How to sort the found items
sort = 'asc'
# If the owner is a bank
bankOffer = 'false'
# Max price of the listings (disabled)
# maxprice = '750'

In [None]:
# Define a function to obtain our search url

def define_search_url():
    '''
    Build the search url from the module-level search params.

    The page number is left as a '%s' placeholder so the caller can fill it in
    later with the % operator.
    '''
    query_fields = [
        ('operation', operation),
        ('maxItems', max_items),
        ('order', order),
        ('center', center),
        ('distance', distance),
        ('propertyType', property_type),
        ('sort', sort),
        ('numPage', '%s'),   # Placeholder, replaced with the page number by the caller
        # ('maxPrice', maxprice),
        ('language', language),
    ]

    # Join every "name=value" pair with '&' to form the query string
    query_string = '&'.join(name + '=' + value for name, value in query_fields)

    return base_url + country + '/search?' + query_string

In [None]:
# Build the search url template (it still contains the '%s' page placeholder) and display it
url = define_search_url()
url

'https://api.idealista.com/3.5/es/search?operation=sale&maxItems=50&order=distance&center=37.391285,-5.962270&distance=60000&propertyType=homes&sort=asc&numPage=%s&language=es'

In [None]:
def search_api(url, token=None):
    '''
    POST a search request to the given url and return the decoded JSON response.

    Args:
        url: search url with the page number already filled in.
        token: optional bearer token to reuse across calls; when omitted a new
            one is requested through get_oauth_token().

    Returns:
        The response body parsed from JSON.

    Raises:
        requests.HTTPError: if the API answers with an error status.
        ValueError: if the response body is not valid JSON.
    '''
    if token is None:
        token = get_oauth_token()   #  Get the personalised token

    # NOTE(review): the Content-Type value repeats the header name inside the value;
    # it is kept unchanged here - confirm against the API documentation what it should be.
    headers = {'Content-Type': 'Content-Type: multipart/form-data;',   # Define the search headers
               'Authorization' : 'Bearer ' + token}

    content = rq.post(url, headers = headers, timeout = 30)   # Do not hang forever on a stalled connection

    content.raise_for_status()   # Surface HTTP errors with their status code instead of a JSON decode failure

    return content.json()   # Decode the JSON body

In [None]:
# The search url carries a '%s' placeholder for the page number; this is the first search, so we request page 1
pagination = 1
first_search_url = url % pagination

In [None]:
# Run the search for the first page using the paginated url
results = search_api(first_search_url)

In [None]:
# Display the raw JSON response of the first page
results

{'elementList': [{'propertyCode': '104379438',
   'thumbnail': 'https://img3.idealista.com/blur/WEB_LISTING/0/id.pro.es.image.master/c6/05/c5/1218705239.jpg',
   'numPhotos': 14,
   'floor': '1',
   'price': 114000.0,
   'priceInfo': {'price': {'amount': 114000.0,
     'priceDropInfo': {'formerPrice': 119000.0,
      'priceDropValue': 5000,
      'priceDropPercentage': 4}}},
   'propertyType': 'flat',
   'operation': 'sale',
   'size': 90.0,
   'exterior': True,
   'rooms': 2,
   'bathrooms': 1,
   'address': 'calle Memphis',
   'province': 'Sevilla',
   'municipality': 'Sevilla',
   'district': 'San Pablo',
   'country': 'es',
   'neighborhood': 'San Pablo',
   'latitude': 37.3913318,
   'longitude': -5.9622749,
   'showAddress': False,
   'url': 'https://www.idealista.com/inmueble/104379438/',
   'distance': '5',
   'description': '¡OPORTUNIDAD! Se vende piso en calle Memphis. El piso es un primero, el cual consta de 90m2 distribuidos en 2 dormitorios (antes 3), cocina, baño y salón 

In [None]:
# Number of result pages, read from the 'totalPages' field of the first response
total_pages = results['totalPages']
total_pages

203

In [None]:
def results_to_df(results):
    '''
    Convert the 'elementList' entry of a search response into a dataframe.

    results: decoded JSON response (a dict) holding the listings under 'elementList'
    returns: a dataframe built from those listings
    '''
    listings = results['elementList']   # Listing records returned by the search

    return pd.DataFrame(listings)

In [None]:
def concat_df(df, df_tot):
    '''
    Append an individual dataframe (df) below the main dataframe (df_tot).

    Neither input is modified; the concatenated dataframe is returned, with the
    rows of df_tot first and the rows of df after them.
    '''
    return pd.concat([df_tot, df])

In [None]:
# Convert the first page of results into a dataframe and display it
df = results_to_df(results)
df

Unnamed: 0,propertyCode,thumbnail,numPhotos,floor,price,priceInfo,propertyType,operation,size,exterior,...,has3DTour,has360,hasStaging,topNewDevelopment,topPlus,highlight,externalReference,parkingSpace,newDevelopmentFinished,labels
0,104379438,https://img3.idealista.com/blur/WEB_LISTING/0/...,14,1,114000.0,"{'price': {'amount': 114000.0, 'priceDropInfo'...",flat,sale,90.0,True,...,False,False,False,False,False,,,,,
1,104577954,https://img3.idealista.com/blur/WEB_LISTING/0/...,24,4,95000.0,{'price': {'amount': 95000.0}},flat,sale,67.0,True,...,False,False,False,False,False,{'groupDescription': 'Destacado'},,,,
2,104088494,https://img3.idealista.com/blur/WEB_LISTING/0/...,23,12,187000.0,{'price': {'amount': 187000.0}},flat,sale,125.0,True,...,False,False,False,False,False,,,,,
3,104583410,https://img3.idealista.com/blur/WEB_LISTING/0/...,25,1,99995.0,{'price': {'amount': 99995.0}},flat,sale,53.0,True,...,False,False,False,False,False,{'groupDescription': 'Top'},V-SPA-513,,,
4,103550397,https://img3.idealista.com/blur/WEB_LISTING/0/...,25,2,270000.0,{'price': {'amount': 270000.0}},flat,sale,91.0,,...,False,True,False,False,False,{'groupDescription': 'Destacado'},CAN0000197397,,,
5,102916820,https://img3.idealista.com/blur/WEB_LISTING/0/...,1,3,44900.0,{'price': {'amount': 44900.0}},flat,sale,54.0,,...,False,False,False,False,False,,CS-19198-0001,,,
6,104717854,https://img3.idealista.com/blur/WEB_LISTING/0/...,8,en,95000.0,{'price': {'amount': 95000.0}},flat,sale,65.0,True,...,False,False,False,False,False,,San Pablo,,,
7,104669506,https://img3.idealista.com/blur/WEB_LISTING/0/...,13,3,128000.0,{'price': {'amount': 128000.0}},flat,sale,77.0,True,...,False,False,False,False,False,{'groupDescription': 'Destacado'},manuel-22041,,,
8,104455365,https://img3.idealista.com/blur/WEB_LISTING/0/...,21,2,107000.0,{'price': {'amount': 107000.0}},flat,sale,75.0,True,...,False,False,False,False,False,,,,,
9,103772430,https://img3.idealista.com/blur/WEB_LISTING/0/...,24,1,248000.0,{'price': {'amount': 248000.0}},flat,sale,67.0,True,...,False,False,False,False,False,{'groupDescription': 'Destacado'},1B4101168,"{'hasParkingSpace': True, 'isParkingSpaceInclu...",,


In [None]:
# There is no main dataframe yet, so we start from an empty one and append the first page to it
df_tot = concat_df(df, pd.DataFrame())

In [None]:
# Display the main dataframe (so far it only holds the first page)
df_tot

Unnamed: 0,propertyCode,thumbnail,numPhotos,floor,price,priceInfo,propertyType,operation,size,exterior,...,has3DTour,has360,hasStaging,topNewDevelopment,topPlus,highlight,externalReference,parkingSpace,newDevelopmentFinished,labels
0,104379438,https://img3.idealista.com/blur/WEB_LISTING/0/...,14,1,114000.0,"{'price': {'amount': 114000.0, 'priceDropInfo'...",flat,sale,90.0,True,...,False,False,False,False,False,,,,,
1,104577954,https://img3.idealista.com/blur/WEB_LISTING/0/...,24,4,95000.0,{'price': {'amount': 95000.0}},flat,sale,67.0,True,...,False,False,False,False,False,{'groupDescription': 'Destacado'},,,,
2,104088494,https://img3.idealista.com/blur/WEB_LISTING/0/...,23,12,187000.0,{'price': {'amount': 187000.0}},flat,sale,125.0,True,...,False,False,False,False,False,,,,,
3,104583410,https://img3.idealista.com/blur/WEB_LISTING/0/...,25,1,99995.0,{'price': {'amount': 99995.0}},flat,sale,53.0,True,...,False,False,False,False,False,{'groupDescription': 'Top'},V-SPA-513,,,
4,103550397,https://img3.idealista.com/blur/WEB_LISTING/0/...,25,2,270000.0,{'price': {'amount': 270000.0}},flat,sale,91.0,,...,False,True,False,False,False,{'groupDescription': 'Destacado'},CAN0000197397,,,
5,102916820,https://img3.idealista.com/blur/WEB_LISTING/0/...,1,3,44900.0,{'price': {'amount': 44900.0}},flat,sale,54.0,,...,False,False,False,False,False,,CS-19198-0001,,,
6,104717854,https://img3.idealista.com/blur/WEB_LISTING/0/...,8,en,95000.0,{'price': {'amount': 95000.0}},flat,sale,65.0,True,...,False,False,False,False,False,,San Pablo,,,
7,104669506,https://img3.idealista.com/blur/WEB_LISTING/0/...,13,3,128000.0,{'price': {'amount': 128000.0}},flat,sale,77.0,True,...,False,False,False,False,False,{'groupDescription': 'Destacado'},manuel-22041,,,
8,104455365,https://img3.idealista.com/blur/WEB_LISTING/0/...,21,2,107000.0,{'price': {'amount': 107000.0}},flat,sale,75.0,True,...,False,False,False,False,False,,,,,
9,103772430,https://img3.idealista.com/blur/WEB_LISTING/0/...,24,1,248000.0,{'price': {'amount': 248000.0}},flat,sale,67.0,True,...,False,False,False,False,False,{'groupDescription': 'Destacado'},1B4101168,"{'hasParkingSpace': True, 'isParkingSpaceInclu...",,


In [None]:
# Loop over the remaining result pages. It starts from 2, because we've already gotten the first page.
# Each page is collected in a list and concatenated once at the end. If a request fails (an HTTP/network
# error, a reply that is not valid JSON, or a reply without 'elementList'), the loop stops and keeps
# every page downloaded so far instead of crashing.
page_frames = [df_tot]
for i in range(2, total_pages + 1):

    url_page = url %(i)   # Add the pagination to the url
    try:
        results = search_api(url_page)   # Get the search results
        df = results_to_df(results)   # Save the results as a dataframe
    except (ValueError, KeyError, rq.RequestException) as err:
        print('Stopped at page', i, 'of', total_pages, '-', repr(err))
        break
    page_frames.append(df)

df_tot = pd.concat(page_frames)   # Concat all the collected pages into the main dataframe

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [None]:
# Mount Google Drive at /content/drive (google.colab is only available inside Google Colab)
# NOTE(review): the visible cells never read from or write to the mounted drive - confirm the mount is needed
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Display the main dataframe with every page collected so far
df_tot

Unnamed: 0,propertyCode,thumbnail,numPhotos,floor,price,priceInfo,propertyType,operation,size,exterior,...,has3DTour,has360,hasStaging,topNewDevelopment,topPlus,highlight,externalReference,parkingSpace,newDevelopmentFinished,labels
0,104379438,https://img3.idealista.com/blur/WEB_LISTING/0/...,14,1,114000.0,"{'price': {'amount': 114000.0, 'priceDropInfo'...",flat,sale,90.0,True,...,False,False,False,False,False,,,,,
1,104577954,https://img3.idealista.com/blur/WEB_LISTING/0/...,24,4,95000.0,{'price': {'amount': 95000.0}},flat,sale,67.0,True,...,False,False,False,False,False,{'groupDescription': 'Destacado'},,,,
2,104088494,https://img3.idealista.com/blur/WEB_LISTING/0/...,23,12,187000.0,{'price': {'amount': 187000.0}},flat,sale,125.0,True,...,False,False,False,False,False,,,,,
3,104583410,https://img3.idealista.com/blur/WEB_LISTING/0/...,25,1,99995.0,{'price': {'amount': 99995.0}},flat,sale,53.0,True,...,False,False,False,False,False,{'groupDescription': 'Top'},V-SPA-513,,,
4,103550397,https://img3.idealista.com/blur/WEB_LISTING/0/...,25,2,270000.0,{'price': {'amount': 270000.0}},flat,sale,91.0,,...,False,True,False,False,False,{'groupDescription': 'Destacado'},CAN0000197397,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45,102063111,https://img3.idealista.com/blur/WEB_LISTING/0/...,32,,350000.0,{'price': {'amount': 350000.0}},chalet,sale,285.0,,...,False,False,False,False,False,{'groupDescription': 'Destacado'},1949SG,"{'hasParkingSpace': True, 'isParkingSpaceInclu...",,
46,104500350,https://img3.idealista.com/blur/WEB_LISTING/0/...,29,,240000.0,{'price': {'amount': 240000.0}},chalet,sale,149.0,,...,False,False,False,False,False,,01468,"{'hasParkingSpace': True, 'isParkingSpaceInclu...",,
47,104603468,https://img3.idealista.com/blur/WEB_LISTING/0/...,38,,125000.0,{'price': {'amount': 125000.0}},countryHouse,sale,145.0,,...,False,False,False,False,False,,CRV-107,,,
48,104358287,https://img3.idealista.com/blur/WEB_LISTING/0/...,10,bj,146900.0,{'price': {'amount': 146900.0}},flat,sale,100.0,False,...,False,False,False,False,False,{'groupDescription': 'Destacado'},1435-M2387,"{'hasParkingSpace': True, 'isParkingSpaceInclu...",,


In [None]:
# Once we have all our data, we just need to save it as a csv file, we have created the following function for that:
import os

file_name = 'idealistaCompra.csv'

def df_to_csv(df):
    '''
    Save the given dataframe as a CSV file in the current working directory.

    The file is named after the module-level `file_name`. The positional row index
    is not written: after concatenating several pages it only holds repeated
    per-page row numbers. A named index is kept and written as regular column(s).

    Args:
        df: dataframe to save; it is not modified.

    Returns:
        The full path of the CSV file that was written.
    '''
    file_path = os.path.join(os.getcwd(), file_name)
    # Keep index levels that carry a name (they hold data); drop an anonymous positional index
    has_named_index = any(level_name is not None for level_name in df.index.names)
    df = df.reset_index(drop=not has_named_index)
    df.to_csv(file_path, index=False)   # Save it into a CSV
    return file_path

In [None]:
# Save the main dataframe as a CSV file and print the path returned by df_to_csv
csv_file_path = df_to_csv(df_tot)
print("CSV file saved at:", csv_file_path)

CSV file saved at: /content/idealistaCompra.csv
