#Recolección datos M2

In [1]:
import requests
import json
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

In [2]:
def get_config():
    """
    Function to hold the configuration details like URL, headers.
    """
    config = {
        "url": "https://commons-api.metrocuadrado.com/v1/api/commons/queries",
        "headers": {
          'authority': 'commons-api.metrocuadrado.com',
          'accept': '*/*',
          'accept-language': 'en-US,en;q=0.5',
          'cache-control': 'no-cache',
          'content-type': 'application/json',
          'origin': 'https://commons.metrocuadrado.com',
          'pragma': 'no-cache',
          'x-api-key': '6JgwwXGxlC921DP4SB4ST6Jo6OO7rv3t4yXn5Y8y',
          'x-audit-client-id': ''
        }
    }
    return config

In [3]:
def parse_features(features):
    """
    Parses the features list into a dictionary.
    """
    features_dict = {}
    for feature in features:
        if ':' in feature:
            key, value = feature.split(':', 1)
            features_dict[key] = value
    return features_dict

In [4]:
def flatten_data(data):
    """
    Flattens nested JSON data and integrates features into the DataFrame.
    """
    flat_data = []
    for entry in data:
        # Parse and integrate features
        if 'features' in entry:
            features_dict = parse_features(entry['features'])
            entry.update(features_dict)
            del entry['features']  # Remove the original features list

        flat_entry = pd.json_normalize(entry, sep='_')
        flat_data.append(flat_entry)

    return pd.concat(flat_data, ignore_index=True) if flat_data else pd.DataFrame()

In [5]:
def request_data(url, headers, payload):
    """
    Function to make a POST request and return the data.
    """
    response = requests.post(url, data=json.dumps(payload), headers=headers)
    if response.status_code == 200:
        return response.json()
    else:
        print("Failed to retrieve data. Status code:", response.status_code)
        return None

In [6]:
def convert_to_geodf(df, lon_col='location_lon', lat_col='location_lat'):
    """
    Converts a DataFrame with longitude and latitude columns to a GeoDataFrame.
    """
    # Create a new column for geometry
    df['geometry'] = df.apply(lambda row: Point(float(row[lon_col]), float(row[lat_col])), axis=1)
    
    # Convert DataFrame to GeoDataFrame
    gdf = gpd.GeoDataFrame(df, geometry='geometry', crs='EPSG:4326')

    return gdf

In [7]:
def spatial_join(target_gdf, other_gdf, how='inner', op='intersects'):
    """
    Performs a spatial join between two GeoDataFrames.

    :param target_gdf: The target GeoDataFrame.
    :param other_gdf: The other GeoDataFrame to join with.
    :param how: Type of join - 'left', 'right', 'inner' (default: 'inner').
    :param op: Binary predicate - 'intersects', 'contains', 'within', etc. (default: 'intersects').
    :return: A GeoDataFrame after performing the spatial join.
    """
    joined_gdf = gpd.sjoin(target_gdf, other_gdf, how=how, op=op)
    return joined_gdf

In [8]:
def get_payload(status):
    return {
        "queries": [
            {
                "types": ["propertiesByFiltersQuery"],
                "filter": {
                    "propertyTypeId": {"values": ["1", "2"]},
                    "businessTypeId": {"values": ["1", "3"]},
                    "status": {"values": [status]},
                    "priceRange": {"values": ["0", "100000000000000000"]},
                    "geoShape": {"values": [["6.217012327817175", "-77.56347656250001"], ["6.217012327817175", "-67.96142578125001"], ["3.2502085616531686", "-67.96142578125001"], ["3.2502085616531686", "-77.56347656250001"]]},
                    "builtArea": {"values": ["0", "100000000000000000"]}
                },
                "batch": {
                    "realEstate": {"from": 0},
                    "seller": {"from": 0}
                }
            }
        ]
    }

In [10]:
def orchestrate_calls():
    config = get_config()

    #initial_payload = get_payload("Nuevo")
    initial_payload = get_payload("Usado")

    all_data = []  # List to store all flattened data

    response = request_data(config["url"], config["headers"], initial_payload)
    if not response:
        return  # Exit if no response

    # Extract pagination details from the initial response
    real_estate_pages = response.get('data', {}).get('result', {}).get('propertiesByFiltersQuery', {}).get('batch', {}).get('realEstate', {}).get('pages', [])
    seller_pages = response.get('data', {}).get('result', {}).get('propertiesByFiltersQuery', {}).get('batch', {}).get('seller', {}).get('pages', [])

    for i, (real_estate_from, seller_from) in enumerate(zip(real_estate_pages, seller_pages)):
        next_payload = initial_payload.copy()
        next_payload['queries'][0]['batch']['realEstate']['from'] = real_estate_from
        next_payload['queries'][0]['batch']['seller']['from'] = seller_from

        response = request_data(config["url"], config["headers"], next_payload)
        if not response:
            break  # Exit if no response or end of data

        properties = response.get('data', {}).get('result', {}).get('propertiesByFiltersQuery', {}).get('properties', [])
        flat_properties = flatten_data(properties)
        all_data.append(flat_properties)

        if i >= len(real_estate_pages):  # Prevent infinite loop, set SOME_LIMIT as needed
            break

    # Concatenate all data into a single DataFrame
    final_df = pd.concat(all_data, ignore_index=True)
    
    # Assume final_df is your DataFrame from the previous steps
    geo_df = convert_to_geodf(final_df)

    # Load or create another GeoDataFrame to join with
    isocronas = gpd.read_file('data/Isocronas')

    # Perform the spatial join
    joined_gdf = spatial_join(geo_df, isocronas)
    
    return joined_gdf

    # Run the main orchestration function

data = orchestrate_calls()

  df['geometry'] = df.apply(lambda row: Point(float(row[lon_col]), float(row[lat_col])), axis=1)
  data = orchestrate_calls()


In [13]:
data

Unnamed: 0,agent,roomsNumber,areaRangeLabel,updatedDate,salePriceRangeLabel,adminPrice,price,checked,id,area,...,conBanoServicio,conCuartoUtil,conTerraza,conPisoPorcelanato,geometry,index_right,group_inde,Tiempo,layer,Linea
2,Gonzalo Moreno,3,101 a 200,2023-11-21T14:58:43.000Z,1.000'000.001 o m¿s millones,1500000,1535000000,Inmueble No Verificado,815-M3909728,200,...,,,,,POINT (-74.05200 4.66500),31,4,20 min,E16,Linea 1
183,,3,201 a 300,2023-11-03T14:36:35.000Z,1.000'000.001 o m¿s millones,2301494,4300000000,Inmueble No Verificado,MC4654505,271.06,...,,,,,POINT (-74.05829 4.67444),31,4,20 min,E16,Linea 1
368,,1,Hasta 60,2023-12-14T03:10:04.000Z,400'000.001 a 500'000.000,580696,450000000,Inmueble No Verificado,4498-M4674201,55,...,,,,,POINT (-74.05600 4.67300),31,4,20 min,E16,Linea 1
375,Orlando Forero Muñoz,3,101 a 200,2023-12-14T03:10:10.000Z,1.000'000.001 o m¿s millones,863000,1200000000,Inmueble No Verificado,34-M3886623,150,...,,,,,POINT (-74.05553 4.67077),31,4,20 min,E16,Linea 1
495,,2,61 a 100,2023-11-20T23:08:29.000Z,500'000.001 a 650'000.000,748000,590000000,Inmueble No Verificado,MC3827431,86,...,,,,,POINT (-74.05708 4.67310),31,4,20 min,E16,Linea 1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9421,,5,401 a 500,2024-01-02T17:26:38.000Z,650'000.001 a 880'000.000,,870000000,Inmueble No Verificado,RV-M4537661,441.7,...,,,,,POINT (-74.10100 4.69300),65,4,5 min,5,Linea 2
5687,,3,401 a 500,2023-12-13T14:39:00.000Z,880'000.001 a 1.000'000.000,0,970000000,Inmueble No Verificado,MC4719343,440,...,,,,,POINT (-74.16432 4.61977),37,2,5 min,E3,Linea 1
6853,,2,61 a 100,2023-12-28T03:10:08.000Z,300'000.001 a 400'000.000,310000,320000000,Inmueble No Verificado,3325-M3750648,62,...,,,,,POINT (-74.07782 4.60461),13,0,5 min,E12,Linea 1
6905,,2,61 a 100,2023-12-28T03:10:08.000Z,300'000.001 a 400'000.000,310000,320000000,Inmueble No Verificado,3325-M3750648,62,...,,,,,POINT (-74.07782 4.60461),13,0,5 min,E12,Linea 1


In [15]:
data.applymap(lambda x: x.encode('unicode_escape').\
                 decode('utf-8') if isinstance(x, str) else x).\
                    to_excel('data/output_spatial_filtered_usado.xlsx')