In [9]:
import pandas as pd
import json
import numpy as np

In [None]:

def safe_get(data, keys, default=None):
    """Safely get nested dictionary values"""
    current = data
    for key in keys:
        if isinstance(current, dict) and key in current:
            current = current[key]
        else:
            return default
    return current

def update_main(df2):

    consolidate = pd.read_csv('../cleaned/localidades.csv')
    consolidate = consolidate.set_index('id')
    merged_df =   consolidate.combine_first(df2)  
    
    merged_df.to_csv('../cleaned/localidades.csv')

# Extract buckets data
def parse_buckets_json(localidad_id):
    """
    Parse JSON file with bucket data and create a DataFrame with bucket_min as columns
    
    Args:
        file_path (str): Path to the JSON file
    
    Returns:
        pandas.DataFrame: DataFrame with id and bucket values as columns
    """
    # Read JSON from file

    with open(f"../sources/{localidad_id}/minimum_stay.json", 'r') as file:
        data = json.load(file)
    
    # Extract id and buckets using safe_get
    submarket_id = safe_get(data, ['payload', 'submarket_id'])
    buckets = safe_get(data, ['payload', 'buckets'], [])
        
        # If no buckets found, return empty DataFrame
    if not buckets:
        print("No buckets found in the JSON data.")
        return pd.DataFrame()
        
        # Create initial dictionary with id
    row_data = {'id': int(submarket_id)}
        
        # Add each bucket value to the row
    for bucket in buckets:
        bucket_min = safe_get(bucket, ['bucket_min'])
        value = safe_get(bucket, ['value'])
            
            # Use bucket_min as column name
        if bucket_min is not None:
            # Create column name like 'bucket_1' for bucket_min = 1
            column_name = f'rent_min_stay_{bucket_min}_nights'
            row_data[column_name] = value
        
        # Create DataFrame from the single row
    df = pd.DataFrame([row_data])
    print(df)
    df = df.set_index('id')

    updated = update_main(df)
        
    return updated

In [13]:
if __name__ == "__main__":
    # Specify your JSON file path here
    localidad_id="142649"
    # Parse the JSON file
    df = parse_buckets_json(localidad_id)
    print(df)


       id  rent_min_stay_1_nights  rent_min_stay_2_nights  \
0  142649                      30                      17   

   rent_min_stay_3_nights  rent_min_stay_4_nights  rent_min_stay_7_nights  \
0                       7                       0                       1   

   rent_min_stay_30_nights  
0                        1  
None
