In [None]:
import json
import pandas as pd
from pathlib import Path

def parse_listing_json(file_path, section):
    """Load one listings JSON file into a flat DataFrame.

    The listings are read from ``payload -> listings`` in the JSON document.
    Each listing becomes one row: its ``images`` entry is dropped, and a
    dict-valued ``location`` is replaced by separate ``lat`` / ``lng`` columns
    (a non-dict ``location`` is kept untouched).

    Args:
        file_path: Path of the JSON file to read (opened as UTF-8).
        section: Value written into the ``section`` column of every row.

    Returns:
        pandas.DataFrame with one row per listing plus a ``section`` column.

    Raises:
        KeyError: If the document has no ``payload`` / ``listings`` keys.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        document = json.load(handle)
    raw_listings = document['payload']['listings']

    records = []
    for entry in raw_listings:
        record = {}
        for key, value in entry.items():
            if key == 'images':
                continue
            record[key] = value

        coords = record.get('location')
        if isinstance(coords, dict):
            # Swap the nested location dict for two scalar columns.
            record.pop('location')
            record['lat'] = coords.get('lat')
            record['lng'] = coords.get('lng')

        records.append(record)

    df = pd.DataFrame(records)
    df['section'] = section

    # Short load summary for the notebook output.
    print(f"Loaded {len(df)} listings")
    print(f"\nColumns: {list(df.columns)}")
    print(f"\nDataFrame shape: {df.shape}")
    return df

In [None]:
def list_files_with_prefix_store_as_csv(prefix, folder_path="../sources/listings/",
                                        output_dir="../cleaned/listings/"):
    """Parse every listings file whose name starts with ``prefix`` and save one CSV.

    Files directly inside ``folder_path`` whose names start with ``prefix`` are
    parsed with ``parse_listing_json`` (in sorted name order), concatenated, and
    written to ``<output_dir>/<prefix>.csv``. Nothing is written when no file
    matches.

    NOTE(review): matching is a plain ``str.startswith``, so prefix ``"12"``
    also matches files named ``"123..."`` -- confirm the file naming scheme
    keeps prefixes unambiguous.

    Args:
        prefix: Filename prefix to match (str); also stored as the ``section``
            value of every parsed row and used as the CSV file name.
        folder_path: Directory that holds the source JSON files.
        output_dir: Directory the CSV is written to; created if missing.

    Returns:
        Sorted list of the matching file names, or ``[]`` when ``folder_path``
        is not an existing directory.
    """
    folder = Path(folder_path)
    # is_dir() also rejects a path that exists but is a regular file, which
    # would otherwise crash in iterdir().
    if not folder.is_dir():
        print(f"Folder {folder_path} does not exist")
        return []

    matching_files = sorted(
        f.name for f in folder.iterdir() if f.is_file() and f.name.startswith(prefix)
    )

    print(f"Found {len(matching_files)} files starting with '{prefix}' in {folder_path}:")
    for file in matching_files:
        print(f"  - {file}")

    all_dfs = []
    for file in matching_files:
        # Read from the folder that was actually listed, not a hard-coded path.
        df = parse_listing_json(folder / file, prefix)
        all_dfs.append(df)
        print(f"Loaded {len(df)} listings from {file}")

    if all_dfs:
        combined_df = pd.concat(all_dfs, ignore_index=True)
        output_folder = Path(output_dir)
        # to_csv does not create missing parent directories.
        output_folder.mkdir(parents=True, exist_ok=True)
        output_path = output_folder / f"{prefix}.csv"
        combined_df.to_csv(output_path, index=False)
        print(f"\nConcatenated {len(all_dfs)} files into single CSV")
        print(f"Total listings: {len(combined_df)}")
        print(f"Saved to {output_path}")
    else:
        print("No data to save")

    return matching_files


In [None]:
# Read the ids from localidades.csv; each id (as a string) is used below as a
# filename prefix.
localidades = pd.read_csv("../cleaned/localidades.csv")
localidades_ids = localidades['id'].tolist()

# Write one CSV per id. The bare `localidades_ids` expression that used to sit
# here was removed: only the last expression of a cell is displayed, so it had
# no effect.
for localidad in localidades_ids:
    list_files_with_prefix_store_as_csv(str(localidad))

In [None]:
def list_files_with_prefix(prefix, folder_path="../sources/listings/"):
    """Parse every listings file whose name starts with ``prefix`` into one DataFrame.

    Files directly inside ``folder_path`` whose names start with ``prefix`` are
    parsed with ``parse_listing_json`` in sorted name order and concatenated
    with a fresh 0..n-1 index.

    NOTE(review): matching is a plain ``str.startswith``, so prefix ``"12"``
    also matches files named ``"123..."`` -- confirm the file naming scheme
    keeps prefixes unambiguous.

    Args:
        prefix: Filename prefix to match (str); also stored as the ``section``
            value of every parsed row.
        folder_path: Directory that holds the source JSON files.

    Returns:
        pandas.DataFrame with the rows of all matching files. An empty
        DataFrame is returned when the folder is missing or nothing matches,
        so callers can always pass the result to ``pd.concat``.
    """
    folder = Path(folder_path)
    if not folder.is_dir():
        print(f"Folder {folder_path} does not exist")
        # Same type as the success path; a bare list here would break
        # callers that concatenate the result.
        return pd.DataFrame()

    matching_files = sorted(
        f.name for f in folder.iterdir() if f.is_file() and f.name.startswith(prefix)
    )

    print(f"Found {len(matching_files)} files starting with '{prefix}' in {folder_path}:")
    for file in matching_files:
        print(f"  - {file}")

    frames = []
    for file in matching_files:
        # Read from the folder that was actually listed, not a hard-coded path.
        df = parse_listing_json(folder / file, prefix)
        frames.append(df)
        print(f"Loaded {len(df)} listings from {file}")

    if not frames:
        return pd.DataFrame()
    # One concat at the end instead of re-copying the accumulated frame per file.
    return pd.concat(frames, ignore_index=True)



In [None]:
# Spot check: load the files for one hard-coded prefix, print the row count,
# and leave the frame as the cell's last expression so the notebook renders it.
df = list_files_with_prefix("141883")
print(len(df))
df

In [None]:
# Build one frame per id, then concatenate once; growing a DataFrame with
# pd.concat inside the loop re-copies all accumulated rows on every iteration.
listing_frames = []
for localidad in localidades_ids:
    # `df` is kept as the loop variable so the notebook's global `df` ends up
    # with the same value as before, in case later cells read it.
    df = list_files_with_prefix(str(localidad))
    listing_frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame.
combined_df = (
    pd.concat(listing_frames, ignore_index=True) if listing_frames else pd.DataFrame()
)

combined_df.to_csv("../cleaned/all_listings.csv", index=False)

len(combined_df)