# Scratch work for parsing the json from the backend API

### URL parsing
backend api urls are something like: https://sapi.craigslist.org/web/v8/postings/search/full?CC=US&batch=2-0-360-0-0-1&lang=en&searchPath=apa

batch= params are important.

parameter 1 is a region code, 7 = LA, 656 = Missoula `api_region_mapping.ipynb` is scratchwork to create a internal api mapping scheme

parameter 3 is a code for how many results to include in request. 10,000 seems to be the right choice here, because it will automatically filter to the last 45 days of available results, except in a few cases of huge cities where it filters to the most recent 10,000 results

other params seem to be filtering options for different booleans like pets allowed 0/1 etc.

searchPath= is also important, cta is cars, apa is apartments, rea is real estate etc.

In [25]:
with open('data/la_sapi.txt', 'r') as file:
    document_content = file.read()


In [26]:
print(len(document_content))

5322145


extract the json from the cl.jsonp()

In [27]:
import json
import re

# Extract the JSON string from the document content
json_string = re.search(r'cl\.jsonp\(.*?,\s*(.*)\)', document_content).group(1)

# Parse the JSON string into a Python object
data = json.loads(json_string)


In [36]:
area_name = data['data']['areas']['7']['name']
area_name

'losangeles'

In [28]:
listings = data['data']['items']
count = 0
import pandas as pd

# Initialize an empty list to store the data
data_list = []

for listing in listings:
    count += 1
    title = next((item[1] for item in listing if isinstance(item, list) and item[0] == 6), None)
    price = next((item[1] for item in listing if isinstance(item, list) and item[0] == 10), None)
    bedrooms = next((item[1] for item in listing if isinstance(item, list) and item[0] == 5), None)
    
    # Extract square feet, assuming it follows the bedrooms value
    square_feet = next((item[2] for item in listing if isinstance(item, list) and item[0] == 5 and len(item) > 2), None)
    
    if isinstance(listing[4], str) and '~' in listing[4]:
        _, latitude, longitude = listing[4].split('~')
    else:
        latitude, longitude = None, None
    
    # Append the extracted data to the list as a dictionary
    data_list.append({
        "Title": title,
        "Price": price,
        "Bedrooms": bedrooms,
        "Square Feet": square_feet,  # Add square feet to the dictionary
        "Latitude": latitude,
        "Longitude": longitude
    })

# Convert the list of dictionaries to a DataFrame
df = pd.DataFrame(data_list)

# Display the DataFrame
df.Bedrooms.describe()


count    9664.000000
mean        1.391142
std         0.902667
min         0.000000
25%         1.000000
50%         1.000000
75%         2.000000
max         8.000000
Name: Bedrooms, dtype: float64

In [30]:
df

Unnamed: 0,Title,Price,Bedrooms,Square Feet,Latitude,Longitude
0,los-angeles-central-c-bedroom-bath-in,"$2,725",1.0,700.0,34.0696,-118.4539
1,los-angeles-bedroom-ba-in-koreatown,"$2,700",2.0,800.0,34.0654,-118.2954
2,santa-monica-garbage-disposal-some-paid,"$2,500",1.0,600.0,34.0249,-118.4809
3,los-angeles-smart-tv-stylish-fully,"$1,350",0.0,175.0,34.0614,-118.2385
4,santa-clarita-central-heat-and-air,"$2,166",1.0,690.0,34.4183,-118.558
...,...,...,...,...,...,...
9995,pasadena-modern-clubhouse-social-lounge,"$3,045",1.0,805.0,34.1498,-118.0822
9996,la-puente-in-west-covina-ca-2b-1b,"$2,635",2.0,919.0,34.0303,-117.9152
9997,culver-city-experience-the-indie,"$3,920",2.0,940.0,34.0208,-118.4069
9998,van-nuys-your-pets-are-like-family-here,"$2,320",1.0,714.0,34.1885,-118.4663


In [39]:
# Calculate median price per bedroom
df['Price'] = df['Price'].replace('[\$,]', '', regex=True).astype(float)
df['Price per Bedroom'] = df['Price'] / (df['Bedrooms'] + 0.00000000000001)
median_price_per_bedroom = df['Price per Bedroom'].median()

# Calculate median price per square foot
df['Price per Sq Ft'] = df['Price'] / df['Square Feet']
median_price_per_sq_ft = df['Price per Sq Ft'].median()

print(f"Median Price per Bedroom: ${median_price_per_bedroom:.2f}")
print(f"Median Price per Square Foot: ${median_price_per_sq_ft:.2f}")


Median Price per Bedroom: $1945.00
Median Price per Square Foot: $3.28


In [38]:
df

Unnamed: 0,Title,Price,Bedrooms,Square Feet,Latitude,Longitude,Price per Bedroom,Price per Sq Ft
0,los-angeles-central-c-bedroom-bath-in,2725.0,1.0,700.0,34.0696,-118.4539,2725.0,3.892857
1,los-angeles-bedroom-ba-in-koreatown,2700.0,2.0,800.0,34.0654,-118.2954,1350.0,3.375000
2,santa-monica-garbage-disposal-some-paid,2500.0,1.0,600.0,34.0249,-118.4809,2500.0,4.166667
3,los-angeles-smart-tv-stylish-fully,1350.0,0.0,175.0,34.0614,-118.2385,inf,7.714286
4,santa-clarita-central-heat-and-air,2166.0,1.0,690.0,34.4183,-118.558,2166.0,3.139130
...,...,...,...,...,...,...,...,...
9995,pasadena-modern-clubhouse-social-lounge,3045.0,1.0,805.0,34.1498,-118.0822,3045.0,3.782609
9996,la-puente-in-west-covina-ca-2b-1b,2635.0,2.0,919.0,34.0303,-117.9152,1317.5,2.867247
9997,culver-city-experience-the-indie,3920.0,2.0,940.0,34.0208,-118.4069,1960.0,4.170213
9998,van-nuys-your-pets-are-like-family-here,2320.0,1.0,714.0,34.1885,-118.4663,2320.0,3.249300


In [29]:
print(count)
print(listing)

10000
[3793873, 0, 1, 10000, '1:1077~33.9779~-118.4525', [4, '3:01717_bQ0vsSExyEu_0CI0pO', '3:01717_a8x4lUQDTjk_0CI0pO', '3:00P0P_iKPcJyuLrn2_0CI0pO', '3:00000_3VHSlqa6vQ8_0CI0pO', '3:00Q0Q_hcDuDD3vnQU_0CI0pO', '3:00N0N_3TrChwJdWDh_0CI0pO', '3:00m0m_1Zuw4uoKFcO_0uY0kE', '3:00a0a_6DM25jiBCWW_0kl0t2', '3:00r0r_4GzbQdziVyG_0CI0pO', '3:00n0n_3q6yPWFiTsO_0CI0pO', '3:00k0k_90uJO5ybb07_0CI0pO', '3:00000_9wKG2R1N3D2_0CI0pO', '3:00Z0Z_b5Upt5pDU0E_0CI0pO', '3:00909_kb4IHuzFDxL_0CI0pO', '3:00f0f_cwQwluElipA_0CI0pO', '3:00303_5SI3jIvF94b_0uY0kE', '3:00D0D_2i2TGZi5NiC_0CI0pO', '3:00101_814lh6A5YxE_0CI0pO'], [6, 'marina-del-rey-great-work-live-space-in'], [10, '$10,000'], 'GREAT Work Live Space in Venice', [5, 2, 3700]]
