Load and flatten city-trees.json

In [None]:
import json

def flatten_json(input_json):
    result = {}

    # Helper function to traverse and flatten the JSON
    def traverse(current, parent_key):
        if isinstance(current, dict):
            for key, value in current.items():
                traverse(value, f"{parent_key} {key}".strip())
        elif isinstance(current, int):
            # When an integer is reached, store it in the result
            parent_key_parts = parent_key.split()
            if len(parent_key_parts) >= 2:
                category = parent_key_parts[0]  # e.g., "short" or "tall"
                street_name = " ".join(parent_key_parts[1:])
                result.setdefault(category, {})[street_name] = current

    # Start traversal
    for main_key, sub_json in input_json.items():
        traverse(sub_json, main_key)

    return result

# Load the JSON File
with open("data/city-trees.json", "r") as file:
    input_json = json.load(file)

# Call the function
flattened_json = flatten_json(input_json)

# pretty print the result
print(json.dumps(flattened_json, indent=4))


check if city-trees.json data is consistent and correct

In [None]:
# get min and max values for each categorry
short_min_val = min(flattened_json["short"].values())
short_max_val = max(flattened_json["short"].values())
tall_min_val = min(flattened_json["tall"].values())
tall_max_val = max(flattened_json["tall"].values())

print('short_min_val', short_min_val)
print('short_max_val', short_max_val)
print('tall_min_val', tall_min_val)
print('tall_max_val', tall_max_val)

# make sure the data is correct by checking each item
for key, value in flattened_json.items():
    for sub_key, sub_value in value.items():
        if key == "short" and (sub_value < short_min_val or sub_value > short_max_val):
            print(f"short value out of range {sub_key} {sub_value}")
        if key == "tall" and (sub_value < tall_min_val or sub_value > tall_max_val):
            print(f"tall value out of range {sub_key} {sub_value}") 

Load and analyze *property-data.csv*

In [30]:
import pandas as pd
import re

def convert_to_float(s):
    s = re.sub(r'[^0-9.]', '', s)
    f = round(float(s), 2)
    return f

# Load the data/property-data.csv
property_data = pd.read_csv("data/property-data.csv", encoding="latin1")

 # print the data frame properly
property_data

# check for null or empty values.
# if any null or empty values are found, throw an error
if property_data.isnull().values.any():
    raise ValueError("Null found in property-data.csv")
if property_data.empty:
    raise ValueError("Empty dataframe found in property-data.csv")

# check if Price is a float.
# if not, convert it to a float with precision of 2
if not property_data["Price"].dtype == float:
    property_data["Price"] = property_data["Price"].apply(convert_to_float)

# check if Price is a float.
# if not, throw an error
if not property_data["Price"].dtype == float:
    raise ValueError("Price is not a float")

print(property_data["Price"][0])

79500.0
