# Transforming land-type data: county areas as a share of the national total

In [2]:
import pandas as pd
import json

In [10]:
# Path to the county-level transactions JSON, relative to this notebook
file_path = '../app/public/data/transactions_of_immovables_by_the_purpose_of_use_county_level.json'

# Load the data. JSON text is UTF-8, so state the encoding explicitly instead of
# relying on the platform's locale default (which differs on e.g. Windows).
with open(file_path, 'r', encoding='utf-8') as file:
    data = json.load(file)

In [8]:
# Display the loaded structure for inspection (the cell output follows).
data

[{'MKOOD': '0037',
  'data': {'2010': [{'Name': 'Residential land',
     'Number': 3096,
     'Total area (ha)': 522,
     'Total value (eur)': 211097934},
    {'Name': 'Profit yielding land',
     'Number': 1042,
     'Total area (ha)': 9582,
     'Total value (eur)': 34264265},
    {'Name': 'Other',
     'Number': 416,
     'Total area (ha)': 298,
     'Total value (eur)': 25264992},
    {'Name': 'Mixed land',
     'Number': 174,
     'Total area (ha)': 362,
     'Total value (eur)': 79699305},
    {'Name': 'Industrial land',
     'Number': 234,
     'Total area (ha)': 179,
     'Total value (eur)': 53721525},
    {'Name': 'Commercial land',
     'Number': 182,
     'Total area (ha)': 68,
     'Total value (eur)': 105422918},
    {'Name': 'TOTAL',
     'Number': 5144,
     'Total area (ha)': 11012,
     'Total value (eur)': 509470937}],
   '2011': [{'Name': 'Residential land',
     'Number': 3110,
     'Total area (ha)': 555,
     'Total value (eur)': 223276453},
    {'Name': 'Profit

In [19]:
# Land-type names that take part in the normalization. Any entry whose 'Name'
# is not listed here (for example the 'TOTAL' row) is skipped.
required_categories = {
    "Residential land", "Profit yielding land", "Mixed land",
    "Industrial land", "Commercial land", "Other",
}

def safe_float(value):
    """Convert *value* to ``float``, falling back to ``0.0``.

    Anything ``float()`` rejects with ``ValueError`` or ``TypeError``
    (e.g. ``None`` or a non-numeric string) yields ``0.0`` instead of raising.
    """
    try:
        number = float(value)
    except (TypeError, ValueError):
        number = 0.0  # invalid or non-numeric entry
    return number

# Step 1: Sum "Total area (ha)" over all counties, grouped by year and land type.
# Resulting shape: {year: {land type name: summed area}}
total_national_areas = {}

for county in data:
    for year, year_data in county['data'].items():
        for entry in year_data:
            category = entry['Name']
            if category not in required_categories:
                continue  # not one of the tracked land types

            # The year's bucket is created lazily, only once a tracked entry is seen
            year_totals = total_national_areas.setdefault(year, {})
            area = safe_float(entry["Total area (ha)"])
            year_totals[category] = year_totals.get(category, 0) + area

# Step 2: Express each county's area as a percentage of the national total for
# the same year and land type.
# NOTE: the output entries keep the "Total area (ha)" key, but the value stored
# under it is the percentage share computed below, not an area in hectares.
normalized_data = []

for county in data:
    county_result = {"MKOOD": county["MKOOD"], "data": {}}

    for year, year_data in county['data'].items():
        normalized_year_data = []
        total_areas_for_year = total_national_areas.get(year, {})

        for entry in year_data:
            if entry['Name'] not in required_categories:
                continue  # skip untracked rows

            raw_value = safe_float(entry["Total area (ha)"])
            total_national = safe_float(total_areas_for_year.get(entry['Name'], 0))

            # A national total of 0 (every county reported 0 or an invalid
            # value) would raise ZeroDivisionError. A default passed to .get()
            # cannot prevent that, because the key exists with value 0.0.
            # Report a 0% share in that case, and when the total is missing.
            if total_national:
                normalized_value = raw_value / total_national * 100
            else:
                normalized_value = 0.0

            normalized_year_data.append({
                "Name": entry["Name"],
                "Total area (ha)": normalized_value
            })

        county_result["data"][year] = normalized_year_data

    normalized_data.append(county_result)

In [20]:
# Display the normalized result for inspection (the cell output follows).
normalized_data

[{'MKOOD': '0037',
  'data': {'2010': [{'Name': 'Residential land',
     'Total area (ha)': 23.04635761589404},
    {'Name': 'Profit yielding land', 'Total area (ha)': 8.993129856965874},
    {'Name': 'Other', 'Total area (ha)': 22.02512934220251},
    {'Name': 'Mixed land', 'Total area (ha)': 18.181818181818183},
    {'Name': 'Industrial land', 'Total area (ha)': 18.415637860082303},
    {'Name': 'Commercial land', 'Total area (ha)': 28.691983122362867}],
   '2011': [{'Name': 'Residential land',
     'Total area (ha)': 24.966261808367072},
    {'Name': 'Profit yielding land', 'Total area (ha)': 9.45762575775824},
    {'Name': 'Other', 'Total area (ha)': 41.078838174273855},
    {'Name': 'Mixed land', 'Total area (ha)': 11.612284069097889},
    {'Name': 'Industrial land', 'Total area (ha)': 18.784530386740332},
    {'Name': 'Commercial land', 'Total area (ha)': 0.0}],
   '2012': [{'Name': 'Residential land',
     'Total area (ha)': 24.889673433362756},
    {'Name': 'Profit yielding lan

In [21]:
# Step 3: Write the normalized result to a JSON file in the current working
# directory, pretty-printed with a 4-space indent.
with open('normalized_spider_data.json', mode='w') as out_file:
    json.dump(normalized_data, out_file, indent=4)