# Exploratory Data Analysis - Land Case Simulation

This notebook explores the land data for our simulation project.

In [None]:
import sys
import os


def find_src_dir(start_dir):
    """Return the project's ``src`` directory as seen from *start_dir*.

    The notebook may be started either from the project root or from a
    sub-directory one level below it (the data cell reads ``../data/...``),
    so ``src`` is looked up in *start_dir* first and then in its parent.

    Args:
        start_dir: Directory to start the lookup from (normally the cwd).

    Returns:
        The first existing ``src`` directory found, or ``<start_dir>/src``
        when none exists (the path the notebook always used before).
    """
    for root in (start_dir, os.path.dirname(start_dir)):
        candidate = os.path.join(root, 'src')
        if os.path.isdir(candidate):
            return candidate
    return os.path.join(start_dir, 'src')


# Add the project's source directory to the import path explicitly.
current_dir = os.getcwd()
src_path = find_src_dir(current_dir)
if src_path not in sys.path:
    sys.path.insert(0, src_path)

# Import third-party libraries and the project's own modules.
try:
    import geopandas as gpd
    import pandas as pd
    import matplotlib.pyplot as plt
    import plotly.express as px
    from data_ingestion import load_config, read_shapefile
    from data_processing import clean_geospatial_data
    from visualization import create_interactive_map, save_plotly_figure
except ImportError as e:
    # Best-effort: report the failure and the install hint, but do not abort
    # the cell. Later cells will fail with NameError if an import is missing.
    print(f"✗ Error impor: {e}")
    print("Instal dependencies dengan: pip install -r requirements.txt")
else:
    print("✓ Semua library berhasil diimpor!")

    # Verify that the project helper functions are available in this namespace.
    print("Fungsi yang tersedia:")
    print(f"load_config: {'load_config' in dir()}")
    print(f"read_shapefile: {'read_shapefile' in dir()}")

In [None]:
# Step 1: load the project configuration and echo it for reference.
config = load_config()
print("Configuration loaded:", config)

# Step 2: read the sample shapefile (path is relative to the working directory).
shapefile_path = '../data/raw/sample_land_data.shp'
gdf = read_shapefile(shapefile_path)
print(f"Data loaded with {len(gdf)} records")

# Step 3: clean the data, passing the default CRS taken from the configuration.
target_crs = config['project_settings']['default_crs']
gdf_clean = clean_geospatial_data(gdf, target_crs)
print("Data cleaned successfully")

In [None]:
# Explore the cleaned data: column names, schema summary, and a small sample.
print("Data columns:", gdf_clean.columns.tolist())
print("\nData info:")
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() would emit a stray "None" line after the report.
gdf_clean.info()
print("\nFirst 5 rows:")
print(gdf_clean.head())

# Basic statistics of the 'value' column.
# NOTE(review): assumes the cleaned data has a numeric 'value' column — confirm.
print("\nValue statistics:")
print(gdf_clean['value'].describe())

In [None]:
# Visualizations for the cleaned data.

# Static histogram of the 'value' column, built through the Axes interface.
hist_fig, hist_ax = plt.subplots(figsize=(10, 6))
hist_ax.hist(gdf_clean['value'], bins=20, edgecolor='black')
hist_ax.set_title('Distribution of Land Values')
hist_ax.set_xlabel('Value')
hist_ax.set_ylabel('Frequency')
plt.show()

# Interactive map of the same column, produced by the project's helper.
fig = create_interactive_map(gdf_clean, 'value', 'Land Value Distribution')
fig.show()