# Satellite Imagery-Based Property Valuation: Preprocessing & EDA

This notebook handles loading the tabular data, performing exploratory data analysis (EDA), and visualizing the geospatial distribution of properties.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import geopandas as gpd
from shapely.geometry import Point

# Set plot style
sns.set(style="whitegrid")
%matplotlib inline

## 1. Load Data

In [None]:
# Locations of the two tabular splits, relative to the notebook's directory.
train_csv = '../data/train.csv'
test_csv = '../data/test.csv'

# Read each split into its own DataFrame.
df_train = pd.read_csv(train_csv)
df_test = pd.read_csv(test_csv)

# Report the (rows, columns) of each split.
print(f"Training Data Shape: {df_train.shape}")
print(f"Test Data Shape: {df_test.shape}")

# Last expression of the cell: preview the first rows of the training split.
df_train.head()

## 2. Basic Data Inspection

In [None]:
# Print a concise summary of the training frame: its index, each column's
# dtype and non-null count, and the frame's memory usage.
df_train.info()

In [None]:
# Descriptive statistics for the training frame. As the last expression of the
# cell, the resulting table is rendered as the cell's output.
df_train.describe()

## 3. Geospatial Analysis

In [None]:
# Place every training row at its ('long', 'lat') position and color the
# marker by its 'price' value.
fig, ax = plt.subplots(figsize=(10, 8))
sns.scatterplot(
    data=df_train,
    x='long',
    y='lat',
    hue='price',
    palette='viridis',
    alpha=0.6,  # partial transparency so overlapping markers stay visible
    ax=ax,
)
ax.set_title('Property Price Distribution by Location')
plt.show()

## 4. Feature Correlations

In [None]:
# Pairwise correlations (pandas' default method, Pearson) computed over the
# numeric columns only; non-numeric columns are filtered out before .corr().
corr_matrix = df_train.select_dtypes(include=[np.number]).corr()

# Correlation coefficients lie in [-1, 1]. Pin the color scale to that full
# range and center the diverging 'coolwarm' palette on 0, so the neutral color
# always means "no correlation" and warm/cool hues always mean positive/negative.
# Without this, the scale is fitted to the observed min/max and the neutral
# color lands on an arbitrary non-zero value.
plt.figure(figsize=(12, 10))
sns.heatmap(
    corr_matrix,
    annot=False,
    cmap='coolwarm',
    vmin=-1,
    vmax=1,
    center=0,
)
plt.title('Feature Correlation Matrix')
plt.show()