# Apartment Market Analysis (EDA & Modeling)

This notebook performs exploratory data analysis (EDA) on apartment listings scraped from real estate portals and then experiments with training a model to predict apartment prices.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sys
import os

# Make the project root (the parent of the current working directory)
# importable, so the `src` package imported in this cell can be resolved.
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
# Guard against duplicates: re-running this cell must not keep growing sys.path.
if project_root not in sys.path:
    sys.path.append(project_root)

from src.model.train_model import load_data, prepare_data, train_model

%matplotlib inline

## 1. Load Data

In [None]:
# Load the raw CSV through the project helper. The path is relative, so it is
# resolved against the notebook's current working directory.
raw_csv_path = '../data/raw/apartments_raw_data.csv'
df = load_data(raw_csv_path)

# Report the row count, then leave the preview as the cell's displayed output.
n_loaded = len(df)
print(f"Loaded {n_loaded} ads.")
df.head()

## 2. Cleaning and Feature Extraction

In [None]:
# Run the project's preparation step on the loaded frame and bind the result
# to a separate name so later cells can refer to the cleaned data explicitly.
df_clean = prepare_data(df)

# Report how many rows the preparation step returned, then preview them.
n_remaining = len(df_clean)
print(f"After cleaning, {n_remaining} ads remain.")
df_clean.head()

## 3. Data Visualization

### Price Distribution by Region

In [None]:
# Box plot of price per region, drawn on an explicitly created figure/axes pair.
fig, ax = plt.subplots(figsize=(12, 6))
sns.boxplot(data=df_clean, x='region', y='price', ax=ax)
# Rotate the x tick labels by 45 degrees.
plt.xticks(rotation=45)
ax.set_title('Apartment Price Distribution by Region')
plt.show()

### Relationship between Area and Price

In [None]:
# Scatter plot of price against area, with points colored by region and
# drawn semi-transparent (alpha=0.6).
plt.figure(figsize=(10, 6))
sns.scatterplot(x='area', y='price', hue='region', data=df_clean, alpha=0.6)
plt.title('Price vs. Apartment Area')
# The original label literal was mojibake (the UTF-8 bytes of the
# superscript-two character decoded as Latin-1). \u00b2 is SUPERSCRIPT TWO;
# the escape keeps the source ASCII-only so it cannot be mis-decoded again.
plt.xlabel('Area (m\u00b2)')
plt.ylabel('Price (CZK)')
plt.show()

## 4. Model Training

In [None]:
# Train on the cleaned data. train_model's result unpacks into two values;
# the second is presumably the list of feature columns used for fitting —
# TODO confirm against src.model.train_model.
training_result = train_model(df_clean)
model, columns = training_result
print("Model trained successfully.")