# Chicago Real Estate Data Analysis
This notebook loads the Chicago real estate dataset (`real_estate_data_chicago.csv`), cleans it, and explores property types and list prices.

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Use seaborn's "whitegrid" style for the figures in this notebook
sns.set(style="whitegrid")

In [None]:
# Path to the raw CSV, relative to the notebook's working directory
file_path = 'real_estate_data_chicago.csv'

# Read the file into the working DataFrame and preview its first rows
df = pd.read_csv(filepath_or_buffer=file_path)
df.head()

## Data Cleaning
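Parse `soldOn` as a datetime, fill missing values in `lastSoldPrice`, `soldOn`, `garage`, and `baths_half` with default values, and cast the numeric columns to appropriate data types.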

In [None]:
# Data Cleaning
# Each cleaned column is assigned back to `df` rather than calling
# `df[col].fillna(..., inplace=True)`. The in-place form acts on a temporary
# selection: it raises a FutureWarning on pandas 2.x and leaves `df` unchanged
# once Copy-on-Write is enabled, which would then make the int casts fail on NaN.

# Convert 'soldOn' to datetime; values that cannot be parsed become NaT
df['soldOn'] = pd.to_datetime(df['soldOn'], errors='coerce')

# Fill missing values, then cast to the intended dtype.
# NOTE(review): 0 and 1970-01-01 are placeholder values, so statistics computed
# on these columns later will include them.
df['lastSoldPrice'] = df['lastSoldPrice'].fillna(0).astype(float)
df['soldOn'] = df['soldOn'].fillna(pd.Timestamp('1970-01-01'))
df['garage'] = df['garage'].fillna(0).astype(int)
df['baths_half'] = df['baths_half'].fillna(0).astype(int)

# Show the resulting dtypes and non-null counts
df.info()

## Exploratory Data Analysis (EDA)
Generate summary statistics and visualizations to understand the data.

In [None]:
# Descriptive statistics for df, using describe()'s default column selection
summary_stats = df.describe()
summary_stats

In [None]:
# Count of listings per property type, drawn on an explicitly created Axes
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(data=df, x='type', ax=ax)
ax.set_title('Distribution of Property Types')
plt.show()

In [None]:
# Price trends over the years
# The x-axis is the year each property was built ('year_built'); seaborn
# aggregates 'listPrice' per year into a single line.
plt.figure(figsize=(14, 7))
# errorbar=None replaces the deprecated ci=None (seaborn >= 0.12): it disables
# the confidence band without emitting a FutureWarning.
sns.lineplot(data=df, x='year_built', y='listPrice', errorbar=None)
plt.title('Price Trends Over the Years')
plt.show()

## Feature Engineering
Create new features that might be useful for analysis.

In [None]:
# Feature Engineering
# Derive 'age' as the number of years between 'year_built' and the fixed
# reference year 2023 (rsub computes 2023 - year_built element-wise)
df['age'] = df['year_built'].rsub(2023)
df.head()

## Analysis
Compare the average list price across property types.

In [None]:
# Analysis
# Mean list price for each property type, shown as a bar chart
avg_price_by_type = df.groupby('type')['listPrice'].mean()
price_ax = avg_price_by_type.plot.bar(figsize=(10, 6))
price_ax.set_title('Average Price by Property Type')
price_ax.set_ylabel('Average Price')
plt.show()

## Conclusion
Summarize the findings and insights.

In this analysis, we explored the Chicago real estate data, performed data cleaning, and conducted exploratory data analysis. We also engineered new features and performed specific analyses to gain insights into the real estate market in Chicago.