# Exploratory Data Analysis: kc_house_data.csv
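This notebook performs an initial exploratory data analysis (EDA) of `kc_house_data.csv`. Place the file in the same folder as this notebook before running the cells, because it is loaded by relative path.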

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
# Allow up to 50 columns in DataFrame displays so wide tables are not elided.
pd.set_option('display.max_columns', 50)

# Load the dataset; the relative path resolves against the working directory.
data_path = 'kc_house_data.csv'
df = pd.read_csv(data_path)

# Preview the first five rows (kept as the last expression so the cell shows it).
df.head(n=5)

## 1. Overview and info

In [None]:
# Print a concise summary of df: one line per column with its non-null count
# and dtype, followed by the overall memory usage.
df.info()

## 2. Summary statistics

In [None]:
# Descriptive statistics for every column. include='all' keeps non-numeric
# columns in the table as well; statistics that do not apply to a column's
# dtype are reported as NaN.
df.describe(include='all')

## 3. Missing values

In [None]:
# Count missing entries per column: flag each missing cell, then sum the flags
# down the rows so the result has one count per column.
df.isna().sum(axis=0)

## 4. Price distribution

In [None]:
# Draw the price histogram on an explicit Axes instead of relying on the
# implicit "current axes" of the pyplot state machine.
fig, ax = plt.subplots()
df['price'].hist(bins=50, ax=ax)
ax.set_title('Histogram of house prices')
ax.set_xlabel('price')
ax.set_ylabel('count')
plt.show()

## 5. Price vs sqft_living

In [None]:
# Scatter of price against sqft_living, drawn on an explicit Axes.
living_area = df['sqft_living']
sale_price = df['price']

fig, ax = plt.subplots()
ax.scatter(living_area, sale_price, s=10)  # small markers (s=10)
ax.set_title('Price vs sqft_living')
ax.set_xlabel('sqft_living')
ax.set_ylabel('price')
plt.show()

## 6. Correlation matrix (numeric features)

In [None]:
# Pairwise correlation between the numeric columns only; select_dtypes drops
# every non-numeric column before corr() is computed.
corr = df.select_dtypes(include=['number']).corr()

plt.figure(figsize=(10,8))
# Pin the colour scale to the full correlation range [-1, 1] and use a
# diverging colormap. With the default auto-scaling the colour range depends
# on the smallest value present in the data, so zero correlation has no fixed
# colour and positive/negative relationships cannot be told apart at a glance.
plt.imshow(corr, cmap='coolwarm', vmin=-1, vmax=1, aspect='auto')
plt.colorbar(label='correlation coefficient')
# Label both axes with the column names; x labels are rotated to avoid overlap.
plt.xticks(range(len(corr.columns)), corr.columns, rotation=90)
plt.yticks(range(len(corr.index)), corr.index)
plt.title('Correlation matrix (numeric features) - heatmap')
plt.show()