# 🧪 Hypothesis Testing - Real Estate Data
This notebook contains statistical hypothesis tests to explore the relationships between housing features and prices.

In [None]:
# Import necessary libraries
import pandas as pd
import scipy.stats as stats
import matplotlib.pyplot as plt
import seaborn as sns

# Read the real-estate listings CSV into the DataFrame used by every test below
dataset_path = "real_estate_data_utf8sig.csv"
df = pd.read_csv(dataset_path)

## 🧩 Hypothesis 1: Is there a correlation between the number of rooms and price?
- H0: There is no correlation between the number of rooms and the price.
- H1: There is a significant correlation between the number of rooms and the price.

In [None]:
# Convert room format like '3+1' to a numeric value.
# Only the first run of digits is kept ('3+1' -> 3.0); labels without any digit
# and missing labels become NaN. expand=False returns a Series instead of a
# one-column DataFrame.
df['room_numeric'] = df['number_of_rooms'].str.extract(r'(\d+)', expand=False).astype(float)

# pearsonr does not skip missing values: a single NaN in either column would
# propagate into the statistic. Keep only rows where both values are present.
rooms_price = df[['room_numeric', 'price']].dropna()

# Pearson correlation (needs at least two complete observations)
if len(rooms_price) < 2:
    print("Not enough complete rows to compute the room/price correlation.")
else:
    corr, p_value = stats.pearsonr(rooms_price['room_numeric'], rooms_price['price'])
    print(f"Correlation: {corr:.3f}, p-value: {p_value:.5f}")

## 🧩 Hypothesis 2: Are newer buildings more expensive?
- H0: Average prices are the same across different age groups.
- H1: At least one age group has a significantly different average price.

In [None]:
# Create age groups. The last bin is open-ended so that buildings older than
# 100 years are labelled '30+' instead of silently falling outside every bin
# (which would turn them into NaN and drop them from the test).
df['age_group'] = pd.cut(df['building_age'], bins=[-1, 5, 15, 30, float('inf')],
                         labels=['0-5', '6-15', '16-30', '30+'])

# ANOVA test for price by building age group.
# - Rows without a price are removed first: f_oneway does not skip NaN.
# - observed=True iterates only over age groups that actually occur; grouping
#   on a categorical column otherwise also yields empty groups for unused labels.
priced_rows = df.dropna(subset=['price'])
groups = [group['price'].values
          for _, group in priced_rows.groupby('age_group', observed=True)
          if len(group) > 0]

# f_oneway needs at least two non-empty samples to compare.
if len(groups) < 2:
    print("Not enough age groups with price data to run ANOVA.")
else:
    f_stat, p_value = stats.f_oneway(*groups)
    print(f"ANOVA F-statistic: {f_stat:.3f}, p-value: {p_value:.5f}")

## 🧩 Hypothesis 3: Does floor level affect price?
- H0: Average prices are the same for different floor levels.
- H1: At least one floor level has a significantly different average price.

In [None]:
# Filter only numeric floors for simplicity.
# - astype(str) turns missing values into the text 'nan', so the mask below is a
#   plain boolean Series (a mask containing NaN cannot be used for indexing).
# - isdecimal() accepts only characters that int() can parse; isnumeric() would
#   also accept values such as '²' or '½' and make the conversion fail.
floor_text = df['floor'].astype(str).str.strip()
is_numeric_floor = floor_text.str.isdecimal()

# assign() builds a new DataFrame, so the original df is left untouched and no
# SettingWithCopyWarning is raised.
df_floor = df.loc[is_numeric_floor].assign(floor=floor_text[is_numeric_floor].astype(int))

# Rows without a price cannot contribute to the test (f_oneway does not skip NaN).
df_floor = df_floor.dropna(subset=['price'])

# Group by floor and perform ANOVA, keeping only floors with more than
# MIN_LISTINGS_PER_FLOOR listings so that tiny groups do not dominate the test.
MIN_LISTINGS_PER_FLOOR = 30
groups = [group['price'].values
          for _, group in df_floor.groupby('floor')
          if len(group) > MIN_LISTINGS_PER_FLOOR]

# f_oneway needs at least two samples to compare.
if len(groups) < 2:
    print("Not enough floor levels with sufficient data to run ANOVA.")
else:
    f_stat, p_value = stats.f_oneway(*groups)
    print(f"ANOVA F-statistic (floor): {f_stat:.3f}, p-value: {p_value:.5f}")