In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


# Load the raw real-estate listings; every later cell reads from (and adds columns to) `df`.
df = pd.read_csv("real_estate_data.csv")

# Preview the first rows. A bare last expression uses the notebook's rich
# table rendering, which print() would flatten to plain text.
df.head()

In [None]:
# Task 1 - Price per square foot (column creation).
# NOTE(review): the task title also lists "sorting", but this cell only creates the column.
# Element-wise division of `sold_price` by `area_sqft`.
df['price_per_sqft'] = df['sold_price'].div(df['area_sqft'])





In [None]:
# Task 2 - Profit/Loss calculation.
# Positive when the property sold above its listing price, negative when it sold below.
df['profit_lost'] = df['sold_price'].sub(df['listing_price'])

# Show the new column beside the columns it was derived from.
df.loc[:, ['property_type', 'sold_price', 'listing_price', 'profit_lost']]




In [None]:

# Task 3 - Average days on market by property type (groupby + mean).
# The aggregated Series is named first, then the group keys are turned back
# into a regular column so the result is a two-column DataFrame.
avg_dom = (
    df.groupby('property_type')['days_on_market']
    .mean()
    .rename('avg_days_on_market')
    .reset_index()
)

avg_dom



In [None]:
# Order the property types from the lowest to the highest average days on market.
avg_dom_sorted = avg_dom.sort_values(by='avg_days_on_market', ascending=True)

In [None]:
# Bar chart of the average days on market per property type.
# An explicit Axes is used so the title and axis labels can be set directly
# and the figure can be understood without reading the surrounding cells.
fig, ax = plt.subplots()
avg_dom_sorted.plot(
    kind='bar',
    x='property_type',
    y='avg_days_on_market',
    legend=False,
    rot=0,  # keep the category labels horizontal
    ax=ax,
)
ax.set_title('Average days on market by property type')
ax.set_xlabel('Property type')
ax.set_ylabel('Average days on market')
plt.show()

In [None]:
# Task 5 - Agent performance (groupby, sum, ranking).
# Total sold price per agent, plus a rank where 1 is the highest total.
# rank() keeps its default tie handling.
rank_agent = (
    df.groupby('agent_name')['sold_price']
    .sum()
    .reset_index()
    .assign(rank=lambda totals: totals['sold_price'].rank(ascending=False))
)

# Best-ranked agents first.
sorted_rank_agent = rank_agent.sort_values('rank')

sorted_rank_agent





In [None]:
# Task 6 - Percentage discount (calculated column, later used for filtering).
# Goal: find properties with a discount greater than 5%.
# discount % = (listing price - sold price) / listing price * 100
df['percent_discount'] = (
    df['listing_price']
    .sub(df['sold_price'])
    .div(df['listing_price'])
    .mul(100)
)





In [None]:
# Keep only the rows whose discount exceeds 5%.
# `percent_discount` is already expressed in percent, so the threshold is 5, not 0.05.
more_than_5_discount = df[df['percent_discount'] > 5]

# Report how many properties matched; the wording is independent of the count.
print(f'Properties with more than 5% discount: {len(more_than_5_discount)}')




In [None]:
# Display the discount next to the two prices it is computed from.
df.loc[:, ['percent_discount', 'listing_price', 'sold_price']]

In [None]:
# Task 7 - Property age calculation (year operations).
# Age is measured against the fixed reference year 2025, i.e. 2025 - year_built,
# so the values do not change when the notebook is re-run in a later year.
df['property_age'] = df['year_built'].rsub(2025)

# Spot-check the first four rows against the source column.
df[['property_age', 'year_built']].head(n=4)



In [None]:
# Per property type: the mean property age and the number of non-null
# `year_built` entries. Note that the output column holding that count is
# itself named `year_built`.
property_group = df.groupby('property_type').agg(
    property_age=pd.NamedAgg(column='property_age', aggfunc='mean'),
    year_built=pd.NamedAgg(column='year_built', aggfunc='count'),
)

property_group


In [None]:
# Per-location summary: mean/max/min sold price and the average bedrooms,
# bathrooms and area. Each entry maps output column -> (source column, aggregation).
location_aggregations = {
    'sold_price_mean': ('sold_price', 'mean'),
    'sold_price_max': ('sold_price', 'max'),
    'sold_price_min': ('sold_price', 'min'),
    'avg_bedrooms': ('bedrooms', 'mean'),
    'avg_bathrooms': ('bathrooms', 'mean'),
    'avg_area_sqft': ('area_sqft', 'mean'),
}

# Locations with the highest mean sold price come first.
location_statistic = (
    df.groupby('location')
    .agg(**location_aggregations)
    .sort_values('sold_price_mean', ascending=False)
)

location_statistic

In [None]:
# Bedroom-to-bathroom ratio (ratio calculation, later used for filtering).
# Goal: find properties where this ratio is greater than 2.
df['bedroom_bathroom_ratio'] = df['bedrooms'].div(df['bathrooms'])

# Preview the first three rows, including the new column.
df.head(n=3)





In [None]:
# Properties with more than two bedrooms per bathroom.
greater_than_2 = df[df['bedroom_bathroom_ratio'] > 2]

# Report the number of matches; the filter is strictly "greater than 2".
print(f'Properties with a bedroom-to-bathroom ratio greater than 2: {len(greater_than_2)}')

# The preview must be the last expression of the cell, otherwise the notebook
# evaluates it and silently discards the result.
greater_than_2.head(2)

In [None]:
# Task 10 - Monthly sales analysis (date extraction; the groupby follows).
# Parse the sale dates once, store the parsed values back on the frame, and
# derive a monthly period key from them for grouping.
sale_dates = pd.to_datetime(df['sale_date'])
df['sale_date'] = sale_dates
df['year_month'] = sale_dates.dt.to_period('M')

In [None]:
# Total sold price per calendar month, as a two-column frame
# (`year_month`, `total_sales`).
monthly_sales = (
    df.groupby('year_month')['sold_price']
    .sum()
    .rename('total_sales')
    .reset_index()
)

monthly_sales

In [None]:
# Descriptive statistics of the sold prices, computed on the underlying NumPy array.
# The standard deviation and variance use NumPy's default ddof=0.
sold_prices = df['sold_price'].to_numpy()
mean_price = sold_prices.mean()
median_price = np.median(sold_prices)
strd_price = sold_prices.std()
var_price = sold_prices.var()

In [None]:
# Print the four statistics, one per line, with thousands separators and two decimals.
print(
    f'Mean Sold Price: {mean_price:,.2f}',
    f'Median Sold Price:{median_price:,.2f}',
    f'Standard Deviation: {strd_price:,.2f}',
    f'Variance: {var_price:,.2f}',
    sep='\n',
)

In [None]:
#### Problem 12: Conditional Filtering with Multiple Criteria

In [None]:
# Problem 12: Conditional filtering with multiple criteria.
# Select Condos or Townhouses that have at least 2 parking spaces AND sold for
# less than 500000. Each criterion is its own boolean mask; all three must hold.
is_condo_or_townhouse = df['property_type'].isin(['Condo', 'Townhouse'])
has_two_plus_parking = df['parking_spaces'].ge(2)
sold_under_500k = df['sold_price'].lt(500000)

condo_or_townhouse = df[is_condo_or_townhouse & has_two_plus_parking & sold_under_500k]

condo_or_townhouse



