In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Show every column when a DataFrame is displayed (no truncation with "...").
pd.options.display.max_columns = None

In [None]:
# Load the cleaned listings file, then discard rows that are exact duplicates.
df = pd.read_csv('ahmedabad_properties_cleaned_v2.csv')
df = df.drop_duplicates()

In [None]:
# Preview the first rows of the de-duplicated frame to see which columns are available.
df.head()

### property_type vs price

In [None]:
# Bar height is seaborn's default estimator (mean) of price per property type.
sns.barplot(data=df, x='property_type', y='price')

In [None]:
# Distribution (quartiles and outliers) of price within each property type.
sns.boxplot(data=df, x='property_type', y='price')

### property_type vs area

In [None]:
# Mean built-up area for each property type.
sns.barplot(data=df, x='property_type', y='built_up_area')

In [None]:
# Spread of built-up area per property type, before any outlier removal.
sns.boxplot(data=df, x='property_type', y='built_up_area')

In [None]:
# Drop the single extreme built_up_area value (737147) seen in the boxplot;
# every other row, including rows with a missing area, is kept.
df = df.loc[df['built_up_area'].ne(737147)]

In [None]:
# Same boxplot as before, redrawn now that the extreme area value is gone.
sns.boxplot(data=df, x='property_type', y='built_up_area')

### property_type vs price_per_sqft

In [None]:
# Median (not mean) price per sqft for each property type.
sns.barplot(data=df, x='property_type', y='price_per_sqft', estimator=np.median)

In [None]:
# Distribution of price per sqft within each property type.
sns.boxplot(data=df, x='property_type', y='price_per_sqft')

In [None]:
# Inspect listings whose price per sqft exceeds 100000, showing only the
# price and area columns needed to judge whether the value is plausible.
df.loc[
    df['price_per_sqft'].gt(100000),
    [
        'property_type', 'society', 'sector', 'price', 'price_per_sqft',
        'area', 'areaWithType', 'super_built_up_area', 'built_up_area',
        'carpet_area',
    ],
]

In [None]:
# Glance at the first rows again before comparing property_type with the next column.
df.head()

In [None]:
# Listing counts for every (property type, bedroom count) combination.
sns.heatmap(
    pd.crosstab(index=df['property_type'], columns=df['bedRoom'])
)

In [None]:
# Outlier check: listings that report ten or more bedrooms.
df.loc[df['bedRoom'].ge(10)]

In [None]:
# Mean floor number for each property type.
sns.barplot(data=df, x='property_type', y='floorNum')

In [None]:
# Spread of floor numbers within each property type.
sns.boxplot(data=df, x='property_type', y='floorNum')

In [None]:
# Outlier check: rows typed as 'house' that sit above the 10th floor.
df.loc[df['property_type'].eq('house') & df['floorNum'].gt(10)]

In [None]:
# Conclusion: these are houses (villas), but located within apartment buildings.

In [None]:
# Glance at the first rows again before the agePossession comparisons.
df.head()

In [None]:
# Listing counts for every (property type, age/possession bucket) combination.
sns.heatmap(
    pd.crosstab(index=df['property_type'], columns=df['agePossession'])
)

In [None]:
# Mean price for each (property type, age/possession bucket) cell, annotated with the value.
sns.heatmap(
    df.pivot_table(
        index='property_type',
        columns='agePossession',
        values='price',
        aggfunc='mean',
    ),
    annot=True,
)

In [None]:
# Wide figure so the per-bedroom annotations stay readable.
plt.figure(figsize=(15, 4))
# Mean price for each (property type, bedroom count) cell.
sns.heatmap(
    df.pivot_table(
        index='property_type',
        columns='bedRoom',
        values='price',
        aggfunc='mean',
    ),
    annot=True,
)

In [None]:
# Listing counts for every (property type, furnishing type) combination.
sns.heatmap(
    pd.crosstab(index=df['property_type'], columns=df['furnishing_type'])
)

In [None]:
# Mean price for each (property type, furnishing type) cell, annotated with the value.
sns.heatmap(
    df.pivot_table(
        index='property_type',
        columns='furnishing_type',
        values='price',
        aggfunc='mean',
    ),
    annot=True,
)

In [None]:
# Mean luxury score for each property type.
sns.barplot(data=df, x='property_type', y='luxury_score')

In [None]:
# Spread of luxury score within each property type.
sns.boxplot(data=df, x='property_type', y='luxury_score')

In [None]:
# Glance at the first rows again before the sector-level analysis.
df.head()

In [None]:
# sector analysis
# Listing counts for every (property type, sector) combination.
# The former `.sort_index()` on the sector column was dropped: crosstab aligns
# its inputs on the row index and orders the output columns by sector label,
# so re-ordering the input Series had no effect on the table.
plt.figure(figsize=(15, 6))
sns.heatmap(pd.crosstab(df['property_type'], df['sector']))

In [None]:
# sector analysis
import re

# Mean listing price per sector, kept as a two-column frame (sector, price)
# so that a numeric sort key can be attached to it afterwards.
avg_price_per_sector = df.groupby('sector', as_index=False)['price'].mean()

def extract_sector_number(sector_name):
    """Return the first run of digits in ``sector_name`` as an ``int``.

    Used as a sort key so that sector labels order numerically rather than
    alphabetically.

    Parameters
    ----------
    sector_name : str
        Sector label. Any non-string value (for example ``None`` or NaN) is
        treated like a label without digits instead of raising ``TypeError``.

    Returns
    -------
    int or float
        The parsed number, or ``float('inf')`` when no digits are present so
        that such labels sort after every numbered sector.
    """
    # re.search only accepts str/bytes; missing labels would otherwise crash .apply().
    if not isinstance(sector_name, str):
        return float('inf')
    match = re.search(r'\d+', sector_name)
    return int(match.group()) if match else float('inf')

# Attach the numeric sort key parsed from each sector label.
avg_price_per_sector['sector_number'] = avg_price_per_sector['sector'].map(extract_sector_number)

# Order the rows by that key (labels without a number end up last).
avg_price_per_sector_sorted_by_sector = avg_price_per_sector.sort_values('sector_number')

# Tall single-column heatmap: one annotated row per sector.
plt.figure(figsize=(5, 25))
sns.heatmap(
    avg_price_per_sector_sorted_by_sector.set_index('sector')[['price']],
    annot=True,
    fmt=".2f",
    linewidths=.5,
)
plt.title('Average Price per Sector (Sorted by Sector Number)')
plt.xlabel('Average Price')
plt.ylabel('Sector')
plt.show()


In [None]:
# Mean price per sqft for every sector, as a (sector, price_per_sqft) frame.
avg_price_per_sqft_sector = df.groupby('sector', as_index=False)['price_per_sqft'].mean()

# Numeric sort key parsed from the sector label.
avg_price_per_sqft_sector['sector_number'] = avg_price_per_sqft_sector['sector'].map(extract_sector_number)

# Order the rows by that key (labels without a number end up last).
avg_price_per_sqft_sector_sorted_by_sector = avg_price_per_sqft_sector.sort_values('sector_number')

# Tall single-column heatmap: one annotated row per sector.
plt.figure(figsize=(5, 25))
sns.heatmap(
    avg_price_per_sqft_sector_sorted_by_sector.set_index('sector')[['price_per_sqft']],
    annot=True,
    fmt=".2f",
    linewidths=.5,
)
plt.title('Sector (Sorted by Sector Number)')
plt.xlabel('Average Price per sqft')
plt.ylabel('Sector')
plt.show()

In [None]:
# Mean luxury score for every sector.
luxury_score = df.groupby('sector')['luxury_score'].mean().reset_index()

# Numeric sort key parsed from the sector label.
luxury_score['sector_number'] = luxury_score['sector'].apply(extract_sector_number)

# Sort by sector number
luxury_score_sector = luxury_score.sort_values(by='sector_number')

# Plot the heatmap: one annotated row per sector.
plt.figure(figsize=(5, 25))
sns.heatmap(luxury_score_sector.set_index('sector')[['luxury_score']], annot=True, fmt=".2f", linewidths=.5)
plt.title('Sector (Sorted by Sector Number)')
# The x-axis label previously read 'Average Price per sqft' (copied from the
# price-per-sqft cell); this plot shows the mean luxury score.
plt.xlabel('Average Luxury Score')
plt.ylabel('Sector')
plt.show()

In [None]:
# Glance at the first rows again before the price-focused plots.
df.head()

### price

In [None]:
# Price against area, coloured by bedroom count, for listings under 10000 area units.
# x/y/hue are passed by keyword from one filtered frame: seaborn >= 0.12 rejects
# positional x and y, and filtering only the x Series left y and hue unfiltered,
# relying on implicit index alignment to drop the extra rows.
plt.figure(figsize=(12, 8))
sns.scatterplot(data=df[df['area'] < 10000], x='area', y='price', hue='bedRoom')

In [None]:
# Price against area, coloured by age/possession bucket, for listings under 10000 area units.
# x/y/hue are passed by keyword from one filtered frame: seaborn >= 0.12 rejects
# positional x and y, and filtering only the x Series left y and hue unfiltered,
# relying on implicit index alignment to drop the extra rows.
plt.figure(figsize=(12, 8))
sns.scatterplot(data=df[df['area'] < 10000], x='area', y='price', hue='agePossession')

In [None]:
# NOTE(review): the furnishing_type scatter below is commented out, so this cell
# currently only opens an empty 12x8 figure. Either restore the plot or delete the cell.
plt.figure(figsize=(12,8))
# Disabled plot (price vs. area for area < 10000, coloured by furnishing_type):
# sns.scatterplot(df[df['area']<10000]['area'],df['price'],hue=df['furnishing_type'].astype('category'))

In [None]:
# Median price for each bedroom count.
sns.barplot(data=df, x='bedRoom', y='price', estimator=np.median)

In [None]:
# Mean price for each age/possession bucket; tick labels are turned vertical
# so the bucket names do not overlap.
sns.barplot(data=df, x='agePossession', y='price')
plt.xticks(rotation='vertical')
plt.show()

In [None]:
# Median area for each age/possession bucket; tick labels are turned vertical
# so the bucket names do not overlap.
sns.barplot(data=df, x='agePossession', y='area', estimator=np.median)
plt.xticks(rotation='vertical')
plt.show()

In [None]:
# Median price for each furnishing type.
sns.barplot(data=df, x='furnishing_type', y='price', estimator=np.median)

In [None]:
# Price against luxury score. x and y are passed by keyword because
# seaborn >= 0.12 no longer accepts them positionally.
sns.scatterplot(data=df, x='luxury_score', y='price')

### correlation

In [None]:
# Pairwise correlation between the numeric columns.
# numeric_only=True is required because pandas >= 2.0 raises on text columns
# (property_type, society, sector, ...) instead of silently skipping them.
plt.figure(figsize=(8, 8))
sns.heatmap(df.corr(numeric_only=True))

In [None]:
# Correlation of every numeric column with price, strongest positive first.
# numeric_only=True keeps this working on pandas >= 2.0, where text columns
# are no longer dropped automatically.
df.corr(numeric_only=True)['price'].sort_values(ascending=False)

In [None]:
# Glance at the first rows again before the pairwise plots.
df.head()

In [None]:
# Grid of pairwise relationships across the frame's columns.
sns.pairplot(data=df)