## Import libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px 

## Read and Show Data

In [None]:
# Load the dataset from 'RiyadhVillasAqar.csv'. The path is relative, so the
# file must be in the current working directory.
RYD = pd.read_csv('RiyadhVillasAqar.csv')

In [None]:
# Bare expression: in a notebook this renders the DataFrame as the cell output.
RYD

## EDA

In [None]:
# Print a summary of the DataFrame (columns, dtypes, non-null counts).
RYD.info()

In [None]:
# These three columns are read as text; remove the literal '+' character and
# convert what is left to float.
# regex=False is passed explicitly because '+' is a regex quantifier and the
# default value of `regex` is not the same across pandas versions.
for col in ('lounges', 'bathrooms', 'apartments'):
    RYD[col] = RYD[col].str.replace('+', '', regex=False).astype(float)

In [None]:
# Replace missing apartment counts with 0.
# The result is assigned back instead of calling fillna(inplace=True) on the
# column selection: that form is chained assignment, which emits a
# FutureWarning in pandas 2.x and leaves RYD unchanged under Copy-on-Write.
RYD['apartments'] = RYD['apartments'].fillna(0)

In [None]:
# Count fully duplicated rows (True) versus unique rows (False).
# NOTE(review): this runs while the 'Unnamed: 0' column is still present (it
# is dropped in a later cell). If that column is a unique row index, no row
# can be reported as a duplicate here - confirm, and consider checking after
# the drop.
RYD.duplicated().value_counts()

In [None]:
# Remove the 'Unnamed: 0' column from the DataFrame in place.
RYD.drop(columns=['Unnamed: 0'], inplace=True)

In [None]:
# Number of missing values in each column.
RYD.isnull().sum()

In [None]:
# Fill missing values in these two columns with the column median.
# Assigned back rather than fillna(inplace=True) on a column selection, which
# is chained assignment: FutureWarning in pandas 2.x, no effect on RYD under
# Copy-on-Write.
RYD['lounges'] = RYD['lounges'].fillna(RYD['lounges'].median())
RYD['streetWidth'] = RYD['streetWidth'].fillna(RYD['streetWidth'].median())

In [None]:
# Descriptive statistics, transposed so each column of RYD becomes one row.
RYD.describe().transpose()

In [None]:
# Drop, in place, every row that still contains at least one missing value.
RYD.dropna(axis=0, how='any', inplace=True)

In [None]:
# Re-check the per-column missing-value counts after the rows were dropped.
RYD.isnull().sum()

## Data Visualization

In [None]:
# Heatmap of the absolute pairwise correlations between the numeric columns.
x = RYD.select_dtypes(include='number')
corr_strength = x.corr().abs()

plt.figure(figsize=(25, 20))
sns.heatmap(corr_strength, annot=True, cmap="Blues")
plt.title("Correlation Heatmap")
plt.show()

In [None]:
# Scatter plot of price for every neighbourhood; tick labels are rotated 90
# degrees.
fig = px.scatter(RYD, x='neighbourhood', y='price')
fig.update_layout(
    title='Price by Neighbourhood',
    title_x=0.5,
    title_font=dict(size=24),
)
fig.update_xaxes(title='neighbourhood', title_font=dict(size=18), tickangle=90)
fig.update_yaxes(title='Price', title_font=dict(size=18))
fig.show()

In [None]:
# Violin plot of price per `duplex` value, coloured by location.
# The axis titles were swapped: x carries `duplex` (the property type) and
# y carries `price`. The plt.figure() call was removed because plotly does not
# draw on matplotlib figures, so it only created an empty, never-closed figure.
fig = px.violin(RYD, x='duplex', y='price', color='location')
fig.update_layout(title='dublex and others price',title_x=0.5,title_font=dict(size=24))
fig.update_xaxes(title='type of property', title_font=dict(size=18))
fig.update_yaxes(title='Price', title_font=dict(size=18))
fig.show()

In [None]:
# Average price per (location, pool) combination, drawn as grouped bars.
# - The price column is selected explicitly; `.mean('price')` passed 'price'
#   as the `numeric_only` argument rather than selecting a column.
# - `pool` is turned into a discrete label and the bars are grouped; before,
#   both pool rows of a location were stacked into one bar whose height was
#   the sum of the two averages.
# - The plt.figure() call was removed: plotly does not draw on matplotlib
#   figures, so it only created an empty figure.
x = RYD.groupby(['location', 'pool'], as_index=False)['price'].mean()
x['pool'] = x['pool'].astype(str)
fig = px.bar(x, x='location', y='price', color='pool', barmode='group')
fig.update_layout(title='Price Average with pool',title_x=0.5,title_font=dict(size=24))
fig.update_xaxes(title='location', title_font=dict(size=18))
fig.update_yaxes(title='price avarage', title_font=dict(size=18))
fig.show()

In [None]:
# Line chart of the mean price for each property age.
x = RYD.groupby('propertyAge', as_index=False)['price'].mean()
fig = px.line(x, x='propertyAge', y='price')
fig.update_layout(
    title='Real Estate Age and Average Price',
    title_x=0.5,
    title_font=dict(size=24),
)
fig.update_xaxes(title='Property Age', title_font=dict(size=18))
fig.update_yaxes(title='Price', title_font=dict(size=18))
fig.show()

In [None]:
# Histogram over neighbourhoods where each bar is the sum of `price`
# (the plotly default aggregation when y is given).
fig = px.histogram(RYD, x='neighbourhood', y='price')
fig.update_layout(
    title='Neighbourhood Prices',
    title_x=0.5,
    title_font=dict(size=24),
)
fig.update_xaxes(title='Neighbourhoods', title_font=dict(size=18))
fig.update_yaxes(title='sum of the price', title_font=dict(size=18))
fig.show()

In [None]:
# Empirical cumulative distribution over `front`, weighted by `price`, with
# one curve per `duplex` value.
# The plt.figure() call was removed: plotly does not draw on matplotlib
# figures, so it only created an empty, never-closed figure.
# NOTE(review): with y= given, px.ecdf accumulates y instead of counting rows,
# so the y axis is presumably a cumulative share of price rather than a price;
# confirm whether the 'price' axis title is what is intended.
fig = px.ecdf(data_frame = RYD ,x='front',y='price', color='duplex')
fig.update_layout(title='propertys types and it is relation with the front',title_x=0.5,title_font=dict(size=24))
fig.update_xaxes(title='front', title_font=dict(size=18))
fig.update_yaxes(title='price', title_font=dict(size=18))
fig.show()

In [None]:
# Bubble chart: location on x, property age on y, marker size scaled by
# `space` (capped at 60 px).
# The plt.figure() call was removed: plotly does not draw on matplotlib
# figures, so it only created an empty, never-closed figure.
fig = px.scatter(data_frame = RYD , x = "location" , y = 'propertyAge',size="space",size_max=60)
fig.update_layout(title='propertys size and it is relation with age and location',title_x=0.5,title_font=dict(size=24))
fig.update_yaxes(title='Property Age', title_font=dict(size=18))
fig.show()

In [None]:
# Pie chart of how often each pool/basement combination occurs.
# A temporary column holds the combined label and is removed again once the
# chart has been shown.
combo_labels = {
    '0.0_0.0': 'neither',
    '1.0_0.0': 'pool without basment',
    '0.0_1.0' : 'basment without pool',
    '1.0_1.0':'pool and basement',
}
# Build keys such as '1.0_0.0' from the string form of both columns.
combo_keys = RYD['pool'].astype(str).str.cat(RYD['basement'].astype(str), sep='_')
RYD['pool_basement_combination'] = combo_keys.map(combo_labels)

comb = RYD['pool_basement_combination'].value_counts()

fig = px.pie(comb, names=comb.index, values=comb.values)
fig.update_layout(
    title='pie plot shows the popularity of pools and basements',
    title_x=0.5,
    title_font=dict(size=24),
)
fig.show()
RYD.drop(columns=['pool_basement_combination'], inplace=True)

In [None]:
# 3D scatter: neighbourhood (x), space (y) and price (z).
fig = px.scatter_3d(RYD, x='neighbourhood', y='space', z='price')
fig.update_layout(
    title='3D scatter',
    title_x=0.5,
    title_font=dict(size=24),
)
fig.show()

In [None]:
# Disabled cell: the code below sits inside a triple-quoted string, so it is
# evaluated as a string literal and never executed; only print() runs.
# If enabled, it would look up a fixed latitude/longitude for each row's
# `location` in the parallel lists loc/flat/flon and store the results in new
# 'lat' and 'lon' columns.
# NOTE(review): the loop appends nothing for a location missing from `loc`,
# so the lists could end up shorter than RYD and the column assignment would
# fail - confirm all locations are covered before re-enabling.
'''
loc = ['وسط الرياض' , 'جنوب الرياض' , 'شمال الرياض' , 'شرق الرياض' , 'غرب الرياض']
flat = [24.66835421558959 , 24.591065047160967 , 24.835434844497765 , 24.779778660959817 , 24.65360806573126]
flon = [46.72966953410291 , 46.764068844974204 , 46.649106402075496 , 46.828973205108724 , 46.573250026178705]



lat = []
lon = []
for city in RYD["location"]:
    for i in range(len(loc)):
        if loc[i] == city:
            lat.append(flat[i])
            lon.append(flon[i])
RYD["lat"] = lat
RYD["lon"] = lon
'''
print()

In [None]:
# Disabled cell: the plotting code below sits inside a triple-quoted string
# and is never executed; only print() runs.
# If enabled, it would draw a geographic scatter plot from the 'lat'/'lon'
# columns, coloured by price and sized by space.
# NOTE(review): in the visible code 'lat' and 'lon' are only created inside
# another disabled string block, so this would need that code enabled first.
'''
fig = px.scatter_geo(RYD, lat=RYD["lat"] , lon=RYD["lon"], text=RYD["location"],
                     color=RYD["price"], size=RYD["space"], 
                     projection="natural earth",  color_continuous_scale="Viridis",
                     )

fig.update_geos(fitbounds="locations", visible=True)
fig.update_layout(title='the prices and space of the Aqar',title_x=0.5,title_font=dict(size=24))
fig.update_xaxes(title='latitude', title_font=dict(size=18))
fig.update_yaxes(title='longitude', title_font=dict(size=18))
fig.show()
'''
print()

<img  src="Screenshot 2023-11-12 024719.png" >