# Analysis of Real Estate in Malta

In [None]:
# Financing assumptions: loan term in years and the interest rate per year.
loan_years, loan_rate = 25, 0.03

# Operating-cost assumptions, each expressed as a fraction of the rent.
vacancy_rate, repair_rate, management_rate = 0.05, 0.05, 0.1

## 1.1 - Loading the Dataset

In [None]:
import numpy as np
import pandas as pd

from scipy import stats

# Read the dataset CSV (one directory above the notebook) into a DataFrame.
dataset_path = '../dataset.csv'
dataset = pd.read_csv(dataset_path)

In [None]:
# Tag every row with a numeric Case: 1 when Is_Sale is True, 2 otherwise
# (anything that is not exactly True, including missing values, becomes 2).
is_sale = dataset['Is_Sale'] == True
dataset.loc[is_sale, 'Case'] = 1
dataset.loc[~is_sale, 'Case'] = 2

dataset

In [None]:
def _drop_price_outliers(listings, max_z=3):
    """Return the rows of `listings` whose Price z-score is below `max_z` in magnitude.

    Missing prices are ignored while computing the mean and standard deviation
    (nan_policy='omit'); without this a single NaN makes every z-score NaN and
    the filter silently returns an empty frame. Rows whose Price is missing are
    still dropped, because a NaN score never compares as `< max_z`.
    """
    z_scores = stats.zscore(listings['Price'].to_numpy(), nan_policy='omit')
    return listings[np.abs(z_scores) < max_z]


# Case 1 rows are sales, Case 2 rows are rentals; outliers are removed per subset
# so that sale prices and rent prices are each judged against their own distribution.
res_sale = _drop_price_outliers(dataset[dataset['Case'] == 1])
res_rent = _drop_price_outliers(dataset[dataset['Case'] == 2])

In [None]:
# Report how many rows remain in each subset.
sale_rows, rent_rows = len(res_sale), len(res_rent)
print(f'Residential Sale {sale_rows}')
print(f'Residential Rent {rent_rows}')

## 1.2 - General Visuals

In [None]:
import plotly.express as px

# Share of rows per Case.
# value_counts() orders its result by frequency, not by Case value, so the slice
# labels must be looked up from the index rather than hard-coded in a fixed order.
case_labels = {1: 'Residential Sale', 2: 'Residential Rent'}

chart_types = dataset['Case'].value_counts()
fig = px.pie(
    values=chart_types.values,
    names=[case_labels[case] for case in chart_types.index],
    title="Dataset by Case"
)

fig.show()

In [None]:
import plotly.express as px

# Share of rows per Province; slices are labelled with the province values themselves.
chart_types = dataset['Province'].value_counts()

pie_options = dict(
    values=chart_types.values,
    names=chart_types.index,
    title="Dataset by Province",
)
fig = px.pie(**pie_options)

fig.show()

In [None]:
# Plot every row on a map, coloured by its Case.
# `color` is the argument that maps a column to marker colour;
# `color_discrete_sequence` expects a list of CSS colour strings, not data values.
# Case is numeric (1 / 2), so the colours are drawn from a continuous scale.
fig = px.scatter_mapbox(
    dataset, lat="Latitude", lon="Longitude", color="Case",
    hover_name="Reference", hover_data=["Case", "Price"]
)

fig.update_layout(mapbox_style="carto-darkmatter")
# Remove the figure margins so the map fills the output area.
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()

## 1.3 - Distribution

In [None]:
# Price distribution of the sale subset, one box per property type.
fig = px.box(
    data_frame=res_sale,
    x="PropertyType",
    y="Price",
    title="Residential Sale - Price Vs Property Type",
)
fig.show()

In [None]:
# Price distribution of the rent subset, one box per property type.
fig = px.box(
    data_frame=res_rent,
    x="PropertyType",
    y="Price",
    title="Residential Rent - Price Vs Property Type",
)
fig.show()

## 1.4 - Feature Correlation for Sale and Rent

In [None]:
import seaborn as sn
import matplotlib.pyplot as plt

# Columns of interest for the correlation heatmaps (sale and rent).
valid_cols = [
    'Reference', 'Province', 'Town', 'PropertyType', 'TotalRooms', 'TotalBedrooms',
    'TotalBathrooms', 'TotalSqm', 'TotalIntArea', 'TotalExtArea', 'Price'
]

# Correlation is only defined for numeric data. Keep just the numeric columns,
# because DataFrame.corr() raises on non-numeric columns in pandas >= 2.0
# (older versions dropped them silently).
corr_matrix = res_sale[valid_cols].select_dtypes(include='number').corr()
sn.heatmap(corr_matrix, annot=True, cmap='BrBG')
plt.show()

In [None]:
# Correlation is only defined for numeric data. Keep just the numeric columns,
# because DataFrame.corr() raises on non-numeric columns in pandas >= 2.0
# (older versions dropped them silently).
corr_matrix = res_rent[valid_cols].select_dtypes(include='number').corr()
sn.heatmap(corr_matrix, annot=True, cmap='BrBG')
plt.show()

## 1.5 - Aggregated Data

In [None]:
# Keys that define one comparable group of properties.
group_by_cols = ['PropertyType', 'Town', 'TotalBedrooms']

# Aggregations applied per group: row count, mean of the size/bathroom
# columns, and the min / max / mean of Price.
agg_fun = dict(
    Reference='count',
    TotalBathrooms='mean',
    TotalIntArea='mean',
    TotalExtArea='mean',
    Price=['min', 'max', 'mean'],
)

In [None]:
# Aggregate each subset per group, rounded to two decimals.
res_sale_agg = res_sale.groupby(group_by_cols).agg(agg_fun).round(2)
res_rent_agg = res_rent.groupby(group_by_cols).agg(agg_fun).round(2)

# agg() with a dict yields two-level column labels such as ('Price', 'mean');
# flatten them into single strings like 'Price_mean'.
for agg_frame in (res_sale_agg, res_rent_agg):
    agg_frame.columns = ['_'.join(parts) for parts in agg_frame.columns.to_flat_index()]

In [None]:
# Join sale and rent aggregates on the grouping keys; overlapping column names
# get the default suffixes (_x for the sale side, _y for the rent side).
res_agg = res_sale_agg.merge(res_rent_agg, on=group_by_cols)

# Keep only groups backed by more than one sale row and more than one rent row.
has_multiple_sales = res_agg['Reference_count_x'] > 1
has_multiple_rents = res_agg['Reference_count_y'] > 1
res_agg_flt = res_agg[has_multiple_sales & has_multiple_rents].round(2).reset_index()

res_agg_flt

In [None]:
# Readable names for the merged aggregate columns
# (_x columns come from the sale aggregates, _y columns from the rent aggregates).
column_names = {
    'Reference_count_x': 'Sale_Count', 'Reference_count_y': 'Rent_Count',
    'TotalBathrooms_mean_x': 'Sale_Mean_Bathrooms', 'TotalBathrooms_mean_y': 'Rent_Mean_Bathrooms',
    'TotalIntArea_mean_x': 'Sale_Mean_IntArea', 'TotalIntArea_mean_y': 'Rent_Mean_IntArea',
    'TotalExtArea_mean_x': 'Sale_Mean_ExtArea', 'TotalExtArea_mean_y': 'Rent_Mean_ExtArea',
    'Price_min_x': 'Sale_Min_Price', 'Price_max_x': 'Sale_Max_Price', 'Price_mean_x': 'Sale_Mean_Price',
    'Price_min_y': 'Rent_Min_Price', 'Price_max_y': 'Rent_Max_Price', 'Price_mean_y': 'Rent_Mean_Price',
}

# Grouping keys first, then the renamed aggregates in the order listed above.
residential = res_agg_flt[group_by_cols + list(column_names)].rename(columns=column_names)

# Yearly loan repayment from the annuity formula P * r / (1 - (1 + r) ** -n),
# with the mean sale price as the full principal.
residential['Repayment_Year'] = (residential['Sale_Mean_Price'] * loan_rate) / (1 - (1 + loan_rate) ** -loan_years)
residential['Repayment_Monthly'] = residential['Repayment_Year'] / 12

# Operating costs as a fraction of the mean rent. The rent is treated as a
# monthly figure (it is multiplied by 12 below), so these are monthly amounts.
residential['Vacancy'] = residential['Rent_Mean_Price'] * vacancy_rate
residential['Repairs'] = residential['Rent_Mean_Price'] * repair_rate
residential['Management'] = residential['Rent_Mean_Price'] * management_rate

# Annualise the monthly operating costs before subtracting them from the yearly
# rent; subtracting the monthly amounts directly would understate costs 12-fold.
monthly_costs = residential['Vacancy'] + residential['Repairs'] + residential['Management']
residential['Net_Rent_Year'] = (residential['Rent_Mean_Price'] * 12) - (
    residential['Repayment_Year'] + monthly_costs * 12
)

residential['Net_Rent_Monthly'] = residential['Net_Rent_Year'] / 12

# Yearly net rent relative to the mean sale price.
residential['CoC_Return'] = residential['Net_Rent_Year'] / residential['Sale_Mean_Price']

residential = residential.round(2)
residential

## 1.6 - Top and Worst 10 Properties

In [None]:
# Rank groups from highest to lowest CoC_Return, leave out residential garages,
# and save the ranking to output.csv.
ranked = residential.sort_values(by='CoC_Return', ascending=False)
not_garage = ranked['PropertyType'].ne('Garage (Residential)')
res_coc = ranked[not_garage]
res_coc.to_csv('output.csv')
res_coc

In [None]:
# First ten rows of the ranking, i.e. the highest CoC_Return values.
top_10 = res_coc.iloc[:10]
top_10

In [None]:
# Last ten rows of the ranking, i.e. the lowest CoC_Return values.
worse_10 = res_coc.iloc[-10:]
worse_10