# Project
***
### Cian Hogan
### GMIT Data Analytics 2020
### Programming for Data Analysis Module

Data I want to generate

|Year|Property Type|Location|Sqr Footage|Price (€)|
|-----|-----|-----|-----|----|
|    |     |      |     |    |

Use previous property data to predict future house prices and generate sample data

In [90]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import re

In [151]:
# Load only the sale date, county and price columns from the PPR export.
ppr_columns = ["Date of Sale (dd/mm/yyyy)", "County", "Price (€)"]
data = pd.read_csv("PPR-ALL.csv", usecols=ppr_columns)
print(data.head())

  Date of Sale (dd/mm/yyyy)    County     Price (€)
0                  1/1/2010    Dublin  € 343,000.00
1                  3/1/2010     Laois  € 185,000.00
2                  4/1/2010    Dublin  € 438,500.00
3                  4/1/2010     Meath  € 400,000.00
4                  4/1/2010  Kilkenny  € 160,000.00


In [190]:
# Standard deviation of the sale prices, rounded to the nearest whole number.
# The numeric "Price" column is only created in a later cell, so the raw
# "Price (€)" strings are parsed here (drop the leading currency symbol and the
# thousands separators) to keep this cell runnable from a fresh kernel.
rawPrice = data["Price (€)"].str.slice(1,).str.replace(",", "").astype(float)
print(round(rawPrice.std()))

804625.7033148435


In [153]:
# Build a numeric "Price" column: drop the first character (the currency
# symbol), remove the thousands separators, then convert to float.
data["Price"] = (
    data["Price (€)"]
    .str[1:]
    .str.replace(",", "")
    .astype(float)
)

In [195]:
# Preview the frame, then display the lowest recorded sale price.
print(data.head(5))
data.loc[:, "Price"].min()

  Date of Sale (dd/mm/yyyy)    County     Price (€)     Price  Year
0                  1/1/2010    Dublin  € 343,000.00  343000.0  2010
1                  3/1/2010     Laois  € 185,000.00  185000.0  2010
2                  4/1/2010    Dublin  € 438,500.00  438500.0  2010
3                  4/1/2010     Meath  € 400,000.00  400000.0  2010
4                  4/1/2010  Kilkenny  € 160,000.00  160000.0  2010


5030.53

## Price by County
***

In [8]:
# Mean sale price over every record, rounded to zero decimal places.
meanPrice = data["Price"].mean()
round(meanPrice, 0)

255374.0

## Share of houses sold by county (as a proportion of all sales)

In [155]:
# Fraction of all recorded sales that fall in each county (four decimal places).
salesPerCounty = data.groupby("County")["Price"].count()
countyCount = (salesPerCounty / len(data)).round(4)

print(countyCount)

County
Carlow       0.0109
Cavan        0.0155
Clare        0.0229
Cork         0.1095
Donegal      0.0274
Dublin       0.3223
Galway       0.0505
Kerry        0.0290
Kildare      0.0500
Kilkenny     0.0157
Laois        0.0151
Leitrim      0.0090
Limerick     0.0374
Longford     0.0085
Louth        0.0266
Mayo         0.0237
Meath        0.0396
Monaghan     0.0078
Offaly       0.0119
Roscommon    0.0143
Sligo        0.0148
Tipperary    0.0268
Waterford    0.0264
Westmeath    0.0193
Wexford      0.0340
Wicklow      0.0311
Name: Price, dtype: float64


In [156]:
# Median and mean sale price for every county.
countyMedian = data.groupby("County")["Price"].median()
countyMean = data.groupby("County")["Price"].mean()

print(countyMedian)

County
Carlow       135000.00
Cavan         97500.00
Clare        132500.00
Cork         190000.00
Donegal      100000.00
Dublin       299560.00
Galway       170000.00
Kerry        136563.87
Kildare      241829.96
Kilkenny     155000.00
Laois        127753.30
Leitrim       82000.00
Limerick     145000.00
Longford      77500.00
Louth        162500.00
Mayo         105000.00
Meath        225000.00
Monaghan     115000.00
Offaly       121150.00
Roscommon     83000.00
Sligo        105000.00
Tipperary    120000.00
Waterford    135000.00
Westmeath    130000.00
Wexford      142500.00
Wicklow      260000.00
Name: Price, dtype: float64


In [189]:
# Each county's median price relative to the median of the whole data set
# (1.0 means the county matches the overall median).
overallMedian = data["Price"].median()
medianRatio = countyMedian.div(overallMedian)

print(medianRatio)

County
Carlow       0.699482
Cavan        0.505181
Clare        0.686528
Cork         0.984456
Donegal      0.518135
Dublin       1.552124
Galway       0.880829
Kerry        0.707585
Kildare      1.253005
Kilkenny     0.803109
Laois        0.661934
Leitrim      0.424870
Limerick     0.751295
Longford     0.401554
Louth        0.841969
Mayo         0.544041
Meath        1.165803
Monaghan     0.595855
Offaly       0.627720
Roscommon    0.430052
Sligo        0.544041
Tipperary    0.621762
Waterford    0.699482
Westmeath    0.673575
Wexford      0.738342
Wicklow      1.347150
Name: Price, dtype: float64


## Price by year
***


In [158]:
# Extract the sale year. The column holds day-first dates (dd/mm/yyyy), so the
# format is stated explicitly instead of relying on pandas' month-first default
# parsing, which silently swaps day and month wherever both readings are valid.
saleDates = pd.to_datetime(data["Date of Sale (dd/mm/yyyy)"], format="%d/%m/%Y")
data["Year"] = saleDates.dt.year

print(data.head())

  Date of Sale (dd/mm/yyyy)    County     Price (€)     Price  Year
0                  1/1/2010    Dublin  € 343,000.00  343000.0  2010
1                  3/1/2010     Laois  € 185,000.00  185000.0  2010
2                  4/1/2010    Dublin  € 438,500.00  438500.0  2010
3                  4/1/2010     Meath  € 400,000.00  400000.0  2010
4                  4/1/2010  Kilkenny  € 160,000.00  160000.0  2010


In [162]:
# Median sale price per year. Only "Price" is selected: the other columns are
# text, and pandas >= 2.0 raises a TypeError on them instead of silently
# dropping them. The double brackets keep the result a one-column DataFrame.
yearMedian = data.groupby("Year")[["Price"]].median()

In [163]:
# Show the median sale price for each year.
print(yearMedian)


          Price
Year           
2010  202000.00
2011  170000.00
2012  150000.00
2013  138000.00
2014  150000.00
2015  162500.00
2016  183000.00
2017  210000.00
2018  227000.00
2019  235881.99
2020  240000.00


In [164]:
# Year-on-year fractional change of the median price. It is computed from the
# "Price" column only, so re-running this cell (when "Pct Change" already
# exists) does not try to assign a multi-column frame to a single column.
yearMedian["Pct Change"] = yearMedian["Price"].pct_change()

In [224]:
# Column-wise mean, then standard deviation, of the yearly figures.
for summary in (yearMedian.mean(), yearMedian.std()):
    print(summary)

Price         188034.726364
Pct Change         0.022546
dtype: float64
Price         36857.728771
Pct Change        0.105846
dtype: float64


In [18]:
# Average year-on-year change of the median price.
yearMedian.loc[:, "Pct Change"].mean()

2.254597992459668

## House Size/Type

In [19]:
# TODO: read average prices by property type from "avg-prices-by-type.txt";
# this section is not implemented yet.
#with open("avg-prices-by-type.txt") as filo:

# Generate Data

#### Generate county

In [247]:
# Number of synthetic sales to generate.
SAMPLE_SIZE = 10000

# Empty frame that the following cells fill in column by column.
sample = pd.DataFrame(columns=["Date", "County", "m_Squared", "Price"])

# Seeded generator so the synthetic data set is reproducible on re-run;
# call default_rng() with no argument to get a fresh draw each time.
rng = np.random.default_rng(2020)

# countyCount is rounded to four decimals, so its values are not guaranteed to
# sum to exactly 1, which Generator.choice requires of `p`. Re-normalise first.
countyProb = countyCount.values / countyCount.values.sum()

# Draw each sale's county in proportion to its share of historical sales.
sample["County"] = rng.choice(countyCount.index, p=countyProb, size=SAMPLE_SIZE)

print(sample)

     Date   County m_Squared Price
0     NaN   Galway       NaN   NaN
1     NaN   Dublin       NaN   NaN
2     NaN     Cork       NaN   NaN
3     NaN  Leitrim       NaN   NaN
4     NaN   Dublin       NaN   NaN
...   ...      ...       ...   ...
9995  NaN    Meath       NaN   NaN
9996  NaN   Dublin       NaN   NaN
9997  NaN  Leitrim       NaN   NaN
9998  NaN     Mayo       NaN   NaN
9999  NaN   Dublin       NaN   NaN

[10000 rows x 4 columns]


#### Generate Date

In [248]:
# Every calendar day from 1 Jan 2021 to 31 Dec 2025. ISO dates are used because
# '1/1/2021' and '31/12/2025' are ambiguous between day-first and month-first
# and only parse consistently through pandas' fallback guessing.
dates = pd.date_range(start="2021-01-01", end="2025-12-31")

# Give each synthetic sale a uniformly random date and record its year.
sample["Date"] = rng.choice(dates, size=len(sample))
sample["Year"] = pd.DatetimeIndex(sample["Date"]).year
print(sample.head())

        Date   County m_Squared Price  Year
0 2022-02-05   Galway       NaN   NaN  2022
1 2024-10-10   Dublin       NaN   NaN  2024
2 2024-04-11     Cork       NaN   NaN  2024
3 2022-04-01  Leitrim       NaN   NaN  2022
4 2021-05-30   Dublin       NaN   NaN  2021


#### Generate floor area (m²)

In [249]:
# Average floor area of a home in Ireland is taken as 81 m2.
avgArea = 81

# Price per square metre implied by the mean and by the median sale price.
avgPerM = data["Price"].mean() / avgArea
medianPerM = data["Price"].median() / avgArea
print(avgPerM)
print(medianPerM)

# Standard deviation used for the floor-area distribution: (149 - 59) / 4.
# NOTE(review): presumably 149 and 59 are upper/lower typical floor areas and
# the range is treated as roughly four standard deviations — confirm the source.
areaStd = (149 - 59) / 4

print(areaStd)


3152.76818600237
2382.716049382716
22.5


In [250]:
# Floor area (m2) for each synthetic sale: normal around 81 m2 with spread
# areaStd, rounded to whole square metres.
areas = rng.normal(loc=81, scale=areaStd, size=len(sample)).round()

# A normal distribution is unbounded, so a few draws in a large sample can come
# out at or below zero, which would later yield zero or negative sale prices.
# Clip to at least 1 m2 so every generated area is strictly positive.
sample["m_Squared"] = np.clip(areas, 1, None)

print(sample.head())

        Date   County  m_Squared Price  Year
0 2022-02-05   Galway       89.0   NaN  2022
1 2024-10-10   Dublin       90.0   NaN  2024
2 2024-04-11     Cork       80.0   NaN  2024
3 2022-04-01  Leitrim       41.0   NaN  2022
4 2021-05-30   Dublin       46.0   NaN  2021


#### Generate Sale Price

In [251]:
# For every generated sale, look up its county's median-price ratio.
medRatio = [medianRatio[county] for county in sample["County"]]



In [252]:
# Create random change in year avg
yearPct = {}
pct = 1
for year in range(2021, 2026):
    
    yearPct[year] = round(pct + rng.normal(loc=0.022546, scale=0.105846),4)

growth = []

for i in sample["Year"]:
    growpct = yearPct[i]
    
    growth.append(growpct)
    


In [253]:
# Base price of each generated sale: floor area times the average price per m2.
msquarePrice = [area * avgPerM for area in sample["m_Squared"]]
        

In [254]:
# Convert the three per-sale factor lists to arrays so they multiply element-wise.
medRatio, growth, msquarePrice = (
    np.asarray(values) for values in (medRatio, growth, msquarePrice)
)

# Final price = county ratio x yearly growth factor x area-based base price.
price = np.multiply(np.multiply(medRatio, growth), msquarePrice)

In [255]:
# Store the generated prices, rounded to whole numbers.
sample["Price"] = np.round(price)

In [256]:
# Preview the finished synthetic data set.
print(sample.head(5))

        Date   County  m_Squared     Price  Year
0 2022-02-05   Galway       89.0  274345.0  2022
1 2024-10-10   Dublin       90.0  516606.0  2024
2 2024-04-11     Cork       80.0  291257.0  2024
3 2022-04-01  Leitrim       41.0   60961.0  2022
4 2021-05-30   Dublin       46.0  237571.0  2021


# Analyse Generated Data

Analyse the relationship between price and each of county, floor area (m²) and date.

# References

https://propertypriceregister.ie/Website/NPSRA/pprweb.nsf/page/ppr-home-en

https://www.macrotrends.net/countries/IRL/ireland/gdp-growth-rate

https://static.rasset.ie/documents/news/2020/07/daft.pdf

https://www.finfacts-blog.com/2018/08/average-irish-housing-size-lowest-of.html#:~:text=What%20is%20striking%20about%20average,and%20107%20in%20the%20Netherlands

https://www.rightmove.co.uk/news/articles/property-news/values-per-square-metre-of-houses-in-england-and-wales-revealed/#:~:text=The%20average%20asking%20price%20per%20square%20metre%20of%20houses%20across,%C2%A310%2C427%20per%20square%20metre

https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/725085/Floor_Space_in_English_Homes_main_report.pdf