# UK House Prices Data Analysis Project

In [1]:
import polars as pl
import glob
import os
import numpy as np

# Let's first check what files exist
folder_path = r"C:\Users\Student\UK-Housing-Prices-Analysis"
csv_pattern = os.path.join(folder_path, "*.csv")

# Print the files found to debug
file_list = glob.glob(csv_pattern)
print("Files found:", file_list)

if not file_list:
    raise Exception("No CSV files found in the specified directory!")

# Combine all CSV files into one DataFrame
combined_df = pl.concat((pl.read_csv(file) for file in file_list))

# Function to save the merged dataset as CSV
def output(mergelist):
    csv_path = os.path.join(folder_path, "merge_dataset.csv")
    # Using write_csv instead of to_csv for Polars DataFrame
    mergelist.write_csv(csv_path)
    print(f'Saved to CSV at {csv_path}')
    return mergelist

# Call the output function with the combined DataFrame
output(combined_df)

Files found: ['C:\\Users\\Student\\UK-Housing-Prices-Analysis\\E&W Year_1995.csv', 'C:\\Users\\Student\\UK-Housing-Prices-Analysis\\E&W Year_1996.csv', 'C:\\Users\\Student\\UK-Housing-Prices-Analysis\\E&W Year_1997_1.csv', 'C:\\Users\\Student\\UK-Housing-Prices-Analysis\\E&W Year_1997_2.csv', 'C:\\Users\\Student\\UK-Housing-Prices-Analysis\\E&W Year_1998.csv', 'C:\\Users\\Student\\UK-Housing-Prices-Analysis\\E&W Year_1999_1.csv', 'C:\\Users\\Student\\UK-Housing-Prices-Analysis\\E&W Year_1999_2.csv', 'C:\\Users\\Student\\UK-Housing-Prices-Analysis\\E&W Year_2000_1.csv', 'C:\\Users\\Student\\UK-Housing-Prices-Analysis\\E&W Year_2000_2.csv', 'C:\\Users\\Student\\UK-Housing-Prices-Analysis\\E&W Year_2001_1.csv', 'C:\\Users\\Student\\UK-Housing-Prices-Analysis\\E&W Year_2001_2.csv', 'C:\\Users\\Student\\UK-Housing-Prices-Analysis\\E&W Year_2002_1.csv', 'C:\\Users\\Student\\UK-Housing-Prices-Analysis\\E&W Year_2002_2.csv', 'C:\\Users\\Student\\UK-Housing-Prices-Analysis\\E&W Year_2003_1.csv'

unique_id,price,date,post_code,property_type,whether_newbuild,freehold,address1,town,local_authority,county,record_status,year,month,quarter,region,country
str,i64,str,str,str,str,str,str,str,str,str,str,i64,i64,i64,str,str
"""{00005916-E189-4E1C-A85A-EB8CF…",82500,"""29/06/1995 00:00""","""NE49 9PB""","""D""","""Y""","""F""","""42""","""HALTWHISTLE""","""TYNEDALE""","""NORTHUMBERLAND""","""A""",1995,6,2,"""North East""","""England"""
"""{0000C0D6-F362-4810-8693-96897…",16000,"""28/07/1995 00:00""","""TS25 5SN""","""T""","""N""","""F""","""28""","""HARTLEPOOL""","""HARTLEPOOL""","""HARTLEPOOL""","""A""",1995,7,3,"""North East""","""England"""
"""{0005BD5B-5950-4590-B3E5-63200…",23000,"""11/08/1995 00:00""","""TS25 4HT""","""S""","""N""","""F""","""26""","""HARTLEPOOL""","""HARTLEPOOL""","""HARTLEPOOL""","""A""",1995,8,3,"""North East""","""England"""
"""{00074E3B-C045-46F3-A08B-83507…",21000,"""31/10/1995 00:00""","""DH9 0EP""","""T""","""N""","""F""","""2""","""STANLEY""","""DERWENTSIDE""","""DURHAM""","""A""",1995,10,4,"""North East""","""England"""
"""{0007B4EC-A358-437E-83CE-C489E…",60000,"""19/12/1995 00:00""","""NE65 8DR""","""T""","""N""","""F""","""4""","""MORPETH""","""ALNWICK""","""NORTHUMBERLAND""","""A""",1995,12,4,"""North East""","""England"""
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""{8D636320-2F5B-45ED-9ABC-FFF65…",290000,"""2013-12-11 00:00""","""BB12 9JL""","""S""","""N""","""F""","""DIMPENLEY HEAD FARM COTTAGE""","""BURNLEY""","""PENDLE""","""LANCASHIRE""","""A""",2013,12,4,"""North West""","""England"""
"""{1D8981E5-8EC9-4C51-AC74-FFFB1…",153000,"""2013-12-09 00:00""","""SK14 4EA""","""S""","""N""","""F""","""20""","""HYDE""","""TAMESIDE""","""GREATER MANCHESTER""","""A""",2013,12,4,"""North West""","""England"""
"""{D9B3A6C3-F8C4-440F-A0D1-FFFD8…",697000,"""2013-12-13 00:00""","""BR3 6QR""","""D""","""N""","""F""","""7""","""BECKENHAM""","""BROMLEY""","""GREATER LONDON""","""A""",2013,12,4,"""London""","""England"""
"""{471A7FF7-C0FA-4171-BF5B-FFFDE…",88000,"""2013-12-20 00:00""","""HR4 0AQ""","""F""","""N""","""L""","""12""","""HEREFORD""","""HEREFORDSHIRE""","""HEREFORDSHIRE""","""A""",2013,12,4,"""South West""","""England"""


In [5]:
# Summary statistics
combined_df.describe()

statistic,unique_id,price,date,post_code,property_type,whether_newbuild,freehold,address1,town,local_authority,county,record_status,year,month,quarter,region,country
str,str,f64,str,str,str,str,str,str,str,str,str,str,f64,f64,f64,str,str
"""count""","""18720390""",18720390.0,"""18720390""","""18720390""","""18720390""","""18720390""","""18720390""","""18720390""","""18720390""","""18720390""","""18720390""","""18720390""",18720390.0,18720390.0,18720390.0,"""18720390""","""18720390"""
"""null_count""","""0""",0.0,"""0""","""0""","""0""","""0""","""0""","""0""","""0""","""0""","""0""","""0""",0.0,0.0,0.0,"""0""","""0"""
"""mean""",,156211.54571,,,,,,,,,,,2003.327378,6.808984,2.596922,,
"""std""",,173010.586661,,,,,,,,,,,4.986825,3.30632,1.081937,,
"""min""","""{000000AC-478E-47B3-9B71-7C1E9…",5050.0,"""01/01/1995 00:00""",""" ""","""D""","""N""","""F""",""" ""","""ABBOTS LANGLEY""","""ABERCONWY""","""AVON""","""A""",1995.0,1.0,1.0,"""East Midlands""","""England"""
"""25%""",,69000.0,,,,,,,,,,,1999.0,4.0,2.0,,
"""50%""",,120000.0,,,,,,,,,,,2003.0,7.0,3.0,,
"""75%""",,190000.0,,,,,,,,,,,2007.0,10.0,4.0,,
"""max""","""{FFFFFE71-D9FC-4232-AD14-819C8…",54959000.0,"""31/12/2011 00:00""","""YO8 9YE""","""T""","""Y""","""U""","""ZYTEK HOUSE""","""YSTRAD MEURIG""","""YORK""","""YORK""","""A""",2013.0,12.0,4.0,"""Yorkshire and The Humber""","""Wales"""


In [10]:
# Data Visualisation
import matplotlib.pyplot as plt
import seaborn as sns

# Histogram
combined_df.hist(figsize=(10, 6))
plt.show()

# Boxplot
sns.boxplot(data=combined_df)
plt.show()

# Correlation Matrix
corr_matrix = combined_df.corr()
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', square=True)
plt.show()

AttributeError: 'DataFrame' object has no attribute 'hist'

In [9]:
#Missing Value Analysis
print(combined_df.isnull().sum())

AttributeError: 'DataFrame' object has no attribute 'isnull'

In [None]:
# Scatter plot
sns.scatterplot(x='column1', y='column2', data=mergelist)
plt.show()

In [None]:
# Pair Plots
sns.pairplot(mergelist)
plt.show()