In [1]:
from SPARQLWrapper import SPARQLWrapper, JSON
import pandas as pd

In [2]:
# Client for the Land Registry SPARQL endpoint; query results are requested as JSON.
endpoint_url = "https://landregistry.data.gov.uk/landregistry/query"
sparql = SPARQLWrapper(endpoint_url)
sparql.setReturnFormat(JSON)

In [3]:
# Fetch the last five years of UK House Price Index (UKHPI) observations for
# every region from the Land Registry SPARQL endpoint.
#
# The query keeps observations whose reference-period start is later than
# "now minus five years", keeps only English region labels, and keeps only
# region codes whose URI contains "gov".
#
# The cutoff date is assembled as text inside the query. Month and day are
# zero-padded because xsd:date only accepts the YYYY-MM-DD lexical form: an
# unpadded value such as "2018-1-5" is not a valid xsd:date, so the cast (and
# with it the FILTER) would fail for every row.
# NOTE(review): on 29 February the cutoff is still a non-existent date when the
# year five years earlier is not a leap year.
# NOTE(review): the captured output shows repeated identical rows; SELECT
# DISTINCT may be what is wanted here -- confirm against the data model.
sparql.setQuery("""
prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
prefix owl: <http://www.w3.org/2002/07/owl#>
prefix xsd: <http://www.w3.org/2001/XMLSchema#>
prefix sr: <http://data.ordnancesurvey.co.uk/ontology/spatialrelations/>
prefix ukhpi: <http://landregistry.data.gov.uk/def/ukhpi/>
prefix lrppi: <http://landregistry.data.gov.uk/def/ppi/>
prefix skos: <http://www.w3.org/2004/02/skos/core#>
prefix lrcommon: <http://landregistry.data.gov.uk/def/common/>

# House price index for all regions within a given date range
SELECT ?regionName ?code ?date ?hpi ?hpiDetached ?hpiFlatMaisonette ?hpiSemiDetached ?hpiTerraced ?averagePriceDetached ?averagePriceFlatMaisonette ?averagePriceSemiDetached ?averagePriceTerraced
{
    BIND( now() AS ?currentDateTime ) .
  BIND( CONCAT( str(year(?currentDateTime)-5), "-",
                IF(month(?currentDateTime) < 10, "0", ""), str(month(?currentDateTime)), "-",
                IF(day(?currentDateTime) < 10, "0", ""), str(day(?currentDateTime)) ) AS ?currentDateString ) .

  ?region ukhpi:refPeriodStart ?date;
          ukhpi:housePriceIndex ?hpi;
          ukhpi:housePriceIndexDetached ?hpiDetached;
          ukhpi:housePriceIndexFlatMaisonette ?hpiFlatMaisonette;
          ukhpi:housePriceIndexSemiDetached ?hpiSemiDetached;
          ukhpi:housePriceIndexTerraced ?hpiTerraced;
          ukhpi:averagePriceDetached ?averagePriceDetached;
          ukhpi:averagePriceFlatMaisonette ?averagePriceFlatMaisonette;
          ukhpi:averagePriceSemiDetached ?averagePriceSemiDetached;
          ukhpi:averagePriceTerraced ?averagePriceTerraced.

  ?region ukhpi:refRegion ?regionRef.
  
#  ?regionRef owl:sameAs ?code.
  ?regionRef rdfs:seeAlso ?code.
             
  ?regionRef rdfs:label ?regionName.
  

  FILTER (langMatches( lang(?regionName), "EN")&&
         ?date > xsd:date(?currentDateString))
             
  FILTER contains(str(?code),"gov")
}
    """
)

# Let a failed request raise here instead of printing it and carrying on:
# continuing would hit a NameError on `ret` in a fresh kernel, or silently
# reuse the results of an earlier run in a long-lived one.
ret = sparql.queryAndConvert()

# One row per result binding, with "<variable>.<field>" columns.
pd.json_normalize(ret["results"]["bindings"])

Unnamed: 0,regionName.type,regionName.xml:lang,regionName.value,code.type,code.value,date.type,date.datatype,date.value,hpi.type,hpi.datatype,...,averagePriceDetached.value,averagePriceFlatMaisonette.type,averagePriceFlatMaisonette.datatype,averagePriceFlatMaisonette.value,averagePriceSemiDetached.type,averagePriceSemiDetached.datatype,averagePriceSemiDetached.value,averagePriceTerraced.type,averagePriceTerraced.datatype,averagePriceTerraced.value
0,literal,en,South East,uri,http://statistics.data.gov.uk/id/statistical-g...,literal,http://www.w3.org/2001/XMLSchema#date,2018-02-01,literal,http://www.w3.org/2001/XMLSchema#decimal,...,552016,literal,http://www.w3.org/2001/XMLSchema#integer,205164,literal,http://www.w3.org/2001/XMLSchema#integer,339938,literal,http://www.w3.org/2001/XMLSchema#integer,267681
1,literal,en,South East,uri,http://statistics.data.gov.uk/id/statistical-g...,literal,http://www.w3.org/2001/XMLSchema#date,2018-03-01,literal,http://www.w3.org/2001/XMLSchema#decimal,...,548207,literal,http://www.w3.org/2001/XMLSchema#integer,203513,literal,http://www.w3.org/2001/XMLSchema#integer,339075,literal,http://www.w3.org/2001/XMLSchema#integer,267306
2,literal,en,South East,uri,http://statistics.data.gov.uk/id/statistical-g...,literal,http://www.w3.org/2001/XMLSchema#date,2018-04-01,literal,http://www.w3.org/2001/XMLSchema#decimal,...,549680,literal,http://www.w3.org/2001/XMLSchema#integer,203247,literal,http://www.w3.org/2001/XMLSchema#integer,341735,literal,http://www.w3.org/2001/XMLSchema#integer,269659
3,literal,en,South East,uri,http://statistics.data.gov.uk/id/statistical-g...,literal,http://www.w3.org/2001/XMLSchema#date,2018-05-01,literal,http://www.w3.org/2001/XMLSchema#decimal,...,552229,literal,http://www.w3.org/2001/XMLSchema#integer,204199,literal,http://www.w3.org/2001/XMLSchema#integer,341931,literal,http://www.w3.org/2001/XMLSchema#integer,268656
4,literal,en,South East,uri,http://statistics.data.gov.uk/id/statistical-g...,literal,http://www.w3.org/2001/XMLSchema#date,2018-06-01,literal,http://www.w3.org/2001/XMLSchema#decimal,...,555275,literal,http://www.w3.org/2001/XMLSchema#integer,206501,literal,http://www.w3.org/2001/XMLSchema#integer,345670,literal,http://www.w3.org/2001/XMLSchema#integer,270795
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23703,literal,en,Northern Ireland,uri,http://statistics.data.gov.uk/id/statistical-g...,literal,http://www.w3.org/2001/XMLSchema#date,2022-07-01,literal,http://www.w3.org/2001/XMLSchema#decimal,...,270991,literal,http://www.w3.org/2001/XMLSchema#integer,129230,literal,http://www.w3.org/2001/XMLSchema#integer,169956,literal,http://www.w3.org/2001/XMLSchema#integer,122673
23704,literal,en,Northern Ireland,uri,http://statistics.data.gov.uk/id/statistical-g...,literal,http://www.w3.org/2001/XMLSchema#date,2022-07-01,literal,http://www.w3.org/2001/XMLSchema#decimal,...,270991,literal,http://www.w3.org/2001/XMLSchema#integer,129230,literal,http://www.w3.org/2001/XMLSchema#integer,169956,literal,http://www.w3.org/2001/XMLSchema#integer,122673
23705,literal,en,Northern Ireland,uri,http://statistics.data.gov.uk/id/statistical-g...,literal,http://www.w3.org/2001/XMLSchema#date,2022-07-01,literal,http://www.w3.org/2001/XMLSchema#decimal,...,270991,literal,http://www.w3.org/2001/XMLSchema#integer,129230,literal,http://www.w3.org/2001/XMLSchema#integer,169956,literal,http://www.w3.org/2001/XMLSchema#integer,122673
23706,literal,en,Northern Ireland,uri,http://statistics.data.gov.uk/id/statistical-g...,literal,http://www.w3.org/2001/XMLSchema#date,2022-10-01,literal,http://www.w3.org/2001/XMLSchema#decimal,...,270991,literal,http://www.w3.org/2001/XMLSchema#integer,129230,literal,http://www.w3.org/2001/XMLSchema#integer,169956,literal,http://www.w3.org/2001/XMLSchema#integer,122673


In [4]:
# Flatten the SPARQL result bindings into a DataFrame for further processing.
bindings = ret["results"]["bindings"]
df = pd.json_normalize(bindings)

In [5]:
# Split each region URI on "/" and keep the sixth piece (index 5) as the short
# region code, e.g. "E12000008".
uri_parts = df['code.value'].str.split('/', expand=True)
df['code'] = uri_parts[5]

In [6]:
# Summarise the flattened frame: row count, column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 23708 entries, 0 to 23707
Data columns (total 36 columns):
 #   Column                               Non-Null Count  Dtype 
---  ------                               --------------  ----- 
 0   regionName.type                      23708 non-null  object
 1   regionName.xml:lang                  23708 non-null  object
 2   regionName.value                     23708 non-null  object
 3   code.type                            23708 non-null  object
 4   code.value                           23708 non-null  object
 5   date.type                            23708 non-null  object
 6   date.datatype                        23708 non-null  object
 7   date.value                           23708 non-null  object
 8   hpi.type                             23708 non-null  object
 9   hpi.datatype                         23708 non-null  object
 10  hpi.value                            23708 non-null  object
 11  hpiDetached.type                     2370

In [7]:
# Keep the region name, the short region code, the period date, and the value
# columns for each index / average-price measure; drop the type/datatype columns.
export_columns = [
    'regionName.value',
    'code',
    'date.value',
    'hpi.value',
    'hpiDetached.value',
    'hpiFlatMaisonette.value',
    'hpiSemiDetached.value',
    'hpiTerraced.value',
    'averagePriceDetached.value',
    'averagePriceFlatMaisonette.value',
    'averagePriceSemiDetached.value',
    'averagePriceTerraced.value',
]
df2 = df[export_columns]

In [8]:
# Show the trimmed frame as this cell's output.
df2

Unnamed: 0,regionName.value,code,date.value,hpi.value,hpiDetached.value,hpiFlatMaisonette.value,hpiSemiDetached.value,hpiTerraced.value,averagePriceDetached.value,averagePriceFlatMaisonette.value,averagePriceSemiDetached.value,averagePriceTerraced.value
0,South East,E12000008,2018-02-01,121.34,121.38,120.68,121.81,121.47,552016,205164,339938,267681
1,South East,E12000008,2018-03-01,120.78,120.55,119.71,121.51,121.30,548207,203513,339075,267306
2,South East,E12000008,2018-04-01,121.33,120.87,119.56,122.46,122.37,549680,203247,341735,269659
3,South East,E12000008,2018-05-01,121.51,121.43,120.12,122.53,121.92,552229,204199,341931,268656
4,South East,E12000008,2018-06-01,122.58,122.10,121.47,123.87,122.89,555275,206501,345670,270795
...,...,...,...,...,...,...,...,...,...,...,...,...
23703,Northern Ireland,N92000001,2022-07-01,158.76,161.26,147.88,155.61,162.58,270991,129230,169956,122673
23704,Northern Ireland,N92000001,2022-07-01,158.76,161.26,147.88,155.61,162.58,270991,129230,169956,122673
23705,Northern Ireland,N92000001,2022-07-01,158.76,161.26,147.88,155.61,162.58,270991,129230,169956,122673
23706,Northern Ireland,N92000001,2022-10-01,158.76,161.26,147.88,155.61,162.58,270991,129230,169956,122673


In [9]:
# Write the trimmed frame to CSV in the working directory, without the row index.
output_path = 'landreg.csv'
df2.to_csv(output_path, index=False)