In [93]:
; Add code libraries

(require '[clojupyter.misc.helper :as helper])

(helper/add-dependencies '[org.clojure/data.csv "1.0.0"])
(helper/add-dependencies '[org.clojure/data.json "1.0.0"])
(helper/add-dependencies '[clj-http/clj-http "3.10.1"])
(helper/add-dependencies '[org.apache.commons/commons-math3 "3.6.1"])

(require '[clojure.string :as str]
         '[clojure.pprint :as pp]
         '[clojure.java.io :as io]
         '[clojure.data.csv :as csv]
         '[clojure.data.json :as json]
         '[clj-http.client :as http])
         
(import 'java.io.FileWriter
        'java.net.URLEncoder
        'java.math.RoundingMode
        'org.apache.commons.math3.stat.regression.SimpleRegression)

org.apache.commons.math3.stat.regression.SimpleRegression

In [2]:
; Define convenience functions

; Convert parsed CSV data (a sequence of rows, the first row being the header)
; into a lazy sequence of maps: one map per data row, keyed by the header
; names turned into keywords.
(defn to-maps [csv-data]
    (let [column-keys (map keyword (first csv-data))]
        (map (fn [row] (zipmap column-keys row))
             (rest csv-data))))

; Ask statistics.gov.scot to execute the given SPARQL query
; and return its result as a list-of-maps.
;
; The query is POSTed as a form-urlencoded `query` parameter and the
; response is requested as CSV, which is then parsed and converted
; by to-maps (header row -> keyword keys).
;
; The query text is always percent-encoded as UTF-8. The single-argument
; URLEncoder/encode is deprecated because it uses the platform's default
; charset, which would make the request body machine-dependent for any
; non-ASCII character in the query.
(defn exec-query [^String sparql]
    (->> (http/post "http://statistics.gov.scot/sparql"
                    {:body (str "query=" (URLEncoder/encode sparql "UTF-8"))
                     :headers {"Accept" "text/csv"
                               "Content-Type" "application/x-www-form-urlencoded"}
                     :debug false})
        :body
        csv/read-csv
        to-maps))
        
; Compute 'the trend of y'.
; (Returns the gradient of a linear approximation to the curve described by xy-pairs.)
; Each element of xy-pairs is destructured as an [x y] pair and fed to a
; SimpleRegression that was constructed with `true` (i.e. with an intercept term).
(defn trend [xy-pairs]
    (let [regression (SimpleRegression. true)
          add-point! (fn [[x y]] (.addData regression x y))]
        (run! add-point! xy-pairs)
        (.getSlope regression)))

#'user/trend

In [97]:
; Query for the household waste solids data
;
; SPARQL text that selects ?year, ?area, ?endState, ?material and ?tonnes
; for every observation in the
; <http://statistics.gov.scot/data/household-waste> dataset.
; The area, period, waste-category and waste-management URIs of each
; observation are resolved to their rdfs:label values, so the result columns
; hold labels rather than URIs.
; NOTE(review): the xsd prefix is declared but not used by the query body.

(def sparql "

PREFIX qb: <http://purl.org/linked-data/cube#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX pdmx: <http://purl.org/linked-data/sdmx/2009/dimension#>
PREFIX sdmx: <http://statistics.gov.scot/def/dimension/>
PREFIX snum: <http://statistics.gov.scot/def/measure-properties/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT 
    ?year
    ?area
    ?endState
    ?material
    ?tonnes 
WHERE {
  
    ?tonnageObs qb:dataSet <http://statistics.gov.scot/data/household-waste> .
    ?tonnageObs pdmx:refArea ?areaUri .
    ?tonnageObs pdmx:refPeriod ?periodUri .
    ?tonnageObs sdmx:wasteCategory ?wasteCategoryUri .
    ?tonnageObs sdmx:wasteManagement ?wasteManagementUri .
    ?tonnageObs snum:count ?tonnes .
  
    ?areaUri rdfs:label ?area .
    ?periodUri rdfs:label ?year .
    ?wasteCategoryUri rdfs:label ?material .
    ?wasteManagementUri rdfs:label ?endState .
}
")

; Run the query held in `sparql` and convert the textual CSV fields of each
; row: :year becomes an integer and :tonnes a double (both parsed via bigdec).
(def household-waste-solids
    (map (fn [row]
             (-> row
                 (update :year (fn [text] (.intValue (bigdec text))))
                 (update :tonnes (fn [text] (.doubleValue (bigdec text))))))
         (exec-query sparql)))

; Report how many rows came back.
(println (count household-waste-solids) "rows")

36432 rows


nil

In [98]:
; Print a sample: 10 rows picked at random (with replacement) from
; household-waste-solids, shown as a table with the columns listed in ks.

(def ks [:year :area :endState :material :tonnes])
(->> #(rand-nth household-waste-solids)
     (repeatedly 10)
     (pp/print-table ks))


| :year |               :area |                         :endState |                                      :material | :tonnes |
|-------+---------------------+-----------------------------------+------------------------------------------------+---------|
|  2012 |               Moray |                   Other Diversion |                                          Soils |     0.0 |
|  2017 | West Dunbartonshire | Other Diversion (pre 2014 method) |                       Metallic wastes, ferrous |     0.0 |
|  2016 |   South Lanarkshire |                   Waste Generated |                    Animal and mixed food waste |  3153.0 |
|  2015 |    Shetland Islands |                   Other Diversion |                                   Glass wastes |     0.0 |
|  2017 |     Argyll and Bute |                   Other Diversion |              Health care and biological wastes |     0.0 |
|  2014 |   North Lanarkshire |                        Landfilled | Mineral waste from construction and demoli

nil

In [124]:
; Query for the population data
;
; SPARQL text that selects ?year, ?area and ?population from the
; population-estimates-current-geographic-boundaries dataset.
; Only areas whose statistical-entity code is S92 or S12 and whose status is
; 'Live' are included, and only the observations for the "all" age and "all"
; sex concepts. Area and period URIs are resolved to their rdfs:label values.
; Note: this re-defines the `sparql` var, replacing any earlier query text.

(def sparql "

PREFIX qb: <http://purl.org/linked-data/cube#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX pdmx: <http://purl.org/linked-data/sdmx/2009/dimension#>
PREFIX sdmx: <http://statistics.gov.scot/def/dimension/>
PREFIX snum: <http://statistics.gov.scot/def/measure-properties/>
PREFIX uent: <http://statistics.data.gov.uk/def/statistical-entity#>
PREFIX ugeo: <http://statistics.data.gov.uk/def/statistical-geography#>

SELECT 
  ?year
  ?area
  ?population

WHERE {
  VALUES ?areaType { 
           <http://statistics.gov.scot/id/statistical-entity/S92>
           <http://statistics.gov.scot/id/statistical-entity/S12> }
           
  ?areaUri uent:code ?areaType;
           ugeo:status 'Live' ;
           rdfs:label ?area .
           
  ?populationUri qb:dataSet <http://statistics.gov.scot/data/population-estimates-current-geographic-boundaries> ;
                 pdmx:refArea ?areaUri ;
                 pdmx:refPeriod ?periodUri ;
                 sdmx:age <http://statistics.gov.scot/def/concept/age/all> ;
                 sdmx:sex <http://statistics.gov.scot/def/concept/sex/all> ;
                 snum:count ?population .
  
  ?periodUri rdfs:label ?year .
}
")

; Run the query held in `sparql` and convert the textual CSV fields of each
; row: :year and :population both become integers (parsed via bigdec).
(def population
    (map (fn [row]
             (-> row
                 (update :year (fn [text] (.intValue (bigdec text))))
                 (update :population (fn [text] (.intValue (bigdec text))))))
         (exec-query sparql)))

; Report how many rows came back.
(println (count population) "rows")

627 rows


nil

In [125]:
; Print a sample: 10 rows picked at random (with replacement) from
; population, shown as a table with the columns listed in ks.

(def ks [:year :area :population])
(->> #(rand-nth population)
     (repeatedly 10)
     (pp/print-table ks))


| :year |              :area | :population |
|-------+--------------------+-------------|
|  2003 |   Clackmannanshire |       48140 |
|  2004 |     North Ayrshire |      136500 |
|  2012 |   Shetland Islands |       23210 |
|  2011 |    Argyll and Bute |       88930 |
|  2017 |     Orkney Islands |       22000 |
|  2001 | Na h-Eileanan Siar |       26450 |
|  2019 |    Argyll and Bute |       85870 |
|  2003 |     South Ayrshire |      111550 |
|  2001 |              Angus |      108370 |
|  2011 |   Scottish Borders |      113880 |


nil

In [128]:
; Query for the household data
;
; SPARQL text that selects ?year, ?area and ?count from the
; mid-year-household-estimates dataset.
; Only areas whose statistical-entity code is S92 or S12 and whose status is
; 'Live' are included. Area and period URIs are resolved to their rdfs:label
; values.
; Note: this re-defines the `sparql` var, replacing any earlier query text.

(def sparql "

PREFIX qb: <http://purl.org/linked-data/cube#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX pdmx: <http://purl.org/linked-data/sdmx/2009/dimension#>
PREFIX sdmx: <http://statistics.gov.scot/def/dimension/>
PREFIX snum: <http://statistics.gov.scot/def/measure-properties/>
PREFIX uent: <http://statistics.data.gov.uk/def/statistical-entity#>
PREFIX ugeo: <http://statistics.data.gov.uk/def/statistical-geography#>

SELECT 
  ?year
  ?area
  ?count

WHERE {
  VALUES ?areaType { 
           <http://statistics.gov.scot/id/statistical-entity/S92>
           <http://statistics.gov.scot/id/statistical-entity/S12> }
           
  ?areaUri uent:code ?areaType;
           ugeo:status 'Live' ;
           rdfs:label ?area .
           
  ?householdUri qb:dataSet <http://statistics.gov.scot/data/mid-year-household-estimates> ;
                 pdmx:refArea ?areaUri ;
                 pdmx:refPeriod ?periodUri ;
                 snum:count ?count .
  
  ?periodUri rdfs:label ?year .
}
")

; Run the query held in `sparql` and convert the textual CSV fields of each
; row: :year and :count both become integers (parsed via bigdec).
(def household
    (map (fn [row]
             (-> row
                 (update :year (fn [text] (.intValue (bigdec text))))
                 (update :count (fn [text] (.intValue (bigdec text))))))
         (exec-query sparql)))

; Report how many rows came back.
(println (count household) "rows")

627 rows


nil

In [129]:
; Print a sample: 10 rows picked at random (with replacement) from
; household, shown as a table with the columns listed in ks.

(def ks [:year :area :count])
(->> #(rand-nth household)
     (repeatedly 10)
     (pp/print-table ks))


| :year |               :area | :count |
|-------+---------------------+--------|
|  2004 |   East Renfrewshire |  35764 |
|  2018 | East Dunbartonshire |  46023 |
|  2014 |   East Renfrewshire |  38048 |
|  2006 |  Na h-Eileanan Siar |  11779 |
|  2016 |      North Ayrshire |  63440 |
|  2008 |   Perth and Kinross |  64284 |
|  2001 |            Highland |  89618 |
|  2011 |   Perth and Kinross |  64905 |
|  2009 |        East Lothian |  42211 |
|  2015 |    Shetland Islands |  10235 |


nil

In [130]:
; Assemble the target data structure

; Index the population rows by [year area]; each key maps to the vector of
; rows sharing that year and area.
(def population-indexed
    (group-by (fn [row] [(:year row) (:area row)]) population))

; Index the household rows the same way, by [year area].
(def household-indexed
    (group-by (fn [row] [(:year row) (:area row)]) household))

; The waste rows with summary/duplicate categories removed and, where a
; matching [year area] entry exists, enriched with per-citizen and
; per-household tonnages.
(def target-data-struct
    (letfn [;; Rows to drop.
            ;; "Scotland" rows could be useful, so they are left in but hidden on the page
            ;; (i.e. there is deliberately no (= "Scotland" (:area row)) test here).
            (unwanted? [row]
                (or
                    ;; duplicated by & could clash with summing on the page, so remove
                    (= "Waste Generated" (:endState row))
                    (= "Total Waste" (:material row))
                    ;; confusingly duplicated in the non pre 2014 data, so remove
                    (contains? #{"Other Diversion (pre 2014 method)"
                                 "Recycled (pre 2014 method)"}
                               (:endState row))))
            ;; Value of `field` in the first indexed row for this row's year and area (or nil).
            (lookup [index field row]
                (-> index
                    (get [(:year row) (:area row)])
                    first
                    field))
            ;; Add `ratio-key` = tonnes / divisor when a divisor was found; otherwise leave the row alone.
            (with-ratio [ratio-key divisor row]
                (if divisor
                    (assoc row ratio-key (/ (:tonnes row) divisor))
                    row))]
        (->> household-waste-solids
            (remove unwanted?)
            (map (fn [row]
                     (with-ratio :tonnesPerCitizen
                                 (lookup population-indexed :population row)
                                 row)))
            (map (fn [row]
                     (with-ratio :tonnesPerHousehold
                                 (lookup household-indexed :count row)
                                 row))))))

(println (count target-data-struct) "rows")

17424 rows


nil

In [131]:
; Write, as JSON, to a file
;
; The writer is opened with with-open so that it is always flushed and closed
; (even if JSON generation throws); previously a FileWriter was bound to *out*
; and never closed, leaking the handle and leaving the final flush to chance.
; The encoding is pinned to UTF-8 rather than the platform default so the
; output file is the same on every machine.

(def file (io/file "dx-data.json"))
(with-open [writer (io/writer file :encoding "UTF-8")]
    ;; json/pprint prints to *out*, so rebind it to the file writer
    (binding [*out* writer]
        (json/pprint target-data-struct)))
(println "wrote the JSON file" (.getAbsolutePath file))

wrote the JSON file /Users/amc/workspace/data-commons-scotland/dcs-shorts/pivot-and-drill/dx/dx-data.json


nil