In [87]:
; Generate visualisations of the variations of waste values across geographic areas in Scotland.


; Add code libraries

(require '[clojupyter.misc.helper :as helper])

(helper/add-dependencies '[org.clojure/data.csv "1.0.0"])
(helper/add-dependencies '[metasoarous/oz "1.6.0-alpha24"])
(helper/add-dependencies '[clj-http/clj-http "3.10.1"])

(require '[clojure.string :as str]
         '[clojure.pprint :as pp]
         '[clojure.java.io :as io]
         '[clojure.data.csv :as csv]
         '[clj-http.client :as http]
         '[oz.notebook.clojupyter :as oz]
         '[oz.core :as ozcore])
(import 'java.net.URLEncoder)

java.net.URLEncoder

In [12]:
; Define convenience functions

; Convert parsed CSV data (a sequence of rows, the first being the header row)
; into a lazy sequence of maps, one per data row, keyed by the keywordised
; header names.
(defn to-maps [csv-data]
    (let [column-keys (map keyword (first csv-data))
          data-rows   (rest csv-data)]
        (map (fn [row] (zipmap column-keys row))
             data-rows)))

; Ask statistics.gov.scot to execute the given SPARQL query
; and return its result as a list-of-maps.
;
;   sparql - the SPARQL query text
;
; The query is POSTed as a form-encoded `query` parameter and the response is
; requested as CSV, which is then parsed and converted by `to-maps` (so each
; result row becomes a map keyed by the keywordised CSV column names).
(defn exec-query [sparql]
    (->> (http/post "http://statistics.gov.scot/sparql" 
                    ;; Encode explicitly as UTF-8: the one-argument URLEncoder/encode
                    ;; is deprecated and falls back to the platform default charset,
                    ;; which would corrupt non-ASCII characters on some machines.
                    {:body (str "query=" (URLEncoder/encode ^String sparql "UTF-8")) 
                     :headers {"Accept" "text/csv" 
                               "Content-Type" "application/x-www-form-urlencoded"} 
                     :debug false})
        :body
        csv/read-csv
        to-maps))

#'user/exec-query

In [47]:
; Query for the waste tonnage generated per council citizen per year.
;
; The query joins observations from two statistics.gov.scot datasets
; (household-waste and population-estimates-current-geographic-boundaries)
; on the same area and the same period. Waste observations are restricted to
; the "Total Waste" category with "Waste Generated" management; population
; observations are restricted to all ages and all sexes.
; Result columns: council (area label), year (period label),
; tonnagePerCitizen (tonnage / population) and councilCode (the trailing part
; of the area URI).

(def sparql "

PREFIX qb: <http://purl.org/linked-data/cube#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX pdmx: <http://purl.org/linked-data/sdmx/2009/dimension#>
PREFIX sdmx: <http://statistics.gov.scot/def/dimension/>
PREFIX snum: <http://statistics.gov.scot/def/measure-properties/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT 
    ?council 
    ?year 
    ?tonnagePerCitizen 
    (strafter(str(?areaUri), 'http://statistics.gov.scot/id/statistical-geography/') as ?councilCode) 
WHERE {
  
    ?tonnageObs qb:dataSet <http://statistics.gov.scot/data/household-waste> .
    ?tonnageObs pdmx:refArea ?areaUri .
    ?tonnageObs pdmx:refPeriod ?periodUri .
    ?tonnageObs sdmx:wasteCategory ?wasteCategoryUri .
    ?tonnageObs sdmx:wasteManagement ?wasteManagementUri .
    ?tonnageObs snum:count ?tonnage .
  
    ?wasteCategoryUri rdfs:label \"Total Waste\" .
    ?wasteManagementUri rdfs:label \"Waste Generated\" .

    ?populationObs qb:dataSet <http://statistics.gov.scot/data/population-estimates-current-geographic-boundaries> .
    ?populationObs pdmx:refArea ?areaUri .
    ?populationObs pdmx:refPeriod ?periodUri .
    ?populationObs sdmx:age <http://statistics.gov.scot/def/concept/age/all> .
    ?populationObs sdmx:sex <http://statistics.gov.scot/def/concept/sex/all> .
    ?populationObs snum:count ?population .

    ?areaUri rdfs:label ?council .
    ?periodUri rdfs:label ?year .
    BIND((xsd:integer(?tonnage)/xsd:integer(?population)) AS ?tonnagePerCitizen) .
}
")

; Execute the query and hold its rows (maps keyed by :council, :year,
; :tonnagePerCitizen and :councilCode) ordered by council and then by year.
(def tonnage-generated-per-council-citizen-per-year 
    (->> sparql
        exec-query
        ;; Sort on the columns the query actually returns. The previous keys
        ;; (:c and :y) do not exist in the rows, so every sort key was
        ;; [nil nil] and the rows were left in server order.
        (sort-by (juxt :council :year))))

(println (count tonnage-generated-per-council-citizen-per-year) "rows")

#'user/tonnage-generated-per-council-citizen-per-year

In [48]:
; Show five randomly picked rows (picked with replacement) as a table

(def ks [:council :councilCode :year :tonnagePerCitizen])
(->> (fn [] (rand-nth tonnage-generated-per-council-citizen-per-year))
     (repeatedly 5)
     (pp/print-table ks))


|          :council | :councilCode | :year |         :tonnagePerCitizen |
|-------------------+--------------+-------+----------------------------|
|      West Lothian |    S12000040 |  2012 | 0.412090222146468950627805 |
|             Moray |    S12000020 |  2015 | 0.524719924615223536802429 |
|           Falkirk |    S12000014 |  2013 | 0.457813693051667090862815 |
|  Scottish Borders |    S12000026 |  2017 | 0.464884367936011128499391 |
| City of Edinburgh |    S12000036 |  2016 | 0.381199597768006782735572 |


nil

In [133]:
; Derive 3 subsets of data:
;   * 'recent'  - 2018's tonnage of waste generated per council citizen
;   * 'average' - 2011-2018's average tonnage of waste generated per council citizen
;   * 'trend'   - 2011-2018's difference in tonnage of waste generated per council citizen
;
; The result is one map per council with the keys :council, :recent, :average
; and :trend. :recent is the value as returned by the query (a string);
; :average and :trend are BigDecimals. A value is nil when the observations
; needed to compute it are missing for that council.

(def tonnage-generated-per-council-citizen-based
    (let [base-data tonnage-generated-per-council-citizen-per-year
          first-year 2011
          last-year 2018
          ;; The :year values are strings, so hold the period as a set of strings.
          period (set (map str (range first-year (inc last-year))))
          ;; Index the rows once instead of re-scanning the whole dataset
          ;; several times for every council.
          by-council (group-by :council base-data)]
        (for [council (->> base-data (map :council) distinct)]
            (let [;; Only the rows inside the reporting period contribute, so extra
                  ;; years returned by the query cannot skew the average.
                  rows (filter #(contains? period (:year %)) (get by-council council))
                  tonnage-in (fn [year]
                                 (->> rows
                                      (filter #(= (str year) (:year %)))
                                      first
                                      :tonnagePerCitizen))
                  recent (tonnage-in last-year)
                  earliest (tonnage-in first-year)
                  tonnages (map (comp bigdec :tonnagePerCitizen) rows)
                  total (apply + tonnages)]
                {:council council
                 :recent recent
                 ;; Divide by the number of observations actually present rather
                 ;; than a hard-coded 8. A bounded precision is needed because
                 ;; BigDecimal division throws on a non-terminating quotient.
                 :average (when (seq tonnages)
                              (with-precision 34
                                  (/ total (count tonnages))))
                 :trend ;; TODO: maybe compute the Ordinary Least Squares coefficient
                        ;; but compute a simple subtraction for now
                        (when (and recent earliest)
                            (- (bigdec recent) (bigdec earliest)))}))))
            
(println (count tonnage-generated-per-council-citizen-based) "rows")

33 rows


nil

In [135]:
; Show five randomly picked rows (picked with replacement) as a table

(def ks [:council :recent :average :trend])
(->> (fn [] (rand-nth tonnage-generated-per-council-citizen-based))
     (repeatedly 5)
     (pp/print-table ks))


|          :council |                    :recent |                      :average |                      :trend |
|-------------------+----------------------------+-------------------------------+-----------------------------|
| East Renfrewshire | 0.461563517915309446254072 | 0.492373234778840765744668375 | -0.036079913424851328990945 |
|    Orkney Islands | 0.460612888688598467778279 | 0.470190287581728018161481625 | -0.018145281246042055097538 |
|      East Lothian | 0.473901124870025522261083 |  0.49911582824953486857742025 | -0.058154519645586967730911 |
| North Lanarkshire | 0.429698982891410429772473 | 0.457329635901437797113742625 | -0.044794680498379929104703 |
|       Dundee City |  0.40855798319327731092437 |  0.44217890463213453189780425 | -0.055898538545853123858239 |


nil

In [136]:
; Store tonnage-generated-per-council-citizen-based in a CSV file for subsequent use by the Vega chart

(def filename "tonnage-generated-per-council-citizen-based.csv")

(let [file (io/file filename)
      ;; Name the columns explicitly so that the header and every data row are
      ;; guaranteed to line up, instead of relying on each map yielding its
      ;; keys and vals in the same order. It also means that the header is
      ;; still written when there are no data rows.
      columns [:council :recent :average :trend]
      header-row (map name columns)
      data-rows (map (apply juxt columns)
                     tonnage-generated-per-council-citizen-based)]
    ;; with-open closes (and so flushes) the writer even if writing fails.
    (with-open [writer (io/writer file)]
      (csv/write-csv writer (cons header-row data-rows)))
      
    (println "Wrote to" (.getAbsolutePath file)))

Wrote to /Users/amc/workspace/data-commons-scotland/dcs-shorts/choropleth-generation/tonnage-generated-per-council-citizen-based.csv


nil

In [139]:
; Choropleths of the waste generated per council citizen:
; one map each for the 2018 tonnage, the 2011-2018 average tonnage
; and the 2011-2018 change in tonnage.

; Base URL from which the chart fetches both the topoJSON boundaries and the CSV data file.
(def repo-dir "https://raw.githubusercontent.com/data-commons-scotland/dcs-shorts/topojson-experiment/choropleth-generation/")

; Vega-Lite specification: the inner :spec is repeated as a row per measure,
; each with its own (independent) colour scale.
(def chart-spec {:$schema "https://vega.github.io/schema/vega-lite/v4.json"
                 :repeat {:row ["2018 tonnage" 
                                "2011-2018 average tonnage"
                                "2011-2018 change in tonnage"]}
                 :resolve {:scale {:color "independent"}}
                 :spec {
                     ;; Vega-Lite expects numeric pixel sizes here; the previous
                     ;; string values ("500") do not conform to the schema.
                     :width 500
                     :height 500
                     :data {:url (str repo-dir "topo_lad.json")
                            :format {:type "topojson" :feature "lad"}}
                     :transform [;; Cross reference by council name rather than council code
                                 ;; because the topoJSON data uses some obsolete codes (etc.).
                                 {:lookup "properties['LAD13NM']" 
                                  :from {:data {:url (str repo-dir filename)}
                                         :key "council"
                                         :fields ["recent" "average" "trend"]}
                                  :as ["2018 tonnage" 
                                       "2011-2018 average tonnage"
                                       "2011-2018 change in tonnage"]}]
                     :projection {:type "albers" :rotate [0, 0, 0]}
                     :mark {:type "geoshape" :strokeWidth 0.2 :stroke "black"}
                     :encoding {:tooltip [{:title "council" :field "properties['LAD13NM']" :type "nominal"}
                                          {:field {:repeat "row"} :type "quantitative"}]
                                :color {;:title "tonnage per citizen"
                                        :field {:repeat "row"}
                                        :type "quantitative"}}}})

; (print (json/write-str chart-spec))

; (ozcore/export! [:div [:vega-lite chart-spec]] "choropleth.html" {:from-format :hiccup :to-format :html})

(oz/view! chart-spec)

to-format: :html
[I 16:26:52.184 Clojupyter] oz.core:425 -- input: /var/folders/wl/ff7688p93t1b3tm2l0bv93yc0000gn/T/9dfcce6d-8644-4a0d-8002-e5f177ea02bb7040976134973307118.vl.json
[I 16:26:52.191 Clojupyter] oz.core:426 -- output: /var/folders/wl/ff7688p93t1b3tm2l0bv93yc0000gn/T/607c77a6-dbcf-473b-a4d2-2cd5aae261e22047451304767038196.png
