In [1]:
; Take SEPA's dataset about waste tonnes and NRS's dataset about populations,
; add them to a triplestore
; to make them available as "linked data".


; Dependencies

(require '[clojupyter.misc.helper :as helper])
(require '[clojupyter.display :as display])

(helper/add-dependencies '[org.clojure/data.csv "1.0.0"])
(helper/add-dependencies '[com.datomic/client-pro "0.9.41"])

(require '[clojure.java.io :as io])
(require '[clojure.pprint :as pp])
(require '[clojure.string :as string])
(require '[clojure.data.csv :as csv])
(require '[datomic.client.api :as d])

nil

In [2]:
; Connect to the triplestore (a Datomic database management server)

; Client for the Datomic peer server.
; The access key, secret and endpoint can each be overridden through an
; environment variable (DATOMIC_ACCESS_KEY, DATOMIC_SECRET, DATOMIC_ENDPOINT);
; when a variable is unset or empty, the local development value is used.
(def dbms-client
    (d/client {
        :server-type :peer-server
        :access-key (or (not-empty (System/getenv "DATOMIC_ACCESS_KEY")) "ash")
        :secret (or (not-empty (System/getenv "DATOMIC_SECRET")) "ash123")
        :endpoint (or (not-empty (System/getenv "DATOMIC_ENDPOINT")) "localhost:8998")
        :validate-hostnames false}))

; Connection to the "waste" database; printed so the connection details are visible
(def conn (d/connect dbms-client {:db-name "waste"}))
(println conn)

{:db-name waste, :database-id 5e971c8d-9cea-4b08-b05a-3852aecd482b, :t 66, :next-t 1000, :type :datomic.client/conn}


nil

In [3]:
; Add facts about councils

; Schema for councils: a council is represented by its name,
; a single string that is declared as a unique identity.
(def schema-facts
    [{:db/ident       :council/name
      :db/doc         "A council"
      :db/valueType   :db.type/string
      :db/cardinality :db.cardinality/one
      :db/unique      :db.unique/identity}])

; A convenience function:
; submits `data` through `conn` as a single transaction, then prints how many
; temporary ids the transaction result contains, labelled with `desc`.
; Returns nil (the result of println).
(defn transact [conn data desc]
    (let [tx-result (d/transact conn {:tx-data data})
          n         (count (:tempids tx-result))
          ending    (if (= 1 n) "y" "ies")]
        (println (str n " " desc " entit" ending " inserted"))))

; Install the council schema before adding any council data
(transact conn schema-facts "council schema")

; The council names to add (32 entries)
(def councils
    ["Aberdeen City" "Aberdeenshire" "Angus" "Argyll and Bute"
     "City of Edinburgh" "Clackmannanshire" "Dumfries and Galloway" "Dundee City"
     "East Ayrshire" "East Dunbartonshire" "East Lothian" "East Renfrewshire"
     "Falkirk" "Fife" "Glasgow City" "Highland"
     "Inverclyde" "Midlothian" "Moray" "Na h-Eileanan Siar"
     "North Ayrshire" "North Lanarkshire" "Orkney Islands" "Perth and Kinross"
     "Renfrewshire" "Scottish Borders" "Shetland Islands" "South Ayrshire"
     "South Lanarkshire" "Stirling" "West Dunbartonshire" "West Lothian"])

; One entity map per council name
(def council-facts
    (for [council-name councils]
        {:council/name council-name}))

(transact conn council-facts "council data")

1 council schema entity inserted
32 council data entities inserted


nil

In [4]:
; Add facts about years

; Schema for years: a year is represented by its numeric value,
; a single long that is declared as a unique identity.
(def schema-facts
    [{:db/ident       :year/value
      :db/doc         "A year"
      :db/valueType   :db.type/long
      :db/cardinality :db.cardinality/one
      :db/unique      :db.unique/identity}])

(transact conn schema-facts "year schema")

; The years 2011 to 2018 inclusive
(def years (vec (range 2011 2019)))

; One entity map per year
(def year-facts
    (for [year years]
        {:year/value year}))

(transact conn year-facts "year data")

1 year schema entity inserted
8 year data entities inserted


nil

In [5]:
; Read the sepa-waste-data file

; A convenience function:
; treats the first row of `csv-data` as the header and returns a lazy sequence
; with one map per remaining row, keyed by the header cells turned into keywords.
; A row shorter than the header yields a map with only the leading keys.
(defn to-maps [csv-data]
  (let [column-keys (map keyword (first csv-data))
        data-rows   (rest csv-data)]
    (map (fn [row] (zipmap column-keys row))
         data-rows)))

; All rows of sepa-waste-data.csv as maps.
; The rows are fully realised (doall) before with-open closes the reader.
(def sepa-waste-data
    (with-open [reader (io/reader "sepa-waste-data.csv")]
        (->> (csv/read-csv reader)
             to-maps
             doall)))

; Print a sample: five randomly picked rows (picked with replacement)
(pp/print-table
    [:council :year :type :process :tonnage]
    (repeatedly 5 (fn [] (rand-nth sepa-waste-data))))


|          :council | :year |                                                                                 :type |                     :process | :tonnage |
|-------------------+-------+---------------------------------------------------------------------------------------+------------------------------+----------|
|           Falkirk |  2016 | Discarded equipment (excluding discarded vehicles, batteries and accumulators wastes) |           Prepared for reuse |          |
| North Lanarkshire |  2016 |                                                                          Glass wastes |                     Recycled |  6697.21 |
|          Highland |  2017 |                                        Mineral waste from construction and demolition |            Organics recycled |          |
|      East Lothian |  2016 |                                        Mineral waste from construction and demolition | Recovered by co-incineration |          |
|        Inverclyde |  2017 |          

nil

In [6]:
; Add facts about waste types

; Schema for waste types: a waste type is represented by its name,
; a single string that is declared as a unique identity.
(def schema-facts
    [{:db/ident       :waste-type/name
      :db/doc         "A waste type"
      :db/valueType   :db.type/string
      :db/cardinality :db.cardinality/one
      :db/unique      :db.unique/identity}])

(transact conn schema-facts "waste-type schema")

; The distinct values of the :type column of the waste data
(def waste-types
    (->> sepa-waste-data
         (map :type)
         distinct))

; One entity map per waste type
(def waste-type-facts
    (for [waste-type waste-types]
        {:waste-type/name waste-type}))

(transact conn waste-type-facts "waste-type data")

1 waste-type schema entity inserted
23 waste-type data entities inserted


nil

In [7]:
; Add facts about waste processes

; Schema for waste processes: a waste process is represented by its name,
; a single string that is declared as a unique identity.
(def schema-facts
    [{:db/ident       :waste-process/name
      :db/doc         "A waste process"
      :db/valueType   :db.type/string
      :db/cardinality :db.cardinality/one
      :db/unique      :db.unique/identity}])

(transact conn schema-facts "waste-process schema")

; The distinct values of the :process column of the waste data
(def waste-processes
    (->> sepa-waste-data
         (map :process)
         distinct))

; One entity map per waste process
(def waste-process-facts
    (for [waste-process waste-processes]
        {:waste-process/name waste-process}))

(transact conn waste-process-facts "waste-process data")

1 waste-process schema entity inserted
9 waste-process data entities inserted


nil

In [8]:
; Add facts about waste tonnes

; Schema for a waste-tonnes entity: references to a council, a year, a waste type
; and a waste process, plus the tonnage quantity itself.
(def schema-facts [
    ; TODO uniqueness constraint over council + year + type + process
    {:db/ident :waste-tonnes-cytp/council
        :db/valueType :db.type/ref
        :db/cardinality :db.cardinality/one
        :db/doc "A council that is associated with this entity"}
    {:db/ident :waste-tonnes-cytp/year
        :db/valueType :db.type/ref
        :db/cardinality :db.cardinality/one
        :db/doc "A year that is associated with this entity"}
    {:db/ident :waste-tonnes-cytp/waste-type
        :db/valueType :db.type/ref
        :db/cardinality :db.cardinality/one
        :db/doc "A waste type that is associated with this entity"}
    {:db/ident :waste-tonnes-cytp/waste-process
        :db/valueType :db.type/ref
        :db/cardinality :db.cardinality/one
        :db/doc "A waste process that is associated with this entity"}
    {:db/ident :waste-tonnes-cytp/tonnes
        :db/valueType :db.type/double
        :db/cardinality :db.cardinality/one
        :db/doc "A tonnage quantity"}])

(transact conn schema-facts "waste-tonnes-cytp schema")

; Only the rows that carry a tonnage value; rows with a blank :tonnage are dropped
(def waste-tonnes
    (remove #(string/blank? (:tonnage %)) sepa-waste-data))

; One entity map per remaining row. The council, year, waste type and waste
; process are given as lookup refs ([unique-attribute value]) to the entities
; added by the earlier cells.
(def waste-tonnes-facts
    (map (fn [row]
             {:waste-tonnes-cytp/council       [:council/name (:council row)]
              ; :year/value is declared as :db.type/long, so parse the year as a long
              :waste-tonnes-cytp/year          [:year/value (Long/parseLong (:year row))]
              :waste-tonnes-cytp/waste-type    [:waste-type/name (:type row)]
              :waste-tonnes-cytp/waste-process [:waste-process/name (:process row)]
              :waste-tonnes-cytp/tonnes        (Double/parseDouble (:tonnage row))})
         waste-tonnes))

(transact conn waste-tonnes-facts "waste-tonnes-cytp")

5 waste-tonnes-cytp schema entities inserted
23240 waste-tonnes-cytp entities inserted


nil

In [9]:
; Read the nrs-population-data file

; All rows of nrs-population-data.csv as maps.
; The rows are fully realised (doall) before with-open closes the reader.
(def nrs-population-data
    (with-open [reader (io/reader "nrs-population-data.csv")]
        (->> (csv/read-csv reader)
             to-maps
             doall)))

; Print a sample: five randomly picked rows (picked with replacement)
(pp/print-table
    [:council :year :population]
    (repeatedly 5 (fn [] (rand-nth nrs-population-data))))


|          :council | :year | :population |
|-------------------+-------+-------------|
|      Glasgow City |  2016 |      615070 |
|  Shetland Islands |  2015 |       23200 |
| North Lanarkshire |  2018 |      340180 |
| East Renfrewshire |  2012 |       91040 |
|      Renfrewshire |  2016 |      175930 |


nil

In [10]:
; Add facts about populations

; Schema for a population entity: references to a council and a year,
; plus the population quantity itself.
(def schema-facts [
    ; TODO uniqueness constraint over council + year
    {:db/ident :population-cy/council
        :db/valueType :db.type/ref
        :db/cardinality :db.cardinality/one
        :db/doc "A council that is associated with this entity"}
    {:db/ident :population-cy/year
        :db/valueType :db.type/ref
        :db/cardinality :db.cardinality/one
        :db/doc "A year that is associated with this entity"}
    {:db/ident :population-cy/population
        :db/valueType :db.type/long
        :db/cardinality :db.cardinality/one
        :db/doc "A population quantity"}])

(transact conn schema-facts "population schema")

; One entity map per row of the population data. The council and year are given
; as lookup refs ([unique-attribute value]) to the entities added by the earlier cells.
(def population-facts
    (map (fn [row]
             {:population-cy/council    [:council/name (:council row)]
              ; :year/value is declared as :db.type/long, so parse the year as a long
              :population-cy/year       [:year/value (Long/parseLong (:year row))]
              :population-cy/population (Long/parseLong (:population row))})
         nrs-population-data))

(transact conn population-facts "population-cy")

3 population schema entities inserted
256 population-cy entities inserted


nil

In [11]:
; Check for the expected numbers of facts

; The current database value; all checks below run against this one snapshot
(def db (d/db conn))

; Number of entities in `db` that have a value for the attribute `attr`.
; A count aggregate over an empty result returns no rows, so report that as 0.
(defn entity-count [db attr]
    (or (ffirst (d/q '[:find (count ?e)
                       :in $ ?attr
                       :where [?e ?attr]]
                     db attr))
        0))

; Attribute -> number of entities expected to carry it
(def expected-counts
    [[:council/name 32]
     [:year/value 8]
     [:waste-type/name 23]
     [:waste-process/name 9]
     [:waste-tonnes-cytp/tonnes 23240]
     [:population-cy/population 256]])

; Fail with a message that names the attribute and shows the actual count
(doseq [[attr expected] expected-counts]
    (let [actual (entity-count db attr)]
        (assert (= expected actual)
                (str "Expected " expected " " attr " facts but found " actual))))

nil