In [2]:
; Take SEPA's data file about waste tonnes
; parse it
; put it into a simple data structure
; and write it to a CSV file.


; Dependencies

(require '[clojupyter.misc.helper :as helper])

(helper/add-dependencies '[org.clojure/data.csv "1.0.0"])

(require '[clojure.java.io :as io])
(require '[clojure.pprint :as pp])
(require '[clojure.data.csv :as csv])

nil

In [3]:
; Name of the input file: the waste-oriented export downloaded from the SEPA tool at
; https://www.environment.gov.scot/data/data-analysis/household-waste/
; NOTE(review): this is a bare file name, so the file is presumably expected in the
; kernel's working directory — confirm before running.

(def csv-file "Household Waste Data Application - Scottish household waste generation and management (tonnes) by Local Authority.csv")

#'user/csv-file

In [4]:
; Read the whole input file into memory while the reader is still open.
; Despite its .csv extension, the file is parsed as tab-separated.
; The rows are stored in a vector (not a realised lazy seq) so that fetching a row
; by position with nth is constant-time rather than a linear walk from the head.

(def csv-data
    (with-open [reader (io/reader csv-file)]
        (vec (csv/read-csv reader :separator \tab))))

; Peek at the first three rows: the two header rows and the first data row
(->> csv-data (take 3) println)

([Local Authority Waste type Generated Generated Generated Generated Generated Generated Generated Generated Prepared for reuse Prepared for reuse Prepared for reuse Prepared for reuse Prepared for reuse Prepared for reuse Prepared for reuse Prepared for reuse Recycled Recycled Recycled Recycled Recycled Recycled Recycled Recycled Organics recycled Organics recycled Organics recycled Organics recycled Organics recycled Organics recycled Organics recycled Organics recycled Recovered by incineration Recovered by incineration Recovered by incineration Recovered by incineration Recovered by incineration Recovered by incineration Recovered by incineration Recovered by incineration Recovered by co-incineration Recovered by co-incineration Recovered by co-incineration Recovered by co-incineration Recovered by co-incineration Recovered by co-incineration Recovered by co-incineration Recovered by co-incineration Managed by other methods Managed by other methods Managed by other methods Managed 

nil

In [5]:
; Process names live in the 1st row, from the 3rd column onwards.
; Each name is repeated across several columns, so keep only the first occurrence of each.

(def processes
    (let [process-row (first csv-data)]
        (vec (distinct (drop 2 process-row)))))

; Show each process next to its position in the vector
(->> processes
     (map-indexed (fn [pos process] {:pos pos :process process}))
     (pp/print-table [:pos :process]))


| :pos |                     :process |
|------+------------------------------|
|    0 |                    Generated |
|    1 |           Prepared for reuse |
|    2 |                     Recycled |
|    3 |            Organics recycled |
|    4 |    Recovered by incineration |
|    5 | Recovered by co-incineration |
|    6 |     Managed by other methods |
|    7 |     Disposed by incineration |
|    8 |                   Landfilled |


nil

In [6]:
; Years live in the 2nd row, from the 3rd column onwards.
; Duplicates are dropped, keeping the first occurrence of each year.

(def years
    (let [year-row (second csv-data)]
        (vec (distinct (drop 2 year-row)))))

; Show each year next to its position in the vector
(->> years
     (map-indexed (fn [pos year] {:pos pos :year year}))
     (pp/print-table [:pos :year]))


| :pos | :year |
|------+-------|
|    0 |  2011 |
|    1 |  2012 |
|    2 |  2013 |
|    3 |  2014 |
|    4 |  2015 |
|    5 |  2016 |
|    6 |  2017 |
|    7 |  2018 |


nil

In [7]:
; Council names live in the 1st column, from the 3rd row onwards.
; Duplicates are dropped, keeping the first occurrence of each council.

(def councils
    (let [data-rows (drop 2 csv-data)]
        (vec (distinct (map first data-rows)))))

; Show each council next to its position in the vector
(->> councils
     (map-indexed (fn [pos council] {:pos pos :council council}))
     (pp/print-table [:pos :council]))


| :pos |              :council |
|------+-----------------------|
|    0 |         Aberdeen City |
|    1 |         Aberdeenshire |
|    2 |                 Angus |
|    3 |       Argyll and Bute |
|    4 |     City of Edinburgh |
|    5 |      Clackmannanshire |
|    6 | Dumfries and Galloway |
|    7 |           Dundee City |
|    8 |         East Ayrshire |
|    9 |   East Dunbartonshire |
|   10 |          East Lothian |
|   11 |     East Renfrewshire |
|   12 |               Falkirk |
|   13 |                  Fife |
|   14 |          Glasgow City |
|   15 |              Highland |
|   16 |            Inverclyde |
|   17 |            Midlothian |
|   18 |                 Moray |
|   19 |    Na h-Eileanan Siar |
|   20 |        North Ayrshire |
|   21 |     North Lanarkshire |
|   22 |        Orkney Islands |
|   23 |     Perth and Kinross |
|   24 |          Renfrewshire |
|   25 |      Scottish Borders |
|   26 |      Shetland Islands |
|   27 |        South Ayrshire |
|   28 | 

nil

In [8]:
; Waste types live in the 2nd column, from the 3rd row onwards.
; Duplicates are dropped, keeping the first occurrence of each type.

(def types
    (let [data-rows (drop 2 csv-data)]
        (vec (distinct (map second data-rows)))))

; Show each waste type next to its position in the vector
(->> types
     (map-indexed (fn [pos type] {:pos pos :type type}))
     (pp/print-table [:pos :type]))


| :pos |                                                                                 :type |
|------+---------------------------------------------------------------------------------------|
|    0 |                                                           Animal and mixed food waste |
|    1 |                                                     Batteries and accumulators wastes |
|    2 |                                                                       Chemical wastes |
|    3 |                                                                     Combustion wastes |
|    4 | Discarded equipment (excluding discarded vehicles, batteries and accumulators wastes) |
|    5 |                                                                    Discarded vehicles |
|    6 |                                                                          Glass wastes |
|    7 |                                                     Health care and biological wastes |
|    8 |                     

nil

In [9]:
; Get the tonnage given the council, year, type and process

(defn row-ix
    "Returns the index of the csv-data row holding the figures for `council` and `type`.

    The arithmetic skips the 2 header rows and assumes the data rows are grouped by
    council, with one row per waste type inside each group, in the same order as the
    `councils` and `types` vectors.

    Throws ex-info when `council` or `type` is not a known value. Without that check
    the -1 returned by .indexOf would flow into the arithmetic and could silently
    select a different row."
    [council type]
    (let [council-pos (.indexOf councils council)
          type-pos    (.indexOf types type)]
        (when (neg? council-pos)
            (throw (ex-info (str "Unknown council: " (pr-str council))
                            {:council council})))
        (when (neg? type-pos)
            (throw (ex-info (str "Unknown waste type: " (pr-str type))
                            {:type type})))
        (+ 2
           (* (count types) council-pos)
           type-pos)))

(defn col-ix
    "Returns the index of the csv-data column holding the figures for `process` and `year`.

    The arithmetic skips the 2 leading label columns and assumes the remaining columns
    are grouped by process, with one column per year inside each group, in the same
    order as the `processes` and `years` vectors.

    Throws ex-info when `process` or `year` is not a known value. Without that check
    the -1 returned by .indexOf would flow into the arithmetic and could silently
    select a different column."
    [process year]
    (let [process-pos (.indexOf processes process)
          year-pos    (.indexOf years year)]
        (when (neg? process-pos)
            (throw (ex-info (str "Unknown process: " (pr-str process))
                            {:process process})))
        (when (neg? year-pos)
            (throw (ex-info (str "Unknown year: " (pr-str year))
                            {:year year})))
        (+ 2
           (* (count years) process-pos)
           year-pos)))

(defn tonnage
    "Looks up the cell of csv-data addressed by the given council, year, waste type and
    process, and returns it exactly as read from the file (a string)."
    [council year type process]
    (let [row (nth csv-data (row-ix council type))]
        (nth row (col-ix process year))))

; Sanity check: the very first data cell must match the known value from the file
(assert (= "9595.52" (tonnage (first councils) (first years) (first types) (first processes))))

nil

In [10]:
; Flatten the grid into a sequence of maps, one per combination of
; council, year, type and process, each carrying the tonnage for that combination.

(def waste-data
    (for [council councils
          year    years
          type    types
          process processes
          :let [tonnes (tonnage council year type process)]]
        ; keep this as a small map literal: its keys stay in the order written here
        {:council council
         :year    year
         :type    type
         :process process
         :tonnage tonnes}))

; Number of records produced
(count waste-data)

52992

In [11]:
; Sample 5 rows

; Tabulate five records, each drawn independently at random from waste-data
(->> (repeatedly 5 (fn [] (rand-nth waste-data)))
     (pp/print-table [:council :year :type :process :tonnage]))


|          :council | :year |                       :type |                  :process | :tonnage |
|-------------------+-------+-----------------------------+---------------------------+----------|
| East Renfrewshire |  2018 |              Vegetal wastes |         Organics recycled |  8587.15 |
|   Argyll and Bute |  2013 |  Paper and cardboard wastes | Recovered by incineration |          |
| City of Edinburgh |  2016 |              Plastic wastes |         Organics recycled |          |
|             Moray |  2017 |                   Used oils |         Organics recycled |          |
|  Scottish Borders |  2014 | Animal and mixed food waste |         Organics recycled |          |


nil

In [12]:
; Write waste-data to sepa-waste-data.csv: a header row followed by one row per record.
; The column order is spelled out explicitly instead of being taken from the key order
; of the record maps. This keeps the header and every data row aligned regardless of
; how the maps are built, and still writes a header when waste-data is empty.

(let [columns    [:council :year :type :process :tonnage]
      header-row (map name columns)
      ; juxt over the column keywords yields each record's values in column order
      data-rows  (map (apply juxt columns) waste-data)]
    (with-open [writer (io/writer "sepa-waste-data.csv")]
      (csv/write-csv writer (cons header-row data-rows))))

nil