In [1]:
; Take NRS' data file about populations
; parse it
; put it into a simple data structure
; and write it to a CSV file.


; Dependencies

(require '[clojupyter.misc.helper :as helper])

(helper/add-dependencies '[org.clojure/data.csv "1.0.0"])

(require '[clojure.java.io :as io])
(require '[clojure.pprint :as pp])
(require '[clojure.data.csv :as csv])

nil

In [2]:
; Get the CSV file of population-oriented data from the National Records of Scotland (NRS) at
; https://www.nrscotland.gov.uk/files//statistics/population-estimates/time-series/mid-18/mid-year-pop-est-18-time-series-2.csv
; The name below is a relative path, so the file is expected in the notebook's working directory.

(def csv-file "mid-year-pop-est-18-time-series-2.csv")

#'user/csv-file

In [3]:
; Read the CSV file
; (doall forces every row to be parsed before with-open closes the reader)

(def csv-data
    (with-open [rdr (io/reader csv-file)]
        (doall (csv/read-csv rdr))))

; Preview the first 6 rows
(println (take 6 csv-data))

([Mid-year population estimates: Scotland and its council areas1, total population by sex: 1981 to 2018                                       ] [                                       ] [Code Persons 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018] [                                       ] [S92000003 Scotland 5,180,200 5,164,540 5,148,120 5,138,880 5,127,890 5,111,760 5,099,020 5,077,440 5,078,190 5,081,270 5,083,330 5,085,620 5,092,460 5,102,210 5,103,690 5,092,190 5,083,340 5,077,070 5,071,950 5,062,940 5,064,200 5,066,000 5,068,500 5,084,300 5,110,200 5,133,000 5,170,000 5,202,900 5,231,900 5,262,200 5,299,900 5,313,600 5,327,700  5,347,600  5,373,000 5,404,700 5,424,800 5,438,100] [S12000033 Aberdeen City 212,494 213,260 214,400 214,560 215,450 217,080 214,080 212,140 212,510 212,870 214,120 216,140 218,250 219,500 219,880 218,350 217,300 215

nil

In [4]:
; The councils are in the 2nd column, from the 6th row onwards, for 32 rows
; (the first 5 rows are skipped)

(def councils
    (into []
          (comp (drop 5) (take 32) (map second) (distinct))
          csv-data))

(pp/print-table [:pos :type]
                (map-indexed (fn [pos council] {:pos pos :type council}) councils))


| :pos |                 :type |
|------+-----------------------|
|    0 |         Aberdeen City |
|    1 |         Aberdeenshire |
|    2 |                 Angus |
|    3 |       Argyll and Bute |
|    4 |     City of Edinburgh |
|    5 |      Clackmannanshire |
|    6 | Dumfries and Galloway |
|    7 |           Dundee City |
|    8 |         East Ayrshire |
|    9 |   East Dunbartonshire |
|   10 |          East Lothian |
|   11 |     East Renfrewshire |
|   12 |               Falkirk |
|   13 |                  Fife |
|   14 |          Glasgow City |
|   15 |              Highland |
|   16 |            Inverclyde |
|   17 |            Midlothian |
|   18 |                 Moray |
|   19 |    Na h-Eileanan Siar |
|   20 |        North Ayrshire |
|   21 |     North Lanarkshire |
|   22 |        Orkney Islands |
|   23 |     Perth and Kinross |
|   24 |          Renfrewshire |
|   25 |      Scottish Borders |
|   26 |      Shetland Islands |
|   27 |        South Ayrshire |
|   28 | 

nil

In [5]:
; The years are in the 3rd row, from the 33rd column onwards
; (the first 32 columns of that row are skipped)

(def years
    (into []
          (comp (drop 32) (distinct))
          (nth csv-data 2)))

(pp/print-table [:pos :year]
                (map-indexed (fn [pos year] {:pos pos :year year}) years))


| :pos | :year |
|------+-------|
|    0 |  2011 |
|    1 |  2012 |
|    2 |  2013 |
|    3 |  2014 |
|    4 |  2015 |
|    5 |  2016 |
|    6 |  2017 |
|    7 |  2018 |


nil

In [6]:
; Get the population given the council and year

(defn row-ix
    "Return the index of the row in csv-data that holds the figures for `council`.

    `council` must be one of the names in `councils`. The offset of 5 mirrors
    the 5 rows skipped when `councils` was extracted from csv-data.

    Throws clojure.lang.ExceptionInfo when `council` is not in `councils`."
    [council]
    (let [pos (.indexOf ^java.util.List councils council)]
        ; .indexOf answers -1 for a missing element; adding the offset to that
        ; would silently select the row just before the first council.
        (when (neg? pos)
            (throw (ex-info (str "Unknown council: " (pr-str council))
                            {:council council})))
        (+ 5 pos)))

(defn col-ix
    "Return the index of the column in a csv-data row that holds the figure for `year`.

    `year` must be one of the values in `years` (strings, as read from the CSV
    header row). The offset of 32 mirrors the 32 columns skipped when `years`
    was extracted from the header row.

    Throws clojure.lang.ExceptionInfo when `year` is not in `years`."
    [year]
    (let [pos (.indexOf ^java.util.List years year)]
        ; .indexOf answers -1 for a missing element; adding the offset to that
        ; would silently select the column just before the first listed year.
        (when (neg? pos)
            (throw (ex-info (str "Unknown year: " (pr-str year))
                            {:year year})))
        (+ 32 pos)))

(defn population
    "Look up the csv-data cell for `council` and `year` and return it as a
    string with surrounding whitespace trimmed and all commas removed."
    [council year]
    (let [row  (nth csv-data (row-ix council))
          cell (nth row (col-ix year))]
        (.replaceAll (.trim cell) "," "")))

; Sanity check: the lookup for the first council and the first year must
; yield the string "222460" (population returns strings, not numbers).
(assert (= "222460" (population (first councils) (first years))))

nil

In [7]:
; To a sequence of maps, one per (council, year) pair,
; each containing: council, year, population

(def population-data
    (mapcat (fn [council]
                (map (fn [year]
                         {:council council
                          :year year
                          :population (population council year)})
                     years))
            councils))

(count population-data)

256

In [8]:
; Sample 5 random rows (drawn independently, so repeats are possible)

(pp/print-table [:council :year :population]
                (take 5 (repeatedly #(rand-nth population-data))))


|           :council | :year | :population |
|--------------------+-------+-------------|
|  City of Edinburgh |  2016 |      507170 |
|  South Lanarkshire |  2014 |      315300 |
|      East Ayrshire |  2011 |      122690 |
|     North Ayrshire |  2011 |      138090 |
| Na h-Eileanan Siar |  2013 |       27400 |


nil

In [9]:
; Write to CSV
;
; The columns are listed explicitly rather than derived from (keys ...) and
; (vals ...) of the maps: map iteration order is an implementation detail, so
; relying on it risks a header that does not line up with the data rows.
; Naming the columns also means the header is still written when
; population-data is empty.

(let [columns    [:council :year :population]
      header-row (map name columns)
      ; (apply juxt columns) builds a fn returning [council year population]
      data-rows  (map (apply juxt columns) population-data)]
    (with-open [writer (io/writer "nrs-population-data.csv")]
      (csv/write-csv writer (cons header-row data-rows))))

nil