In [1]:
%install '.package(url: "https://github.com/mxcl/Path.swift", from: "0.16.1")' Path
%install '.package(url: "https://github.com/JustHTTP/Just", from: "0.7.1")' Just

Installing packages:
	.package(url: "https://github.com/mxcl/Path.swift", from: "0.16.1")
		Path
	.package(url: "https://github.com/JustHTTP/Just", from: "0.7.1")
		Just
With SwiftPM flags: []
Working in: /tmp/tmp5y3tg_1b
Fetching https://github.com/mxcl/Path.swift
Fetching https://github.com/JustHTTP/Just
Completed resolution in 2.08s
Cloning https://github.com/mxcl/Path.swift
Resolving https://github.com/mxcl/Path.swift at 0.16.2
Cloning https://github.com/JustHTTP/Just
Resolving https://github.com/JustHTTP/Just at 0.7.1
Compile Swift Module 'Path' (9 sources)
Compile Swift Module 'Just' (1 sources)
Compile Swift Module 'jupyterInstalledPackages' (1 sources)
Linking ./.build/x86_64-unknown-linux/debug/libjupyterInstalledPackages.so
Initializing Swift...
Loading library...
Installation complete!


## Getting the MNIST dataset

In [2]:
// export
import Foundation
import Just
import Path

In [3]:
//export
/// Runs an executable and returns what it wrote to standard output.
///
/// - Parameters:
///   - launchPath: File-system path of the executable to run.
///   - arguments: Command-line arguments handed to the executable.
/// - Returns: The captured standard output decoded as UTF-8, or `nil` when the
///   process could not be launched or its output is not valid UTF-8.
@discardableResult
public func shellCommand(_ launchPath: String, _ arguments: [String]) -> String?
{
    let task = Process()
    task.executableURL = URL(fileURLWithPath: launchPath)
    task.arguments = arguments

    let pipe = Pipe()
    task.standardOutput = pipe
    do {
        try task.run()
    } catch {
        print("Unexpected error: \(error).")
        // Nothing was launched, so no child will ever write to or close the
        // pipe; reading it to end-of-file here could block indefinitely.
        return nil
    }

    let data = pipe.fileHandleForReading.readDataToEndOfFile()
    return String(data: data, encoding: .utf8)
}

In [4]:
if let res = shellCommand("/bin/ls", ["-lh"]){print(res)}

total 1.2M
-rw-r----- 1 jekbradbury primarygroup  18K Apr 18 17:24 00_load_data.ipynb
-rw-r----- 1 jekbradbury primarygroup  15K Apr 18 17:24 01a_fastai_layers.ipynb
-rw-r----- 1 jekbradbury primarygroup  22K Apr 18 10:46 01b_sequential_layer.ipynb
-rw-r----- 1 jekbradbury primarygroup  25K Apr 18 17:24 01_matmul.ipynb
-rw-r----- 1 jekbradbury primarygroup  17K Apr 18 17:24 02a_why_sqrt5.ipynb
-rw-r----- 1 jekbradbury primarygroup  16K Apr 17 12:53 02b_initializing.ipynb
-rw-r----- 1 jekbradbury primarygroup  20K Apr 18 17:24 02_fully_connected.ipynb
-rw-r----- 1 jekbradbury primarygroup  22K Apr 18 17:24 03_minibatch_training.ipynb
-rw-r----- 1 jekbradbury primarygroup  27K Apr 18 17:24 04_callbacks.ipynb
-rw-r----- 1 jekbradbury primarygroup  71K Apr 18 17:24 05_anneal.ipynb
-rw-r----- 1 jekbradbury primarygroup  50K Apr 18 17:24 05b_early_stopping.ipynb
-rw-r----- 1 jekbradbury primarygroup 126K Apr 18 17:24 06_cuda.ipynb
-rw-r----- 1 jekbradbury primarygroup  16K Apr 1

In [5]:
//export
/// Downloads `url` to a local file unless that file is already present.
///
/// - Parameters:
///   - url: Address of the resource to fetch.
///   - dest: Path of the file to write. When `nil`, the last `/`-separated
///     component of `url` is used as a file name inside the current working directory.
///   - force: When `true`, downloads even if the destination already exists.
///
/// Failures (no usable file name, unreachable URL, unwritable destination) are
/// reported with `print`; nothing is thrown.
public func downloadFile(_ url: String, dest: String?=nil, force: Bool=false){
    // Resolve the destination once, without force-unwrapping anything.
    let destName: String
    if let dest = dest {
        destName = dest
    } else if let fileName = url.split(separator: "/").last {
        destName = (Path.cwd/fileName).string
    } else {
        print("Can't derive a file name from \(url)")
        return
    }
    let destURL = URL(fileURLWithPath: destName)

    // FileManager accepts any path string, so a `dest` that `Path` cannot
    // parse no longer crashes the existence check.
    guard force || !FileManager.default.fileExists(atPath: destName) else { return }

    print("Downloading \(url)...")
    guard let contents = Just.get(url).content else {
        print("Can't reach \(url)")
        return
    }
    do    { try contents.write(to: destURL) }
    catch { print("Can't write to \(destURL).\n\(error)") }
}

In [6]:
downloadFile("https://storage.googleapis.com/cvdf-datasets/mnist/train-images-idx3-ubyte.gz")

Downloading https://storage.googleapis.com/cvdf-datasets/mnist/train-images-idx3-ubyte.gz...


In [7]:
//export
import TensorFlow

In [8]:
//export
/// A type that can be created from a single `UInt8` value.
protocol ConvertableFromByte {
    /// Creates an instance from the given byte.
    init(_ byte: UInt8)
}

In [9]:
//export
// Scalar types declared as `ConvertableFromByte`; the empty bodies rely on
// each type's existing `UInt8` initializer to satisfy the requirement.
extension Int32: ConvertableFromByte {}
extension Int: ConvertableFromByte {}
extension Float: ConvertableFromByte {}

In [10]:
/// Reads a file and turns its bytes, minus a leading prefix, into a tensor.
///
/// - Parameters:
///   - fn: Path of the file to read.
///   - skip: Number of bytes dropped from the start of the file.
/// - Returns: A tensor with one scalar per remaining byte.
/// - Note: Traps (`try!`) when the file cannot be read.
func readData<T: ConvertableFromByte & TensorFlowScalar>(_ fn: String, _ skip: Int) -> Tensor<T> {
    let fileURL = URL(fileURLWithPath: fn)
    let bytes = try! Data(contentsOf: fileURL)
    let scalars: [T] = bytes.dropFirst(skip).map { T($0) }
    return Tensor(scalars)
}

In [11]:
//export
/// Loads one MNIST file (images or labels, training or test) as a tensor,
/// downloading and decompressing it into `path` first when it is missing.
///
/// - Parameters:
///   - training: `true` selects the `train` files (60000 items), `false` the `t10k` files (10000 items).
///   - labels: `true` loads the label file, `false` the image file.
///   - path: Directory holding (or receiving) the data files.
///   - flat: For images only: `true` reshapes to `[count, 784]`, `false` to `[count, 28, 28]`.
/// - Returns: A rank-1 tensor for labels, otherwise the reshaped image tensor.
/// - Note: Traps (`try!`) when the data file cannot be read.
func loadMNIST<T: ConvertableFromByte & TensorFlowScalar>(training: Bool, labels: Bool, path: Path, flat: Bool) -> Tensor<T> {
    let baseUrl = "https://storage.googleapis.com/cvdf-datasets/mnist/"
    let split = training ? "train" : "t10k"
    let kind = labels ? "labels" : "images"
    let sampleCount = training ? 60000 : 10000
    // Bytes dropped from the start of the file before conversion
    // (presumably the file header — confirm against the format spec).
    let headerLength = labels ? 8 : 16

    let fname = "\(split)-\(kind)-idx\(labels ? 1 : 3)-ubyte"
    let file = path/fname
    if !file.exists {
        let archive = (path/"\(fname).gz").string
        downloadFile("\(baseUrl)\(fname).gz", dest: archive)
        shellCommand("/bin/gunzip", ["-fq", archive])
    }

    let payload = try! Data(contentsOf: URL(fileURLWithPath: file.string)).dropFirst(headerLength)
    let values: Tensor<T> = Tensor(payload.map(T.init))
    guard !labels else { return values }

    let shape: TensorShape = flat ? [sampleCount, 784] : [sampleCount, 28, 28]
    return values.reshaped(to: shape)
}

/// Loads the four MNIST tensors, creating `path` beforehand.
///
/// - Parameters:
///   - path: Directory holding (or receiving) the data files.
///   - flat: Passed through to the image loader to choose the image tensor shape.
/// - Returns: `(training images, training labels, test images, test labels)`;
///   both image tensors are divided by 255.
/// - Note: Traps (`try!`) when the directory cannot be created.
public func loadMNIST(path:Path, flat:Bool = false) -> (Tensor<Float>, Tensor<Int32>, Tensor<Float>, Tensor<Int32>) {
    try! path.mkdir(.p)

    // Loaded in the same order as the returned tuple.
    let trainImages: Tensor<Float> = loadMNIST(training: true,  labels: false, path: path, flat: flat)
    let trainLabels: Tensor<Int32> = loadMNIST(training: true,  labels: true,  path: path, flat: flat)
    let testImages:  Tensor<Float> = loadMNIST(training: false, labels: false, path: path, flat: flat)
    let testLabels:  Tensor<Int32> = loadMNIST(training: false, labels: true,  path: path, flat: flat)

    return (trainImages / 255.0, trainLabels, testImages / 255.0, testLabels)
}

In [12]:
//export
/// Directory under the user's home where the MNIST files are stored.
public let mnistPath: Path = Path.home / ".fastai" / "data" / "mnist_tst"

In [13]:
let (xTrain, yTrain, xValid, yValid) = loadMNIST(path: mnistPath)

In [14]:
xTrain.shape

▿ TensorShape
  ▿ dimensions : 3 elements
    - 0 : 60000
    - 1 : 28
    - 2 : 28


In [15]:
let (xTrain, yTrain, xValid, yValid) = loadMNIST(path: mnistPath, flat: true)
xTrain.shape

▿ TensorShape
  ▿ dimensions : 2 elements
    - 0 : 60000
    - 1 : 784


## Timing

In [16]:
//export 
import Dispatch
/// Runs `function` repeatedly and prints its average wall-clock duration in milliseconds.
///
/// - Parameters:
///   - repeating: Number of timed runs. When greater than 1, one extra untimed
///     call is made first. Values below 1 run nothing and print nothing.
///   - function: The work to measure.
public func time(repeating: Int=1, _ function: () -> ()) {
    // `1...repeating` would trap for counts below 1, so bail out early.
    guard repeating >= 1 else { return }
    if repeating > 1 { function() }
    var times = [Double]()
    times.reserveCapacity(repeating)
    for _ in 0..<repeating {
        let start = DispatchTime.now()
        function()
        let end = DispatchTime.now()
        let nanoseconds = Double(end.uptimeNanoseconds - start.uptimeNanoseconds)
        times.append(nanoseconds / 1e6)
    }
    print("\(times.reduce(0.0, +)/Double(times.count)) ms")
}

In [17]:
time(repeating:10) {var valImgs: Tensor<Float> = loadMNIST(training:false, labels: false, path: mnistPath, flat:false)}

205.060938 ms


## Export

In [18]:
// export
/// Regular-expression helpers for `String`.
public extension String {
    /// Returns the range of the first match of the regular expression `pat`,
    /// or `nil` when there is none.
    func findFirst(pat: String) -> Range<String.Index>? {
        let firstHit = self.range(of: pat, options: .regularExpression)
        return firstHit
    }

    /// Returns `true` when the regular expression `pat` matches somewhere in the string.
    func matches(pat: String) -> Bool {
        guard findFirst(pat: pat) != nil else { return false }
        return true
    }
}

/// Writes the exported cells of a Jupyter notebook to a Swift source file.
///
/// A cell is exported when the first line of its `source` matches
/// `^\s*//\s*export\s*$`; that marker line itself is left out. The result is
/// written next to the notebook, under `FastaiNotebooks/Sources/FastaiNotebooks/`,
/// using the notebook's file name with a `swift` extension.
///
/// - Parameter fname: Path of the notebook file.
///
/// Read, parse and write failures are reported with `print`; nothing is thrown.
public func notebookToScript(fname: String){
    let url_fname = URL(fileURLWithPath: fname)
    // Take the file name from the URL; `lastPathComponent` is not available
    // on `String` on every platform.
    let last = url_fname.lastPathComponent
    let out_fname = (url_fname.deletingLastPathComponent().appendingPathComponent("FastaiNotebooks", isDirectory: true)
                     .appendingPathComponent("Sources", isDirectory: true)
                     .appendingPathComponent("FastaiNotebooks", isDirectory: true).appendingPathComponent(last)
                     .deletingPathExtension().appendingPathExtension("swift"))

    let data: Data
    do {
        data = try Data(contentsOf: url_fname)
    } catch {
        print("Can't read the content of \(fname)")
        return
    }

    // Malformed JSON, or JSON without a list of cells, is reported instead of trapping.
    guard let jsonData = (try? JSONSerialization.jsonObject(with: data, options: .allowFragments)) as? [String: Any],
          let cells = jsonData["cells"] as? [[String: Any]] else {
        print("Can't parse \(fname) as a notebook")
        return
    }

    var module = """
/*
THIS FILE WAS AUTOGENERATED! DO NOT EDIT!
file to edit: \(last)

*/
        
"""
    for cell in cells {
        if let source = cell["source"] as? [String], let marker = source.first,
           marker.matches(pat: #"^\s*//\s*export\s*$"#) {
            module.append("\n" + source.dropFirst().joined() + "\n")
        }
    }

    do {
        try module.write(to: out_fname, atomically: false, encoding: .utf8)
    } catch {
        print("Can't write to \(out_fname.path).\n\(error)")
    }
}

In [19]:
// export
/// Converts every notebook found directly inside `path` into a Swift script.
///
/// An entry is converted when it is a file whose name matches `^\d*_.*ipynb$`.
///
/// - Parameter path: Directory to scan.
///
/// A directory that cannot be listed is reported with `print`; nothing is thrown.
public func exportNotebooks(_ path: Path){
    let entries: [Entry]
    do {
        entries = try path.ls()
    } catch {
        print("Can't list the content of \(path.string).\n\(error)")
        return
    }
    for entry in entries where entry.kind == Entry.Kind.file &&
        entry.path.basename().matches(pat: #"^\d*_.*ipynb$"#) {
        print("Converting \(entry.path.basename())")
        // Pass the full path: a bare file name would be resolved against the
        // current working directory rather than against `path`.
        notebookToScript(fname: entry.path.string)
    }
}

In [21]:
notebookToScript(fname: (Path.cwd / "00_load_data.ipynb").string)