In [1]:
// Swift package installation must go in the first cell.
// Set install-location so that all notebooks in this directory
// will share packages without having to recompile them.
// NOTE(review): this cell probably only needs to be run once per host; on
// later sessions it can likely be skipped in favor of cell 2, which imports
// the already-compiled packages — confirm before relying on that.
%install-location $cwd/swift-jupyter-install-location
%install '.package(url: "https://github.com/mxcl/Path.swift", from: "1.2.0")' Path
%install '.package(url: "https://github.com/swiftcsv/SwiftCSV", from: "0.5.6")' SwiftCSV

Installing packages:
	.package(url: "https://github.com/mxcl/Path.swift", from: "1.2.0")
		Path
	.package(url: "https://github.com/swiftcsv/SwiftCSV", from: "0.5.6")
		SwiftCSV
With SwiftPM flags: []
Working in: /tmp/tmpfnp4f5ra/swift-install
Fetching https://github.com/swiftcsv/SwiftCSV
Fetching https://github.com/mxcl/Path.swift
Cloning https://github.com/mxcl/Path.swift
Resolving https://github.com/mxcl/Path.swift at 1.2.0
Cloning https://github.com/swiftcsv/SwiftCSV
Resolving https://github.com/swiftcsv/SwiftCSV at 0.5.6
[1/21] Compiling Path Pathish.swift
[2/21] Compiling Path Path+StringConvertibles.swift
[4/21] Compiling SwiftCSV NamedView.swift
[5/21] Compiling SwiftCSV Parser.swift
[6/21] Compiling Path Path+ls.swift
[7/21] Compiling SwiftCSV EnumeratedView.swift
[9/21] Compiling Path Path.swift
[10/21] Compiling SwiftCSV CSV.swift
[11/21] Compiling SwiftCSV Description.swift
[12/21] Compiling Path Path->Bool.swift
[17/22] Merging module SwiftCSV
[18/22] Wrapping AST for Swift

In [2]:
// Imports and Jupyter boilerplate
import Foundation
import FoundationNetworking
import Path
import PythonKit
import SwiftCSV
import TensorFlow

// The next two lines exist only to display plots inline in a Jupyter Notebook.
// Do not copy them into another environment.
%include "EnableIPythonDisplay.swift"
IPythonDisplay.shell.enable_matplotlib("inline")

// Python modules loaded through PythonKit:
// - matplotlib.pyplot for plotting,
// - fastai.data.external, whose `URLs` supplies the data set locations,
// - tarfile, used to unpack the downloaded archive.
let plt = Python.import("matplotlib.pyplot")
let fastaiDataExternal = Python.import("fastai.data.external")
let tarfile = Python.import("tarfile")

In [3]:
// TODO: move this to a library or use SwiftAI
/// Downloads `source` and stores it at `destination`.
///
/// An existing file at `destination` is treated as a finished download and is
/// left untouched unless `force` is `true`. The payload is written atomically
/// so that an interrupted run cannot leave a truncated file behind that the
/// next run would mistake for a finished download.
///
/// Any network or file-system failure traps (`try!`); this helper has no
/// error channel, so callers cannot recover from a failed download.
///
/// - Parameters:
///   - source: URL to fetch. The whole response is loaded into memory.
///   - destination: File to create. Its parent directory is created first,
///     including intermediate directories, when it does not exist yet.
///   - force: Pass `true` to download even when `destination` already exists.
func download(from source: URL, to destination: Path, force: Bool = false) {
    guard force || !destination.exists else {
        return
    }
    // Create the directory before fetching so an unwritable location fails
    // before any network traffic happens.
    let directory = destination.parent
    if !directory.exists {
        try! directory.mkdir(.p)
    }
    let data = try! Data(contentsOf: source)
    // Atomic write: the file only appears at `destination` once complete.
    try! data.write(to: destination, atomically: true)
}

In [4]:
// Directory under the current working directory that holds downloaded data.
let dataPath = Path.cwd/"data"
// Full data set: fastaiDataExternal.URLs.ML_100k
// Sample data set: fastaiDataExternal.URLs.ML_SAMPLE
// The URL comes from the Python fastai module. Both force-unwraps trap if the
// Python value cannot be converted to a String or does not parse as a URL.
let movieLensURL = URL(string: String(fastaiDataExternal.URLs.ML_SAMPLE)!)!
// Store the archive inside dataPath under the file name taken from the URL.
let mlTgz = dataPath/movieLensURL.lastPathComponent
// No-op when the archive already exists, because `force` defaults to false.
download(from: movieLensURL, to: mlTgz)
print(mlTgz)

/home/garymm/src/fastai/course-v3/nbs/dl1-swift/data/movie_lens_sample.tgz


In [5]:
// Unpack the downloaded archive into dataPath using Python's tarfile module.
// NOTE(review): extractall writes whatever member paths the archive contains;
// only use this with archives from a trusted source.
let mlTgzTarfile = tarfile.open(mlTgz.string)
mlTgzTarfile.extractall(path: dataPath.string)
mlTgzTarfile.close()
// List dataPath to confirm what was extracted.
print(dataPath.ls())

[Path(/home/garymm/src/fastai/course-v3/nbs/dl1-swift/data/movie_lens_sample), Path(/home/garymm/src/fastai/course-v3/nbs/dl1-swift/data/movie_lens_sample.tgz)]


In [6]:
// The extracted directory is named after the archive without its extension.
let mlPath = dataPath/mlTgz.basename(dropExtension: true)
// Load the first entry that ls() returns for that directory as the ratings CSV.
// NOTE(review): this presumes the ratings file is the first (or only) entry
// of the listing — confirm, or select the file by name instead.
let ratingsCSV: CSV = CSV(url: URL(fileURLWithPath: mlPath.ls()[0].string), loadColumns: false)
print("loaded \(ratingsCSV.namedRows.count) rows with columns \(ratingsCSV.header)")

loaded 6031 rows with columns ["userId", "movieId", "rating", "timestamp"]


In [7]:
// Positions of the columns of interest within the CSV header. Each
// force-unwrap traps if that column name is missing from the header.
let userIdColIndex = ratingsCSV.header.firstIndex(of: "userId")!
let movieIdColIndex = ratingsCSV.header.firstIndex(of: "movieId")!
let ratingColIndex = ratingsCSV.header.firstIndex(of: "rating")!
// Rows whose userId % 10 is below this value go to the validation split;
// every other row goes to the training split (see addToArray).
let validModTen = 3
// Ratings, one entry per row that addToArray accepted.
var trainRatingsArray : [Float] = [], validRatingsArray : [Float] = []
// Ids stored flat and interleaved: userId, movieId, userId, movieId, ...
// Ids are parsed as UInt16; rows whose ids fail to parse are skipped by addToArray.
var trainIdsArray: [UInt16] = [], validIdsArray : [UInt16] = []
/// Parses one CSV row and appends it to either the training or the
/// validation arrays.
///
/// Rows that are too short to contain all three needed columns are skipped
/// silently. Rows whose user id, movie id or rating cannot be parsed are
/// skipped after printing a diagnostic. For each accepted row the ids are
/// appended as a `userId, movieId` pair and the rating is appended to the
/// matching ratings array, so the id arrays stay exactly twice as long as the
/// ratings arrays.
///
/// - Parameter row: The fields of one CSV row, in header order.
func addToArray(row: [String]) {
    // The row must reach every column read below, not only the rating
    // column; otherwise a short row would trap on an out-of-range subscript
    // whenever `rating` is not the right-most of the three columns.
    let lastNeededIndex = max(userIdColIndex, movieIdColIndex, ratingColIndex)
    guard row.count > lastNeededIndex else {
        return
    }
    guard let userId = UInt16(row[userIdColIndex]) else {
        print("Failed to convert element \(userIdColIndex) of row to UInt16: \(row)")
        return
    }
    guard let movieId = UInt16(row[movieIdColIndex]) else {
        print("Failed to convert element \(movieIdColIndex) of row to UInt16: \(row)")
        return
    }
    guard let rating = Float(row[ratingColIndex]) else {
        print("Failed to convert to Float: \(row[ratingColIndex])")
        return
    }
    // Split by user: users whose id modulo 10 is below validModTen form the
    // validation set, everyone else the training set.
    if userId % 10 < validModTen {
        validIdsArray.append(userId)
        validIdsArray.append(movieId)
        validRatingsArray.append(rating)
    } else {
        trainIdsArray.append(userId)
        trainIdsArray.append(movieId)
        trainRatingsArray.append(rating)
    }
}
// Feed every data row through addToArray. Enumeration starts at 1 to skip
// the header row.
ratingsCSV.enumerateAsArray(startAt: 1, addToArray)
// Ratings become rank-1 tensors. The flat, interleaved id arrays
// (userId, movieId, userId, movieId, ...) are reshaped to one
// (userId, movieId) pair per row, hence count / 2 rows of 2 columns.
let trainRatings = Tensor<Float>(trainRatingsArray)
let validRatings = Tensor<Float>(validRatingsArray)
let trainIds = Tensor<UInt16>(shape: [trainIdsArray.count / 2, 2], scalars: trainIdsArray)
let validIds = Tensor<UInt16>(shape: [validIdsArray.count / 2, 2], scalars: validIdsArray)
print("Training Tensors:")
print("trainRatings.shape, trainIds.shape = \(trainRatings.shape) \(trainIds.shape)")
// Preview at most the first six rows. Clamping to the number of training
// rows keeps a very small training split from indexing past the end.
for i in 0..<min(6, trainRatingsArray.count) {
    print("Row \(i): \(trainIds[i][0]), \(trainIds[i][1]), \(trainRatings[i])")
}

Training Tensors:
trainRatings.shape, trainIds.shape = [4304] [4304, 2]
Row 0: 73, 1097, 4.0
Row 1: 157, 260, 3.5
Row 2: 358, 1210, 5.0
Row 3: 544, 2918, 5.0
Row 4: 213, 1200, 3.0
Row 5: 176, 2571, 4.5


In [None]:
// Take the first five (userId, movieId) rows and separate the two columns.
let trainIdsHead = trainIds[0..<5]
print(trainIdsHead)
// Splitting along axis 1 yields one [5, 1] tensor per column.
let trainIdsHeadParts = trainIdsHead.split(count: 2, alongAxis: 1)
// Index by position inside the ids tensor, not by CSV header position:
// addToArray always appends userId first and movieId second, so column 0 is
// the user and column 1 is the movie regardless of the CSV column order.
let trainUserIdHead = trainIdsHeadParts[0].squeezingShape()
let trainMovieIdHead = trainIdsHeadParts[1].squeezingShape()
print(trainUserIdHead)
print(trainMovieIdHead)

In [None]:
/// Work-in-progress model built from four embeddings: per-user and per-item
/// factor vectors (`nFactors` wide) plus a per-user and a per-item bias
/// (width 1), with an optional `yRange` stored alongside them.
///
/// NOTE(review): this struct is unfinished and does not compile as written:
/// - `uWeight`, `iWeight`, `uBias`, `iBias` and `yRange` are assigned in
///   `init`, but no stored properties are declared for them.
/// - `callAsFunction` stops at an empty subscript and never returns a value.
struct EmbeddingDotBias: Layer {
    /// Creates the four embeddings and records the optional output range.
    ///
    /// - Parameters:
    ///   - nFactors: Embedding size of the user and item factor tables.
    ///   - nUsers: Vocabulary size of the user tables.
    ///   - nItems: Vocabulary size of the item tables.
    ///   - yRange: Optional pair of Floats; presumably the (low, high) bounds
    ///     of the predicted rating — TODO confirm once the forward pass uses it.
    ///
    /// NOTE(review): the sizes are declared as `UInt` here; confirm that
    /// `Embedding`'s initializer accepts `UInt` rather than `Int`.
    init(nFactors: UInt, nUsers: UInt, nItems: UInt, yRange: (Float, Float)?) {
        self.uWeight = Embedding<Float>(vocabularySize: nUsers, embeddingSize: nFactors)
        self.iWeight = Embedding<Float>(vocabularySize: nItems, embeddingSize: nFactors)
        self.uBias = Embedding<Float>(vocabularySize: nUsers, embeddingSize: 1)
        self.iBias = Embedding<Float>(vocabularySize: nItems, embeddingSize: 1)
        self.yRange = yRange
    }
    /// Forward pass over a tensor that is split into two parts along axis 1.
    ///
    /// NOTE(review): incomplete — only the input split is implemented.
    @differentiable
    func callAsFunction(_ input: Tensor<UInt16>) -> Tensor<Float> {
        let inputParts = input.split(count: 2, alongAxis: 1)
        // NOTE(review): the parts are indexed with the CSV header positions
        // (notebook globals); confirm these match the column order of `input`.
        let users = inputParts[userIdColIndex].squeezingShape()
        let items = inputParts[movieIdColIndex].squeezingShape()
        // NOTE(review): dangling expression; the dot product, bias terms and
        // the return statement are still missing.
        let dotProd = self.uWeight[]
    }
}

In [None]:
// NOTE(review): the only initializer visible on EmbeddingDotBias takes
// nFactors, nUsers, nItems and yRange; this zero-argument call does not match it.
var model = EmbeddingDotBias()