In [1]:
%install-location /notebooks/language2motion.gt/swift-install
%install-swiftpm-flags -c release
%install '.package(path: "/notebooks/language2motion.gt/code")' Datasets ModelSupport

Installing packages:
	.package(path: "/notebooks/language2motion.gt/code")
		Datasets
		ModelSupport
With SwiftPM flags: ['-c', 'release']
Working in: /tmp/tmpuqcp9d3s/swift-install
[1/2] Compiling jupyterInstalledPackages jupyterInstalledPackages.swift
Initializing Swift...
Installation complete!


In [2]:
import Foundation
import TensorFlow
import Datasets
import ModelSupport

In [3]:
// Pull in the notebook display helper, then switch matplotlib to the
// "inline" backend so figures are shown in the cell output.
%include "EnableIPythonDisplay.swift"
IPythonDisplay.shell.enable_matplotlib("inline")

('inline', 'module://ipykernel.pylab.backend_inline')


# load dataset

In [4]:
// File URL of the data directory; dataset file paths are built relative to it.
let dataURL = URL(fileURLWithPath: "/notebooks/language2motion.gt/data/")

In [5]:
// Batching / sequence-length settings passed to the dataset below.
let batchSize = 10
let maxSequenceLength = 300 // 600 is kept here as a commented-out alternative

print("batchSize: \(batchSize)")
print("maxSequenceLength: \(maxSequenceLength)")

// Input files, resolved relative to `dataURL`.
let serializedDatasetURL = dataURL.appendingPathComponent("motion_dataset.motion_flag.normalized.plist")
let labelsURL = dataURL.appendingPathComponent("labels_ds_v2.csv")

print("\nLoading dataset...")
let dataset = try! Motion2Label(
    serializedDatasetURL: serializedDatasetURL,
    labelsURL: labelsURL,
    maxSequenceLength: maxSequenceLength,
    batchSize: batchSize
) { (example: Motion2LabelExample) -> LabeledMotionBatch in
    // TODO: move this to dataset class
    // Converts one example into the (motion batch, label) pair used for training.
    let frames = Tensor<Float>(example.motionSample.motionFramesArray)
    let frameCount = Tensor<Int32>(Int32(frames.shape[0]))
    // Column 44 of every frame is extracted and used as the motion flag.
    let flag = Tensor<Int32>(frames[0..., 44...44].squeezingShape(at: 1))
    let classIndex = Tensor<Int32>(Int32(example.label!.idx))
    return LabeledMotionBatch(
        data: MotionBatch(motionFrames: frames, motionFlag: flag, origMotionFramesCount: frameCount),
        label: classIndex
    )
}

print("dataset.trainingExamples.count: \(dataset.trainingExamples.count)")
print("dataset.validationExamples.count: \(dataset.validationExamples.count)")

batchSize: 10
maxSequenceLength: 300

Loading dataset...
MotionDataset(motionSamples: 3911)
dataset.trainingExamples.count: 2410
dataset.validationExamples.count: 602


# balance dataset

In [6]:
// Keep only the motion samples that carry at least one annotation.
let motionSamplesWithAnnotations = dataset.motionDataset.motionSamples.filter { sample in
    !sample.annotations.isEmpty
}
motionSamplesWithAnnotations.count

3012


In [None]:
// TODO: code upsampling

In [10]:
extension Motion2Label {
    /// Returns the samples from `motionSamples` whose label index equals `classIdx`.
    ///
    /// A sample for which `getLabel` returns `nil` is treated as not belonging
    /// to the class (it is skipped rather than crashing on a force-unwrap).
    ///
    /// - Parameters:
    ///   - motionSamples: Samples to filter.
    ///   - classIdx: Label index to keep.
    /// - Returns: The matching samples, in their original order.
    public func filterSamples(_ motionSamples: [MotionSample], classIdx: Int) -> [MotionSample] {
        return motionSamples.filter { getLabel($0.sampleID)?.idx == classIdx }
    }
    
    /// Builds class-balanced train/test sample lists.
    ///
    /// For every class index in `0..<labels.count`:
    /// - if the class has at least `numPerClass` samples, `numPerClass` of them are
    ///   picked with `choose(_:)` and then split with `trainTestSplit(split:)`;
    /// - otherwise the class samples are split first, and the training part is
    ///   resampled with replacement up to `Int(Double(numPerClass) * split)` items.
    ///   The test part is left as is (not upsampled).
    ///
    /// One `(classTotal, trainCount, testCount)` tuple is printed per class.
    ///
    /// - Parameters:
    ///   - motionSamples: Samples to balance.
    ///   - numPerClass: Target number of samples per class (train + test).
    ///   - split: Fraction passed to `trainTestSplit(split:)`; defaults to `0.8`.
    /// - Returns: Shuffled training and test samples accumulated over all classes.
    public func balanceClassSamples(motionSamples: [MotionSample], numPerClass: Int, split: Double = 0.8) -> (trainSamples: [MotionSample], testSamples: [MotionSample]) {
        var allTrainSamples: [MotionSample] = []
        var allTestSamples: [MotionSample] = []
        // Loop-invariant: size of the training set for an upsampled class.
        let maxTrainPerClass = Int(Double(numPerClass) * split)

        // Iterate over this instance's own labels, not a notebook-level global.
        for classIdx in 0..<labels.count {
            let samplesForClass = filterSamples(motionSamples, classIdx: classIdx)

            var trainSamples: [MotionSample]
            var testSamples: [MotionSample]
            if samplesForClass.count >= numPerClass { // downsample
                let sampledSamplesForClass = Array(samplesForClass.choose(numPerClass))
                (trainSamples, testSamples) = sampledSamplesForClass.trainTestSplit(split: Float(split))
            } else { // upsample
                (trainSamples, testSamples) = samplesForClass.trainTestSplit(split: Float(split))
                // Draw with replacement from the training part. `compactMap` yields an
                // empty array when the pool is empty, so a class without training
                // samples no longer traps on a force-unwrapped `randomElement()`.
                let pool = trainSamples
                trainSamples = (0..<maxTrainPerClass).compactMap { _ in pool.randomElement() }
            }

            allTrainSamples.append(contentsOf: trainSamples)
            allTestSamples.append(contentsOf: testSamples)

            print((samplesForClass.count, trainSamples.count, testSamples.count))
        }
        allTrainSamples.shuffle()
        allTestSamples.shuffle()
        return (trainSamples: allTrainSamples, testSamples: allTestSamples)
    }
}

In [13]:
// Target number of samples per class (train + test) for balancing.
let numPerClass = 600

// Balance the annotated samples; one (total, train, test) tuple is printed per class.
let (trainSamples, testSamples) = dataset.balanceClassSamples(
    motionSamples: motionSamplesWithAnnotations,
    numPerClass: numPerClass,
    split: 0.8
)
(trainSamples.count, testSamples.count)

(1216, 480, 120)
(644, 480, 120)
(103, 480, 21)
(400, 480, 80)
(649, 480, 120)


▿ 2 elements
  - .0 : 2400
  - .1 : 461
