In [9]:
:dep burn = {version = "0.12.1", features = ["ndarray", "wgpu", "candle"]}

In [10]:
/// maybe you will meet some trouble in this step,
/// this info maybe help you：
/// 1. check if it is installed `hdf5` in your computer, in mac os: brew install hdfs
/// 2. check `HDF5_LIB` and `HDF5_INCLUDE`; you can run commend: `find / -name hdf5` to find it
/// 3. check if your computer is install `cmake`. if not, install it. in mac os: brew install cmake
:dep hdf5 = { version = "0.8.1", features = ["blosc", "lzf"]}
:dep hdf5-sys = { version = "0.8.1", features = ["static", "zlib"]}

In [11]:
:dep ndarray = "0.15.0"

In [12]:
:show_deps

burn = {version = "0.12.1", features = ["ndarray", "wgpu", "candle"]}
hdf5 = { version = "0.8.1", features = ["blosc", "lzf"]}
hdf5-sys = { version = "0.8.1", features = ["static", "zlib"]}
ndarray = "0.15.0"


### 1. load_dataset from hdf5 files

In [13]:
use hdf5::{File};
use ndarray::{Array1, Array4};

fn load_dataset() -> (Array4<f32>, Array1<f32>, Array4<f32>, Array1<f32>) {
    // open HDF5 file
    let train_file = File::open("datasets/train_catvnoncat.h5").unwrap();
    // read dataset
    let train_x_dataset = train_file.dataset("train_set_x").unwrap();
    let train_y_dataset = train_file.dataset("train_set_y").unwrap();

    let test_file = File::open("datasets/test_catvnoncat.h5").unwrap();
    let test_x_dataset = test_file.dataset("test_set_x").unwrap();
    let test_y_dataset = test_file.dataset("test_set_y").unwrap();

    // change dataset to ndarray
    let train_x_ndarray: Array4<f32> = train_x_dataset.read_slice((.., .., .., ..)).unwrap();
    let train_y_ndarray: Array1<f32> = train_y_dataset.read_slice(..).unwrap();

    let test_x_ndarray: Array4<f32> = test_x_dataset.read_slice((.., .., .., ..)).unwrap();
    let test_y_ndarray: Array1<f32> = test_y_dataset.read_slice(..).unwrap();

    return (train_x_ndarray, train_y_ndarray, test_x_ndarray, test_y_ndarray)
}

### 2. load data from hdf5 files

In [22]:
let (train_x_ndarray, train_y_ndarray, test_x_ndarray, test_y_ndarray) = load_dataset();
println!("train_x_ndarray.shape: {:?}", train_x_ndarray.shape());
println!("train_y_ndarray.shape: {:?}", train_y_ndarray.shape());

println!("test_x_ndarray.shape: {:?}", test_x_ndarray.shape());
println!("test_y_ndarray.shape: {:?}", test_y_ndarray.shape());

train_x_ndarray.shape: [209, 64, 64, 3]
train_y_ndarray.shape: [209]
test_x_ndarray.shape: [50, 64, 64, 3]
test_y_ndarray.shape: [50]


### 3. reshape data

In [23]:
// reshape data
let train_numbers: usize = train_x_ndarray.shape()[0];
let features = 64 * 64 * 3;
let train_x_ndarray = train_x_ndarray.clone().into_shape((train_numbers, features)).unwrap();
let train_y_ndarray = train_y_ndarray.clone().into_shape((train_numbers, 1)).unwrap();
println!("train_x_ndarray.shape: {:?}", train_x_ndarray.shape());
println!("train_y_ndarray.shape: {:?}", train_y_ndarray.shape());

let test_numbers: usize = test_x_ndarray.shape()[0];
let test_x_ndarray = test_x_ndarray.clone().into_shape((test_numbers, features)).unwrap();
let test_y_ndarray = test_y_ndarray.clone().into_shape((test_numbers, 1)).unwrap();
println!("test_x_ndarray.shape: {:?}", test_x_ndarray.shape());
println!("test_y_ndarray.shape: {:?}", test_y_ndarray.shape());

train_x_ndarray.shape: [209, 12288]
train_y_ndarray.shape: [209, 1]
test_x_ndarray.shape: [50, 12288]
test_y_ndarray.shape: [50, 1]


### 4. convert ndarray to burn tensor

In [25]:
use burn::tensor::{Data, Shape, Tensor};
use burn::backend::{NdArray};
use burn::tensor::backend::Backend;
use ndarray::prelude::{ArrayBase, Dim};

type MyBackend = NdArray; 

fn ndarray_to_tensor<B: Backend>(ndarray: ArrayBase<ndarray::OwnedRepr<f32>, Dim<[usize; 2]>>) -> Tensor<B, 2>
    where Data<<B as Backend>::FloatElem, 1>: From<burn::tensor::Data<f32, 1>> {

    let device = Default::default();
    let m = ndarray.shape()[0];
    let features = ndarray.shape()[1];


    let mut result_tensor: Tensor<B, 2> = Tensor::zeros([m, features], &device);
    let shape = [features];  

    let mut i = 0;
    for row in ndarray.clone().rows() {
        let row_vec: Vec<f32> = row.to_vec();
        
        let data = Data::new(row_vec, Shape::new(shape));
        let row_tensor = Tensor::<B, 1>::from_data(data, &device).reshape([1, features]);
        
        result_tensor = result_tensor.slice_assign([i..i+1], row_tensor);
        i += 1;
    }

    result_tensor
}

// convert train_x_ndarray to burn tensor
let train_x_tensor: Tensor<MyBackend, 2> = ndarray_to_tensor(train_x_ndarray.clone());
// convert train_y_ndarray to burn tensor
let train_y_tensor: Tensor<MyBackend, 2> = ndarray_to_tensor(train_y_ndarray.clone());

// convert test_x_ndarray to burn tensor
let test_x_tensor: Tensor<MyBackend, 2> = ndarray_to_tensor(test_x_ndarray.clone());
// convert test_y_ndarray to burn tensor
let test_y_tensor: Tensor<MyBackend, 2> = ndarray_to_tensor(test_y_ndarray.clone());

println!("train_x_tensor ======: {}", train_x_tensor);
println!("train_y_tensor.shape ======: {:?}", train_y_tensor.shape());
println!("test_x_tensor ======: {}", test_x_tensor);
println!("test_y_tensor.shape ======: {:?}", test_y_tensor.shape());

  data:
[[17.0, 31.0, 56.0, ..., 0.0, 0.0, 0.0],
 [196.0, 192.0, 190.0, ..., 82.0, 80.0, 81.0],
 [82.0, 71.0, 68.0, ..., 138.0, 141.0, 142.0],
 ...
 [143.0, 155.0, 165.0, ..., 85.0, 107.0, 149.0],
 [22.0, 24.0, 23.0, ..., 4.0, 5.0, 0.0],
 [8.0, 28.0, 53.0, ..., 0.0, 0.0, 0.0]],
  shape:  [209, 12288],
  device:  Cpu,
  backend:  "ndarray",
  kind:  "Float",
  dtype:  "f32",
}
  data:
[[158.0, 104.0, 83.0, ..., 173.0, 128.0, 110.0],
 [115.0, 110.0, 111.0, ..., 171.0, 176.0, 186.0],
 [255.0, 253.0, 254.0, ..., 133.0, 101.0, 121.0],
 ...
 [41.0, 47.0, 84.0, ..., 183.0, 141.0, 116.0],
 [18.0, 18.0, 16.0, ..., 144.0, 137.0, 108.0],
 [133.0, 163.0, 75.0, ..., 5.0, 22.0, 5.0]],
  shape:  [50, 12288],
  device:  Cpu,
  backend:  "ndarray",
  kind:  "Float",
  dtype:  "f32",
}


### 5. Data standardization processing

In [26]:
let train_x_tensor = train_x_tensor.div_scalar(255.0);
let test_y_tensor = test_y_tensor.div_scalar(255.0);

println!("train_x_tensor ======: {}", train_x_tensor);
println!("test_x_tensor ======: {}", test_x_tensor);

  data:
[[0.06666667, 0.12156863, 0.21960784, ..., 0.0, 0.0, 0.0],
 [0.76862746, 0.7529412, 0.74509805, ..., 0.32156864, 0.3137255, 0.31764707],
 [0.32156864, 0.2784314, 0.26666668, ..., 0.5411765, 0.5529412, 0.5568628],
 ...
 [0.56078434, 0.60784316, 0.64705884, ..., 0.33333334, 0.41960785, 0.58431375],
 [0.08627451, 0.09411765, 0.09019608, ..., 0.015686275, 0.019607844, 0.0],
 [0.03137255, 0.10980392, 0.20784314, ..., 0.0, 0.0, 0.0]],
  shape:  [209, 12288],
  device:  Cpu,
  backend:  "ndarray",
  kind:  "Float",
  dtype:  "f32",
}
  data:
[[158.0, 104.0, 83.0, ..., 173.0, 128.0, 110.0],
 [115.0, 110.0, 111.0, ..., 171.0, 176.0, 186.0],
 [255.0, 253.0, 254.0, ..., 133.0, 101.0, 121.0],
 ...
 [41.0, 47.0, 84.0, ..., 183.0, 141.0, 116.0],
 [18.0, 18.0, 16.0, ..., 144.0, 137.0, 108.0],
 [133.0, 163.0, 75.0, ..., 5.0, 22.0, 5.0]],
  shape:  [50, 12288],
  device:  Cpu,
  backend:  "ndarray",
  kind:  "Float",
  dtype:  "f32",
}


### 6. sigmoid function

In [27]:
fn basic_sigmoid<B: Backend, const D: usize>(x: Tensor<B, D>) -> Tensor<B, D> {
    let ones = x.ones_like();
    return ones.clone() / (ones.clone() + (-x).exp());
}

### 7. initialize parameters

In [58]:
fn initialize_with_zeros<B: Backend, const D: usize>(shape: [usize; D]) -> (Tensor<B, D>, f32) {
    let device = Default::default();
    // 
    let w_tensor = Tensor::<B, D>::zeros(shape, &device);
    let b: f32 = 0.0;

    return (w_tensor, b);
}

// test initialize_with_zeros function
let (w, b) = initialize_with_zeros::<MyBackend, 2>([features, 1]);
println!("w_tensor ======: {}", w);
println!("b ======: {}", b);

  data:
[[0.0],
 [0.0],
 [0.0],
 ...
 [0.0],
 [0.0],
 [0.0]],
  shape:  [12288, 1],
  device:  Cpu,
  backend:  "ndarray",
  kind:  "Float",
  dtype:  "f32",
}


### 8. forward and backward propagation
compute dw,db and cost

In [59]:
/// compute dw,db and cost
/// params:
///     w: weights, shape: (num_features, 1)
///     b: bias, f32
///     X: x data, shape: (m, num_features)
///     Y: labels, shape:(m, 1)
/// return:
///      dw, db, cost
fn propagate<B: Backend, const D: usize>(w: Tensor<B, D>, b: f32, train_x: Tensor<B, D>, train_y: Tensor<B, D>) -> (Tensor<B, D>, f32, f32)
    where B: Backend<FloatElem = f32> {
    let m = train_x.dims()[0];

    // forword
    // X: (m, num_features), w: (num_features, 1), z: (m, 1)
    let z = train_x.clone().matmul(w).add_scalar(b);
    // A: (m, 1)
    let a = basic_sigmoid(z);

    // cost: f32
    // cost = (-1 / m) * np.sum(Y * np.log(A) + (1 - Y) * np.log(1 - A))
    // Y * np.log(A)
    let y_is_set = train_y.clone().mul(a.clone().log());
    // (1 - Y) * np.log(1 - A)
    let ones = train_y.ones_like();
    let y_is_not_set = (ones.clone() - train_y.clone()).mul((ones.clone() - a.clone()).log());
    let sum = (y_is_set + y_is_not_set).sum().into_scalar();
    let cost: f32 = (-1f32/(m as f32)) * sum;


    // backward
    // dz.shape: (m, 1)
    let dz: Tensor<B, D> = a.clone() - train_y.clone();
    // dw = (1 / m) * np.dot(X.T, dz)
    // dw.shape: (num_features, 1), X.T: (num_features, m), dz: (m, 1)
    let dw: Tensor<B, D> =  train_x.clone().transpose().matmul(dz.clone()).mul_scalar(1f32/(m as f32));

    // db = (1 / m) * np.sum(dz)
    let db: f32 = (1f32/(m as f32)) * dz.clone().sum().into_scalar();

    return (dw, db, cost);
}


// test propagate function
let (dw, db, cost) = propagate::<MyBackend, 2>(w.clone(), b.clone(), train_x_tensor.clone(), train_y_tensor.clone());
println!("dw ======: {}", dw);
println!("db ======: {}", db);
println!("cost ======: {}", cost);

  data:
[[0.047208935],
 [0.06299839],
 [0.049235366],
 ...
 [0.05074585],
 [0.062125903],
 [0.03245145]],
  shape:  [12288, 1],
  device:  Cpu,
  backend:  "ndarray",
  kind:  "Float",
  dtype:  "f32",
}


### 9. optimizing process
update w and b, and save cost

In [60]:
/// update w and b, and save cost
/// params:
///     w: weights, shape: (num_features, 1)
///     b: bias, f32
///     train_x: x data, shape: (m, num_features)
///     train_y: labels, shape:(m, 1)
///     num_iterations
///     learning_rate
/// return:
///      w, b, costs
fn optimize<B: Backend, const D: usize>(w: Tensor<B, D>,
                                        b: f32, 
                                        train_x: Tensor<B, D>,
                                        train_y: Tensor<B, D>,
                                        num_iterations: usize,
                                        learning_rate: f32) -> (Tensor<B, D>, f32, Vec<f32>)
    where B: Backend<FloatElem = f32> {
    let mut costs: Vec<f32> = vec![];
    let mut w = w;
    let mut b = b;

    for i in 0..num_iterations {
        let (dw, db, cost) = propagate::<B, D>(w.clone(), b, train_x.clone(), train_y.clone());

        w = w - dw.mul_scalar(learning_rate);
        b = b - learning_rate * db;

        if i % 100 == 0 {
            costs.push(cost);
            println!("cost result {}: {}", i, cost);
        }
    }

    return (w, b, costs);
}

let (w, b, costs) = optimize::<MyBackend, 2>(w.clone(), b, train_x_tensor.clone(), train_y_tensor.clone(), 201, 0.01);
println!("w_tensor ======: {}", w);
println!("b ======: {}", b);

cost result 0: 0.6931474
cost result 100: 0.8121615
cost result 200: 0.97539
  data:
[[0.0060156994],
 [-0.012305489],
 [-0.0060849716],
 ...
 [-0.0090845805],
 [-0.018362092],
 [0.0073324963]],
  shape:  [12288, 1],
  device:  Cpu,
  backend:  "ndarray",
  kind:  "Float",
  dtype:  "f32",
}


### 10. predict

In [61]:
use burn::tensor::{Bool};

/// params：
///         w: weights, shape: (num_features, 1)
///         b: bias
///         x: x data, shape: (m, num_features)
/// Returns:
///         y_prediction: shape: (m, 1) 
fn predict<B: Backend, const D: usize>(w: Tensor<B, D>,b: f32, x: Tensor<B, D>) -> Tensor<B, 2> {
    let m = x.dims()[0];

    // forward
    // x: (m, num_features), w: (num_features, 1), z: (m, 1)
    // z = (np.dot(X, w) + b).reshape(m, 1)
    let z = x.matmul(w).add_scalar(b).reshape([m, 1]);
    // a shape: (m, 1)
    let a = basic_sigmoid(z);

    let half = a.ones_like().mul_scalar(0.5);
    let y_prediction = a.greater_equal(half).float();

    return y_prediction;
}


// test predict function
let y_prediction = predict::<MyBackend, 2>(w.clone(), b, train_x_tensor.clone());
println!("y_prediction.shape ======: {:?}", y_prediction.shape());



### 11. implement all 

In [63]:
let num_features = train_x_tensor.dims()[1];

let (w, b) = initialize_with_zeros::<MyBackend, 2>([num_features, 1]);

// get w and b
let (w, b, costs) = optimize::<MyBackend, 2>(w, b, train_x_tensor.clone(), train_y_tensor.clone(), 2000, 0.005);

// predict
let y_prediction_train = predict::<MyBackend, 2>(w.clone(), b, train_x_tensor.clone());
let y_prediction_test = predict::<MyBackend, 2>(w.clone(), b, test_x_tensor.clone());

// print accuracy
println!("train data accuracy: {}", y_prediction_train.equal(train_y_tensor).float().mean());
println!("test  data accuracy: {}", y_prediction_test.equal(test_y_tensor).float().mean());

cost result 0: 0.6931474
cost result 100: 0.58450836
cost result 200: 0.46694905
cost result 300: 0.37600684
cost result 400: 0.33146328
cost result 500: 0.30327308
cost result 600: 0.27987957
cost result 700: 0.26004213
cost result 800: 0.24294068
cost result 900: 0.22800422
cost result 1000: 0.2148195
cost result 1100: 0.2030782
cost result 1200: 0.19254427
cost result 1300: 0.18303332
cost result 1400: 0.17439859
cost result 1500: 0.16652139
cost result 1600: 0.15930451
cost result 1700: 0.15266731
cost result 1800: 0.14654222
cost result 1900: 0.14087208
train data accuracy: Tensor {
  data:
[0.9904306],
  shape:  [1],
  device:  Cpu,
  backend:  "ndarray",
  kind:  "Float",
  dtype:  "f32",
}
test  data accuracy: Tensor {
  data:
[0.24],
  shape:  [1],
  device:  Cpu,
  backend:  "ndarray",
  kind:  "Float",
  dtype:  "f32",
}
