# What's new in Mojo🔥 SDK v0.5

## Keyword parameters

In [1]:
from tensor import Tensor
from algorithm import vectorize


struct SquareMatrix[dtype: DType = DType.float32, dim: Int = 4]():
    """A `dim` x `dim` matrix stored in a `Tensor[dtype]`.

    Parameters:
        dtype: Element type of the matrix (defaults to `DType.float32`).
        dim: Number of rows and of columns (defaults to 4).
    """

    var mat: Tensor[dtype]

    fn __init__(inout self, val: SIMD[dtype, 1] = 5):
        """Allocates the matrix and sets every element to `val`.

        Args:
            val: The fill value (defaults to 5).
        """
        self.mat = Tensor[dtype](self.dim, self.dim)
        alias vector_width = simdwidthof[dtype]()

        @parameter
        fn fill_val[width: Int](idx: Int) -> None:
            # Build the broadcast vector straight from `val`; there is no
            # need to read the tensor's current contents first.
            self.mat.simd_store[width](idx, SIMD[dtype, width].splat(val))

        # `vectorize` calls `fill_val` over all elements of the tensor.
        vectorize[fill_val, vector_width](self.mat.num_elements())

    fn __getitem__(self, x: Int, y: Int) -> SIMD[dtype, 1]:
        """Returns the element at index `[x, y]` of the underlying tensor.

        Args:
            x: First index.
            y: Second index.
        """
        return self.mat[x, y]

    fn print(self):
        """Prints the underlying tensor."""
        print(self.mat)

In [2]:
# Nothing is specified, so the defaults apply: dtype=float32, dim=4, val=5.
SquareMatrix().print()

Tensor([[5.0, 5.0, 5.0, 5.0],
[5.0, 5.0, 5.0, 5.0],
[5.0, 5.0, 5.0, 5.0],
[5.0, 5.0, 5.0, 5.0]], dtype=float32, shape=4x4)


In [3]:
# The constructor argument is passed by keyword; struct parameters keep their defaults.
SquareMatrix(val=12).print()

Tensor([[12.0, 12.0, 12.0, 12.0],
[12.0, 12.0, 12.0, 12.0],
[12.0, 12.0, 12.0, 12.0],
[12.0, 12.0, 12.0, 12.0]], dtype=float32, shape=4x4)


In [4]:
# dtype is bound positionally; dim keeps its default of 4.
SquareMatrix[DType.float64](10).print()

Tensor([[10.0, 10.0, 10.0, 10.0],
[10.0, 10.0, 10.0, 10.0],
[10.0, 10.0, 10.0, 10.0],
[10.0, 10.0, 10.0, 10.0]], dtype=float64, shape=4x4)


In [5]:
# dtype is bound positionally and dim by keyword.
SquareMatrix[DType.float64,dim=3](1).print()

Tensor([[1.0, 1.0, 1.0],
[1.0, 1.0, 1.0],
[1.0, 1.0, 1.0]], dtype=float64, shape=3x3)


In [6]:
# Both struct parameters and the constructor argument are passed by keyword.
SquareMatrix[dtype=DType.float64,dim=3](val=1.5).print()

Tensor([[1.5, 1.5, 1.5],
[1.5, 1.5, 1.5],
[1.5, 1.5, 1.5]], dtype=float64, shape=3x3)


Keyword arguments can also be used when indexing through `__getitem__()`:

In [7]:
# Build a matrix with all defaults and show it.
var sm = SquareMatrix()
sm.print()

# Index the matrix naming the `x` and `y` arguments of `__getitem__` explicitly.
print()
print("Keyword argument in __getitem__()")
print(sm[x=0, y=3])

Tensor([[5.0, 5.0, 5.0, 5.0],
[5.0, 5.0, 5.0, 5.0],
[5.0, 5.0, 5.0, 5.0],
[5.0, 5.0, 5.0, 5.0]], dtype=float32, shape=4x4)

Keyword argument in __getitem__()
5.0


## Automatic parameterization of functions

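* When an argument's type is a parameterized struct whose parameters are left unbound (e.g. `sm: SquareMatrix`), those parameters are automatically added as input parameters of the function
* The parameters of an argument can be referenced elsewhere in the function signature (e.g. `SIMD[sm.dtype, 1]` and `Tensor[sm.dtype]` below)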

In [8]:
from math import mul


fn multiply(sm: SquareMatrix, val: SIMD[sm.dtype, 1]) -> Tensor[sm.dtype]:
    """Multiplies every element of `sm` by `val` and returns a new tensor.

    `sm` is declared with the unbound `SquareMatrix` type, and its `dtype`
    parameter is referenced in the types of `val` and of the result.

    Args:
        sm: The matrix to scale; it is only read.
        val: The scalar factor.

    Returns:
        A tensor with the shape of `sm.mat` holding the products.
    """
    alias lanes: Int = simdwidthof[sm.dtype]()
    var product = Tensor[sm.dtype](sm.mat.shape())

    @parameter
    fn scale_chunk[width: Int](offset: Int) -> None:
        # Load `width` elements, multiply them by `val`, store at the same offset.
        var chunk = sm.mat.simd_load[width](offset)
        product.simd_store[width](offset, mul[sm.dtype, width](chunk, val))

    vectorize[scale_chunk, lanes](sm.mat.num_elements())
    return product


fn main():
    """Builds a matrix filled with 5, multiplies it by 100 and prints the result."""
    var matrix = SquareMatrix(5)
    print(multiply(matrix, 100.0))


main()


Tensor([[500.0, 500.0, 500.0, 500.0],
[500.0, 500.0, 500.0, 500.0],
[500.0, 500.0, 500.0, 500.0],
[500.0, 500.0, 500.0, 500.0]], dtype=float32, shape=4x4)


## Load and save Tensors + String enhancements

In [9]:
from tensor import Tensor
from algorithm import vectorize
from time import now
from memory import memcpy


struct SquareMatrix[dtype: DType = DType.float32, dim: Int = 4]():
    """A `dim` x `dim` matrix stored in a `Tensor[dtype]`, with file save/load.

    Parameters:
        dtype: Element type of the matrix (defaults to `DType.float32`).
        dim: Number of rows and of columns (defaults to 4).
    """

    var mat: Tensor[dtype]

    fn __init__(inout self, val: SIMD[dtype, 1] = 5):
        """Allocates the matrix and sets every element to `val`.

        Args:
            val: The fill value (defaults to 5).
        """
        self.mat = Tensor[dtype](self.dim, self.dim)
        alias vector_width = simdwidthof[dtype]()

        @parameter
        fn fill_val[width: Int](idx: Int) -> None:
            # Build the broadcast vector straight from `val`; there is no
            # need to read the tensor's current contents first.
            self.mat.simd_store[width](idx, SIMD[dtype, width].splat(val))

        vectorize[fill_val, vector_width](self.mat.num_elements())

    fn print(self):
        """Prints the underlying tensor."""
        print(self.mat)

    fn prepare_filename(self, fname: String) -> String:
        """Derives the path used by `save` from a user-supplied name.

        The result has the tensor spec inserted before the extension, e.g.
        `saved_matrix` becomes `./saved_matrix_4x4xfloat32.data`.

        NOTE(review): the spec is inserted before *every* "." in the name, so
        names with more than one dot (including `./x` or `../x` prefixes) are
        not handled; callers should pass at most one dot.

        Args:
            fname: File name or path, with or without an extension.

        Returns:
            The path to write to.
        """
        var fpath = fname
        # Add the default extension only when the name has none; a name that
        # already ends in e.g. ".data" must not get a second extension.
        if fpath.count(".") == 0:
            fpath += ".data"
        fpath = fpath.replace(".", "_" + self.mat.spec().__str__() + ".")
        # `find` returns -1 when the substring is absent, and -1 is truthy,
        # so the result has to be compared explicitly. Bare file names are
        # anchored to the current directory; paths are left untouched.
        if fpath.find("/") == -1:
            fpath = "./" + fpath
        return fpath

    fn save(self, fname: String = "saved_matrix") raises -> String:
        """Writes the tensor to a file and returns the path that was used.

        Args:
            fname: File name or path; see `prepare_filename` for how it is
                turned into the final path.

        Returns:
            The path the tensor was written to.
        """
        var fpath = self.prepare_filename(fname)
        self.mat.tofile(fpath)
        print("File saved:", fpath)
        return fpath

    @staticmethod
    fn load[dtype: DType, dim: Int](fpath: String) raises -> Tensor[dtype]:
        """Reads a file into a `dim` x `dim` tensor.

        Parameters:
            dtype: Element type to read the file as.
            dim: Number of rows and of columns of the result.

        Args:
            fpath: Path of the file to read.

        Returns:
            A `dim` x `dim` tensor holding the file's elements.

        Raises:
            An error if the file does not hold exactly `dim * dim` elements.
        """
        var load_mat = Tensor[dtype].fromfile(fpath)
        # The copy below writes `load_mat.num_elements()` elements into a
        # buffer of `dim * dim` elements, so the sizes must agree.
        if load_mat.num_elements() != dim * dim:
            raise Error("file does not contain exactly dim * dim elements")
        var new_tensor = Tensor[dtype](dim, dim)
        memcpy(new_tensor.data(), load_mat.data(), load_mat.num_elements())
        # Keep `load_mat` alive until the copy from its buffer has completed.
        _ = load_mat
        return new_tensor

In [10]:
# Build a default matrix, show it, then write it to disk.
var m = SquareMatrix()
m.print()
# `save` returns the path it actually wrote to; keep it for the load step.
var fpath = m.save("saved_matrix")

Tensor([[5.0, 5.0, 5.0, 5.0],
[5.0, 5.0, 5.0, 5.0],
[5.0, 5.0, 5.0, 5.0],
[5.0, 5.0, 5.0, 5.0]], dtype=float32, shape=4x4)
File saved: ./saved_matrix_4x4xfloat32.data


In [11]:
print("Loading Tensor from file:", fpath)
print()
# `load` is a static method, so it is called on the type; dtype and dim are
# passed explicitly as parameters.
var load_mat = SquareMatrix.load[DType.float32, 4](fpath)
print(load_mat)

Loading Tensor from file: ./saved_matrix_4x4xfloat32.data

Tensor([[5.0, 5.0, 5.0, 5.0],
[5.0, 5.0, 5.0, 5.0],
[5.0, 5.0, 5.0, 5.0],
[5.0, 5.0, 5.0, 5.0]], dtype=float32, shape=4x4)


## Benchmark enhancements

Benchmark row-wise `mean()` of a matrix by vectorizing across columns and parallelizing across rows

In [12]:
from random import rand
# Create a 5x7 float32 tensor with `rand` and print it.
var tx = rand[DType.float32](5,7)
print(tx)

Tensor([[0.085032448172569275, 0.89161127805709839, 0.18968977034091949, ..., 0.74351245164871216, 0.5603899359703064, 0.8095666766166687],
[0.51171255111694336, 0.99508452415466309, 0.96661138534545898, ..., 0.65299874544143677, 0.96153312921524048, 0.85798734426498413],
[0.29402613639831543, 0.41464456915855408, 0.51489287614822388, ..., 0.544272780418396, 0.093629911541938782, 0.43225952982902527],
[0.84492743015289307, 0.77284646034240723, 0.19185894727706909, ..., 0.18134318292140961, 0.57914149761199951, 0.31413143873214722],
[0.41198459267616272, 0.9923054575920105, 0.16392241418361664, ..., 0.076218202710151672, 0.17452387511730194, 0.037299912422895432]], dtype=float32, shape=5x7)


In [14]:
from tensor import Tensor
from random import rand
import benchmark
from time import sleep
from algorithm import vectorize, parallelize

# Element type used for the benchmark data.
alias dtype = DType.float32
# SIMD width for `dtype`, as reported by `simdwidthof`.
alias simd_width = simdwidthof[dtype]()


fn row_mean_naive[dtype: DType](t: Tensor[dtype]) -> Tensor[dtype]:
    """Computes the mean of each row of `t` with two plain nested loops.

    Parameters:
        dtype: Element type of the input and of the result.

    Args:
        t: Input tensor, indexed as `t[row, column]`.

    Returns:
        A tensor of shape `(t.dim(0), 1)` holding one mean per row.
    """
    var n_rows = t.dim(0)
    var n_cols = t.dim(1)
    # NOTE(review): the accumulation below relies on a freshly constructed
    # tensor starting at zero - confirm against the Tensor documentation.
    var means = Tensor[dtype](n_rows, 1)
    for row in range(n_rows):
        for col in range(n_cols):
            means[row] += t[row, col]
        # Turn the row sum into the row mean.
        means[row] /= n_cols
    return means


fn row_mean_fast[dtype: DType](t: Tensor[dtype]) -> Tensor[dtype]:
    """Computes the mean of each row of `t`, vectorized and parallelized.

    Each row is handled by `parallel_reduce_rows`, which is dispatched through
    `parallelize`; within a row the columns are summed in SIMD chunks through
    `vectorize`.

    Parameters:
        dtype: Element type of the input and of the result.

    Args:
        t: Input tensor, indexed as `t[row, column]`.

    Returns:
        A tensor of shape `(t.dim(0), 1)` holding one mean per row.
    """
    # Derive the chunk width from this function's own `dtype` parameter so the
    # function does not depend on a module-level alias fixed to one dtype.
    alias chunk_width = 2 * simdwidthof[dtype]()
    # NOTE(review): the accumulation below relies on a freshly constructed
    # tensor starting at zero - confirm against the Tensor documentation.
    var res = Tensor[dtype](t.dim(0), 1)

    @parameter
    fn parallel_reduce_rows(idx1: Int) -> None:
        @parameter
        fn vectorize_reduce_row[simd_width: Int](idx2: Int) -> None:
            # `idx1 * t.dim(1) + idx2` is the flat offset of column `idx2` in
            # row `idx1`; the loaded chunk is summed into the row's slot.
            res[idx1] += t.simd_load[simd_width](idx1 * t.dim(1) + idx2).reduce_add()

        vectorize[vectorize_reduce_row, chunk_width](t.dim(1))
        # Turn the row sum into the row mean.
        res[idx1] /= t.dim(1)

    # One work item per row; the second argument is passed through unchanged
    # from the original call.
    parallelize[parallel_reduce_rows](t.dim(0), t.dim(0))
    return res


fn main():
    """Benchmarks `row_mean_naive` against `row_mean_fast` and prints both reports.

    Both functions are run on the same 1000 x 100000 tensor produced by
    `rand`; the speed-up printed at the end is the ratio of the two mean
    run times.
    """
    var t = rand[dtype](1000, 100000)

    @parameter
    fn bench_mean():
        # The result is discarded; only the run time matters here.
        _ = row_mean_naive(t)

    @parameter
    fn bench_mean_fast():
        _ = row_mean_fast(t)

    var report = benchmark.run[bench_mean](max_runtime_secs=1)
    var report_fast = benchmark.run[bench_mean_fast](max_runtime_secs=1)
    report.print()
    report_fast.print()
    print("Speed up:", report.mean() / report_fast.mean())


main()

---------------------
Benchmark Report (s)
---------------------
Mean: 0.3632043333333333
Total: 2.1792259999999999
Iters: 6
Warmup Mean: 0.37432949999999998
Warmup Total: 0.74865899999999996
Warmup Iters: 2
Fastest Mean: 1.7976931348623157e+308
Slowest Mean: 0.3632043333333333

---------------------
Benchmark Report (s)
---------------------
Mean: 0.0065975499999999998
Total: 1.31951
Iters: 200
Warmup Mean: 0.0081960000000000002
Warmup Total: 0.016392
Warmup Iters: 2
Fastest Mean: 0.0065975499999999998
Slowest Mean: 0.0065975499999999998

Speed up: 55.051395341199886


## SIMD enhancements

In [16]:
def main():
    """Shows `SIMD.join`: two vectors are concatenated into one."""
    alias dtype = DType.float32

    # No width is given, so `SIMD[dtype]` uses its default width.
    var a = SIMD[dtype].splat(0.5)
    var b = SIMD[dtype].splat(2.5)
    var joined = a.join(b)

    print("SIMD a:", a)
    print("SIMD b:", b)
    print()
    print("SIMD a.join(b):", joined)


main()

SIMD a: [0.5, 0.5, 0.5, 0.5]
SIMD b: [2.5, 2.5, 2.5, 2.5]

SIMD a.join(b): [0.5, 0.5, 0.5, 0.5, 2.5, 2.5, 2.5, 2.5]
