In [1]:
import julia
from julia.api import Julia
# Start the embedded Julia runtime with precompiled module caches disabled.
# no precompilation gives faster loading times, however GPU support through CUDA does not work
# NOTE(review): the PyJulia troubleshooting docs present compiled_modules=False as a
# workaround for statically linked Python builds and warn that it can slow down package
# loading -- confirm the "faster loading" claim above for this setup.
jl = Julia(compiled_modules=False)
# Register the %julia / %%julia IPython magics used throughout this notebook.
%load_ext julia.magic

Initializing Julia interpreter. This may take some time...


## Arrays

In [2]:
import numpy as np
# a small integer vector created on the Python side
z = np.array([7, 8, 9])    

# Define `double` in Julia, then broadcast it (dot syntax) over the interpolated
# Python array; the result comes back to Python as a numpy array.
%julia double(x) = 2x
a = %julia double.($z)      

# show both the Python type and the values of the returned object
print(type(a), a)

<class 'numpy.ndarray'> [14 16 18]


In [3]:
# Problem size used by the timing cells; it is set in Python and then assigned to a
# Julia variable of the same name so both languages work on equally sized inputs.
n = 200
%julia n = $n

200

In [4]:
# baseline: doubling n integers with a pure-Python list comprehension
%timeit [x*2 for x in range(n)]

8.59 µs ± 516 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [5]:
# the same doubling, vectorised with numpy on an array of n integers
myarray = np.arange(n)
%timeit myarray * 2

1.1 µs ± 34.3 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)


In [6]:
%%julia
using BenchmarkTools

In [7]:
%%julia
# Bind the interpolated Python array to a Julia global of the same name.
myarray = $myarray
# The doubled dollar sign is PyJulia's escape for a literal one, so BenchmarkTools
# receives an interpolated variable and times the broadcast itself rather than the
# lookup of an untyped global.
@btime double.($$myarray)

  523.414 ns (4 allocations: 1.83 KiB)


array([  0,   2,   4,   6,   8,  10,  12,  14,  16,  18,  20,  22,  24,
        26,  28,  30,  32,  34,  36,  38,  40,  42,  44,  46,  48,  50,
        52,  54,  56,  58,  60,  62,  64,  66,  68,  70,  72,  74,  76,
        78,  80,  82,  84,  86,  88,  90,  92,  94,  96,  98, 100, 102,
       104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126, 128,
       130, 132, 134, 136, 138, 140, 142, 144, 146, 148, 150, 152, 154,
       156, 158, 160, 162, 164, 166, 168, 170, 172, 174, 176, 178, 180,
       182, 184, 186, 188, 190, 192, 194, 196, 198, 200, 202, 204, 206,
       208, 210, 212, 214, 216, 218, 220, 222, 224, 226, 228, 230, 232,
       234, 236, 238, 240, 242, 244, 246, 248, 250, 252, 254, 256, 258,
       260, 262, 264, 266, 268, 270, 272, 274, 276, 278, 280, 282, 284,
       286, 288, 290, 292, 294, 296, 298, 300, 302, 304, 306, 308, 310,
       312, 314, 316, 318, 320, 322, 324, 326, 328, 330, 332, 334, 336,
       338, 340, 342, 344, 346, 348, 350, 352, 354, 356, 358, 36

Another tiny timing example: multiplying a 1×n random matrix by an n×1 random matrix.

In [45]:
# numpy: (1 x n) @ (n x 1) product; creating the two random matrices is part of the timed statement
%timeit np.random.rand(1, n) @ np.random.rand(n, 1)

5.98 µs ± 206 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [8]:
%julia @btime rand(1, n) * rand(n, 1)

  666.703 ns (3 allocations: 3.59 KiB)


array([[49.14830203]])

## Programmatic

In [12]:
from julia import Main

# Define a Julia helper that broadcasts the previously defined `double` over a whole array.
Main.eval("""
array_double(array) = double.(array)
""")

# Three Python-callable handles to the same Julia function:
# - jdouble: the function object exactly as returned by Main.eval
# - jdouble_pyarray / jdouble_pyvector: wrapped with PyCall's pyfunction, declaring the
#   argument type as PyArray / PyVector respectively
jdouble = Main.eval("array_double")
jdouble_pyarray = Main.eval("pyfunction(array_double, PyArray)") 
jdouble_pyvector = Main.eval("pyfunction(array_double, PyVector)") 

In [18]:
# NOTE: rebinds the Python-side `myarray` (previously np.arange(n)) to 100000 elements
myarray = np.arange(100000)
# time the unwrapped Julia function handle
%timeit jdouble(myarray)

186 µs ± 16.9 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [17]:
# same 100000-element input, re-created so this cell does not depend on the previous one
myarray = np.arange(100000)
# time the pyfunction wrapper that declares its argument as PyArray
%timeit jdouble_pyarray(myarray)

98 µs ± 12.2 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [15]:
# a plain three-element Python list, passed to the wrapper that declares its argument as PyVector;
# the input is far smaller than in the array timings, so the numbers are not directly comparable
mylist = [7,8,9]
%timeit jdouble_pyvector(mylist)

5.54 µs ± 461 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [58]:
# Julia's `*` as a Python callable: once as returned by Main.eval, once wrapped via
# pyfunction with both arguments declared as PyArray.
# NOTE: the name jl_mat_mul is rebound later by `from _example import jl_mat_mul`.
jl_mat_mul = Main.eval("(*)")
jl_mat_mul_pyarray = Main.eval("pyfunction(*, PyArray, PyArray)") 

In [61]:
# matrix product through the PyArray-typed wrapper; the random inputs are created inside the timed statement
%timeit jl_mat_mul_pyarray(np.random.rand(1, n), np.random.rand(n, 1))

17.2 µs ± 123 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [9]:
import jnumpy
# Set up jnumpy: first its Julia runtime, then its project, located via a notebook path
# in the current directory.
# NOTE(review): presumably the notebook path stands in for __file__, which is not
# defined inside a notebook -- confirm against the jnumpy documentation.
jnumpy.init_jl()
jnumpy.init_project("./01-introduction-julia.ipynb")

In [10]:
from _example import jl_mat_double

In [48]:
# time the function imported from the `_example` module
# NOTE(review): `myarray` is whatever the most recently executed cell bound it to
# (np.arange(n) or np.arange(100000)) -- confirm the input size before comparing timings.
%timeit jl_mat_double(myarray)

6.05 µs ± 234 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [50]:
# NOTE: this import rebinds jl_mat_mul, which an earlier cell assigned from Main.eval("(*)")
from _example import jl_mat_mul
# matrix product through the `_example` function; the random inputs are created inside the timed statement
%timeit jl_mat_mul(np.random.rand(1, n), np.random.rand(n, 1))

14.6 µs ± 414 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


## DataFrames

See https://dataframes.juliadata.org/stable/man/comparisons/ for a side-by-side comparison of DataFrames.jl with pandas.

In [62]:
import pandas as pd
import numpy as np

# Example data: `df` is indexed by the letters a-f and has one missing value in z;
# `df2` is a second small frame with the columns grp and w.
df = pd.DataFrame(
    dict(
        grp=[1, 2, 1, 2, 1, 2],
        x=range(6, 0, -1),
        y=range(4, 10),
        z=[3, 4, 5, 6, 7, None],
    ),
    index=list('abcdef'),
)
df2 = pd.DataFrame(dict(grp=[1, 3], w=[10, 11]))

# mean of x within each grp
df.groupby('grp')['x'].mean()

grp
1    4.0
2    3.0
Name: x, dtype: float64

In [65]:
%%julia
using DataFrames
using Statistics

In [66]:
%%julia
# Julia-side frames mirroring the pandas example; the letters a-f are stored in an
# ordinary `id` column here.
df = DataFrame(
    grp = repeat(1:2, 3),
    x = 6:-1:1,
    y = 4:9,
    z = [3:7; missing],
    id = 'a':'f',
)
df2 = DataFrame(grp = [1, 3], w = [10, 11])

# mean of x within each grp
combine(groupby(df, :grp), :x => mean)

<PyCall.jlwrap 2×2 DataFrame
 Row │ grp    x_mean
     │ Int64  Float64
─────┼────────────────
   1 │     1      4.0
   2 │     2      3.0>

Timing the same group-wise mean in pandas and in DataFrames.jl:

In [67]:
# pandas: mean of x per grp on the six-row example frame
%timeit df.groupby('grp')['x'].mean()

256 µs ± 5.43 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [68]:
%julia @btime combine(groupby(df, :grp), :x => mean)

  41.656 μs (287 allocations: 17.29 KiB)


<PyCall.jlwrap 2×2 DataFrame
 Row │ grp    x_mean
     │ Int64  Float64
─────┼────────────────
   1 │     1      4.0
   2 │     2      3.0>

In [64]:
%%julia
# Pkg must be loaded before it can be referenced; importing it again is harmless
# if it has already been loaded.
import Pkg
# Activate the project that Base.current_project() reports for the current working directory.
Pkg.activate(Base.current_project())

  Activating project at `~/Projects/Jolin.io/workshop-accelerate-Python-with-Julia`
