In [1]:
from juliacall import Main as jl
%load_ext juliacall.ipython
# JuliaCall comes with its own Julia dependency file juliapkg.json
# however for binder it is much simpler to just reuse binder's installation mechanism
%julia Pkg.activate(Base.current_project())
%julia using PythonCall
%julia set_var(k, v) = @eval $(Symbol(k)) = $v

## Arrays

In [4]:
n = 1000
jl.set_var("n", n)

1000

In [2]:
import numpy as np
z = np.array([7, 8, 9])    

# we pass it to a Julia function and get back a Python numpy array
%julia double(x) = 2x
%julia array_double(x) = double.(x)

a = jl.array_double(z)    

print(type(a), a)

<class 'juliacall.VectorValue'> [14, 16, 18]


In [3]:
np.array(a) * 2

array([28, 32, 36])

In [56]:
%timeit [x*2 for x in range(n)]

44 µs ± 1.61 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [57]:
%timeit jl.array_double(range(n))

9.07 µs ± 344 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [70]:
nparray = np.arange(n)
%timeit nparray * 2

1.38 µs ± 61.5 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)


In [71]:
%timeit jl.array_double(nparray)

10.9 µs ± 450 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [72]:
%julia using BenchmarkTools

In [74]:
jl.set_var("nparray", nparray);

In [75]:
%%julia
@btime double.(nparray)

jlarray = pyconvert(Vector, nparray)
@btime double.(jlarray)

  1.223 μs (4 allocations: 8.12 KiB)
  927.100 ns (4 allocations: 8.00 KiB)


1000-element Vector{Int64}:
    0
    2
    4
    6
    8
   10
   12
   14
   16
   18
    ⋮
 1982
 1984
 1986
 1988
 1990
 1992
 1994
 1996
 1998

another tiny timing example

In [80]:
%timeit np.random.rand(1, n) @ np.random.rand(n, 1)

16.1 µs ± 664 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [20]:
%julia @btime rand(1, n) * rand(n, 1)

  655.610 ns (3 allocations: 3.59 KiB)


1×1 Matrix{Float64}:
 52.310925054970284

## Programmatic

if you are not in a jupyter notebook, you can simply write a julia file

In [76]:
%%writefile example.jl

a = 2
myfunc(args...; kwargs...) = (args, kwargs)

Overwriting example.jl


and include it

In [77]:
jl.seval('include("example.jl")')

myfunc (generic function with 1 method)

or use `jl.seval` directly

In [78]:
jl.seval("""begin
    a = 2
    myfunc(args...; kwargs...) = (args, kwargs)
end""")

myfunc (generic function with 1 method)

In [81]:
jl.myfunc(1,4, [1,2,3], range(10), mykey=list)

((1, 4, [1, 2, 3], range(0, 10)),
 <jl Base.Pairs{Symbol, PythonCall.Py, Tuple{Symbol}, NamedTuple{(:mykey,), Tuple{PythonCall.Py}}}(:mykey => <class 'list'>)>)

## DataFrames

see https://dataframes.juliadata.org/stable/man/comparisons/

In [82]:
import pandas as pd
import numpy as np

df = pd.DataFrame({'grp': [1, 2, 1, 2, 1, 2],
                   'x': range(6, 0, -1),
                   'y': range(4, 10),
                   'z': [3, 4, 5, 6, 7, None]},
                   index = list('abcdef'))
df2 = pd.DataFrame({'grp': [1, 3], 'w': [10, 11]})

df.groupby('grp')['x'].mean()

grp
1    4.0
2    3.0
Name: x, dtype: float64

In [85]:
%%julia
using DataFrames
using Statistics

In [86]:
%%julia
df = DataFrame(grp=repeat(1:2, 3), x=6:-1:1, y=4:9, z=[3:7; missing], id='a':'f')
df2 = DataFrame(grp=[1, 3], w=[10, 11])

combine(groupby(df, :grp), :x => mean)

Row,grp,x_mean
Unnamed: 0_level_1,Int64,Float64
1,1,4.0
2,2,3.0


timing

In [87]:
%timeit df.groupby('grp')['x'].mean()

245 µs ± 14.1 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [88]:
%julia @btime combine(groupby(df, :grp), :x => mean)

  45.976 μs (297 allocations: 17.96 KiB)


Row,grp,x_mean
Unnamed: 0_level_1,Int64,Float64
1,1,4.0
2,2,3.0
