In [1]:
import julia
from julia.api import Julia
# Start the embedded Julia runtime with precompilation of Julia modules turned
# off (compiled_modules=False). Per the original author's note, GPU support
# through CUDA does not work in this mode.
# NOTE(review): the original note also claims this gives faster loading times;
# PyJulia's troubleshooting guide presents this flag mainly as a workaround for
# statically linked Python builds -- confirm the motivation and the speed claim.
jl = Julia(compiled_modules=False)
# Register the %julia / %%julia magics used by the cells below.
%load_ext julia.magic

Initializing Julia interpreter. This may take some time...


## Arrays

In [2]:
import numpy as np
z = np.array([7, 8, 9])    

# we pass it to a Julia function and get back a Python numpy array
%julia double(x) = 2x
a = %julia double.($z)      

print(type(a), a)

<class 'numpy.ndarray'> [14 16 18]


In [3]:
# NumPy baseline for the doubling example: the 3-element array is built and
# multiplied by 2 inside the timed statement. `-n 10` requests 10 loops per run.
# NOTE(review): the recorded output warns about a large spread between runs;
# consider letting %timeit choose the loop count for a steadier measurement.
%timeit -n 10 np.array([7,8,9]) * 2

The slowest run took 19.06 times longer than the fastest. This could mean that an intermediate result is being cached.
5.13 µs ± 9.04 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [36]:
%%julia
# Load BenchmarkTools for the @btime macro used by the Julia timing cells.
using BenchmarkTools

In [16]:
# Julia counterpart of the NumPy doubling benchmark: the literal array is
# created and `double` is broadcast over it inside the expression timed by @btime.
%julia @btime double.([7,8,9])

  70.375 ns (2 allocations: 160 bytes)


array([14, 16, 18], dtype=int64)

In [17]:
# NumPy baseline for a (1x100) @ (100x1) product. Both random matrices are
# generated inside the timed statement, so their creation is part of the
# measurement. `-n 10` requests 10 loops per run.
%timeit -n 10 np.random.rand(1, 100) @ np.random.rand(100, 1)

The slowest run took 35.50 times longer than the fastest. This could mean that an intermediate result is being cached.
27.4 µs ± 51.2 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [18]:
# Julia counterpart of the matrix product benchmark: rand(1, 100) and
# rand(100, 1) are created and multiplied inside the expression timed by @btime.
%julia @btime rand(1, 100) * rand(100, 1)

  595.848 ns (3 allocations: 1.81 KiB)


array([[25.81028628]])

## DataFrames

For a side-by-side comparison of DataFrames.jl with pandas and other data-frame libraries, see https://dataframes.juliadata.org/stable/man/comparisons/

In [31]:
import pandas as pd
import numpy as np

# Six-row demo frame labelled 'a'..'f': two alternating groups, a descending
# column x, an ascending column y and a column z whose last value is missing.
df = pd.DataFrame(
    dict(
        grp=[1, 2, 1, 2, 1, 2],
        x=range(6, 0, -1),
        y=range(4, 10),
        z=[3, 4, 5, 6, 7, None],
    ),
    index=[*'abcdef'],
)
# Small second frame sharing the 'grp' key.
df2 = pd.DataFrame(dict(grp=[1, 3], w=[10, 11]))

# Mean of x within each group (displayed as the cell result).
df.groupby('grp')['x'].mean()

grp
1    4.0
2    3.0
Name: x, dtype: float64

In [None]:
%%julia
# DataFrames provides DataFrame, groupby and combine; Statistics provides mean.
# Both are used by the Julia cells that follow.
using DataFrames
using Statistics

In [32]:
%%julia
# Julia counterparts of the pandas frames; the row labels 'a'..'f' are stored
# as an ordinary `id` column.
df = DataFrame(
    grp = repeat(1:2, 3),
    x = 6:-1:1,
    y = 4:9,
    z = vcat(3:7, missing),  # last entry is missing
    id = 'a':'f',
)
df2 = DataFrame(grp = [1, 3], w = [10, 11])

# Mean of x within each grp (displayed as the cell result).
combine(groupby(df, :grp), :x => mean)

<PyCall.jlwrap 2×2 DataFrame
 Row │ grp    x_mean
     │ Int64  Float64
─────┼────────────────
   1 │     1      4.0
   2 │     2      3.0>

In [33]:
# pandas baseline: per-group mean of x on the six-row demo frame `df`.
%timeit df.groupby('grp')['x'].mean()

265 µs ± 20 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [35]:
# Julia counterpart of the pandas groupby benchmark, run on the Julia-side `df`.
# NOTE(review): `df` is a global here. BenchmarkTools usually recommends
# interpolating globals with `$`, but inside %julia a `$name` pulls in the Python
# variable of that name (as with `$z` in the arrays example), so `df` is left
# uninterpolated -- confirm whether this affects the reported time.
%julia @btime combine(groupby(df, :grp), :x => mean)

  43.626 μs (287 allocations: 17.29 KiB)


<PyCall.jlwrap 2×2 DataFrame
 Row │ grp    x_mean
     │ Int64  Float64
─────┼────────────────
   1 │     1      4.0
   2 │     2      3.0>

In [24]:
%%julia
# Activate the Julia project that Base.current_project() reports for the
# current working directory.
# `Pkg` is imported here so the cell also works on a fresh kernel, rather than
# relying on another cell having loaded it beforehand.
import Pkg
Pkg.activate(Base.current_project())

  Activating project at `~/Projects/Jolin.io/workshop-accelerate-Python-with-Julia`
