Julia calculation: 0.9 seconds

In [6]:
function calculate_pi(n)
    pi_estimate = 0.0
    sign = 1.0
    for i in 0:n-1
        pi_estimate += sign / (2i + 1)
        sign = -sign
    end
    return 4 * pi_estimate
end

n = 1_000_000_000
println("Pi estimate using $n iterations: ", calculate_pi(n))


Pi estimate using 1000000000 iterations: 3.1415926525880504


Python numpy calculation, 5 minutes 5 seconds.

In [1]:
import numpy as np

# Define a function to generate the alternating sign sequence
def alternating_sign(n):
    # Generate a sequence of alternating -1 and 1 values
    sign = np.ones(n)
    sign[1::2] = -1
    return sign

# Define a function to calculate pi using the Leibniz algorithm
def calculate_pi(n):
    # Generate the series of terms using a NumPy array
    i = np.arange(n)
    sign = alternating_sign(n)
    term = np.where(sign == 1, 1 / (2 * i + 1), -1 / (2 * i + 1))

    # Sum the terms
    pi_estimate = 4 * np.sum(term)

    return pi_estimate

# Call the calculate_pi function with n = 1000000
n = 1_000_000_000
pi_estimate = calculate_pi(n)
print(f"Pi estimate using {n} iterations: {pi_estimate}")

Pi estimate using 1000000000 iterations: 3.1415926525899134


In [1]:
%load_ext cython

Cython, very simple notation and while loop, not imporable, 0.1 seconds

In [3]:
%%cython

cdef long int intsum(int[:] values):
    cdef long int sum = 0
    cdef int *ptr = &values[0]
    cdef long int i = 0
    while i < len(values):
        sum += ptr[i]
        i += 1
    return sum

import array

# Define the input array with 1 billion integers
values = array.array('i', range(1_000_000_0000))

# Call the intsum function with the input array
result = intsum(values)

# Print the result to the console
print(result)

Content of stdout:
_cython_magic_08201700551ca11b4f267964c65baaec4861dcdf.c
   Creating library C:\Users\nbutterly\.ipython\cython\Users\nbutterly\.ipython\cython\_cython_magic_08201700551ca11b4f267964c65baaec4861dcdf.cp310-win_amd64.lib and object C:\Users\nbutterly\.ipython\cython\Users\nbutterly\.ipython\cython\_cython_magic_08201700551ca11b4f267964c65baaec4861dcdf.cp310-win_amd64.exp
Generating code
Finished generating code

OverflowError: Python int too large to convert to C long

0.1 second Cython, simple typehinging and importable

In [17]:
%%cython

from libc.math cimport pow
from libc.stdlib cimport malloc, free

# Define a function to calculate pi using the Leibniz algorithm
def calculate_pi(int n):
    # Generate the series of terms using a dynamically allocated array
    cdef double* term = <double*>malloc(n * sizeof(double))
    cdef double sign = 1.0
    cdef int i
    for i in range(n):
        term[i] = sign / (2 * i + 1)
        sign = -sign

    # Sum the terms
    cdef long double pi_estimate = 4 * 0
    for i in range(n):
        pi_estimate += term[i]

    # Free the dynamically allocated array
    free(term)

    return pi_estimate


# Call the calculate_pi function with n = 1000000
cdef int n = 10000000000
cdef double pi_estimate = calculate_pi(n)
print("Pi estimate using %d iterations: %f" % (n, pi_estimate))

Cython complex notation 0.1 seconds

In [13]:
%%cython

from libc.math cimport pow
from libc.stdlib cimport malloc, free
from time cimport clock, CLOCKS_PER_SEC

# Define a function to generate the alternating sign sequence
cdef void alternating_sign(int n, double* sign):
    # Generate a sequence of alternating -1 and 1 values
    cdef int i
    for i in range(n):
        sign[i] = pow(-1, i)

# Define a function to calculate pi using the Leibniz algorithm
def calculate_pi(int n):
    # Generate the series of terms using a dynamically allocated array
    cdef double* term = <double*>malloc(n * sizeof(double))
    cdef double* sign = <double*>malloc(n * sizeof(double))
    alternating_sign(n, sign)
    cdef int i
    for i in range(n):
        term[i] = sign[i] / (2 * i + 1)

    # Sum the terms
    cdef double pi_estimate = 4 * 0
    for i in range(n):
        pi_estimate += term[i]

    # Free the dynamically allocated arrays
    free(term)
    free(sign)

    return pi_estimate

cdef clock_t begin = clock()
# Call the calculate_pi function with n = 1000000
cdef int n = 10000000000
cdef double pi_estimate = calculate_pi(n)
print("Pi estimate using %d iterations: %f" % (n, pi_estimate))
cdef clock_t end = clock()

cdef double time_spent = (<double>(end - begin)) / CLOCKS_PER_SEC
print("Time spent: %f" % time_spent)

Python dask array: 21.1 seconds

In [40]:
import dask.array as da

# Define a function to generate the alternating sign sequence
def alternating_sign(n):
    # Generate a sequence of alternating -1 and 1 values
    sign = da.ones(n)
    sign[1::2] = -1
    return sign

# Define a function to calculate pi using the Leibniz algorithm
def calculate_pi(n):
    # Generate the series of terms using a Dask array
    i = da.arange(n)
    sign = alternating_sign(n)
    term = da.where(sign == 1, 1 / (2 * i + 1), -1 / (2 * i + 1))

    # Sum the terms
    pi_estimate = 4 * term.sum()

    return pi_estimate

# Call the calculate_pi function with n = 1000000
n = 1000000000
pi_estimate = calculate_pi(n)
print(f"Pi estimate using {n} iterations: {pi_estimate.compute()}")

Pi estimate using 1000000000 iterations: 3.1415926525897957


C language manually coded - 6.9 seconds:

In [None]:
#include<stdio.h>
#include<conio.h>
#include<math.h>
#include<time.h>


int main()
{
	clock_t start, end;
	start = clock();
	long int i;
	long int n = 1000000000;
	double sum = 0.0, term, pi;
	/* Applying Leibniz Formula */
	for (i = 0; i < n; i++)
	{
		term = pow(-1, i) / (2 * i + 1);
		sum += term;
	}
	pi = 4 * sum;
	printf("\nPI = %.6lf", pi);
    end = clock();
	double cpu_time_used = ((double)(end - start)) / CLOCKS_PER_SEC;
	printf("\nTime taken: %lf seconds", cpu_time_used);
	return 0;
}

In [11]:
import pandas
import dask.array as da
import dask.dataframe as dd
from pathlib import Path

# Generate a large numpy array of random values
x = da.random.random(size=(100_000_00, 10), chunks=(100_0000, 10))


# Define a function to apply to each row
def my_func(row):
    return row.sum()

# Apply the function to each row using map_rows
# result = da.apply_along_axis(my_func, axis=1, arr=x)
ddf = dd.from_dask_array(x)
# # add column names to ddf
ddf.columns = ['col1', 'col2', 'col3', 'col4', 'col5', 'col6', 'col7', 'col8','col9', 'col10']
ddf.compute()
# print(Path.cwd())
dd.to_parquet(df=ddf, path='resasdgasdfasdafagasgasfsagsdfasdgfdsuasasfasdfdfasfasasdgsagfasflt.parquet')
print(ddf)

Dask DataFrame Structure:
                   col1     col2     col3     col4     col5     col6     col7     col8     col9    col10
npartitions=10                                                                                          
0               float64  float64  float64  float64  float64  float64  float64  float64  float64  float64
1000000             ...      ...      ...      ...      ...      ...      ...      ...      ...      ...
...                 ...      ...      ...      ...      ...      ...      ...      ...      ...      ...
9000000             ...      ...      ...      ...      ...      ...      ...      ...      ...      ...
9999999             ...      ...      ...      ...      ...      ...      ...      ...      ...      ...
Dask Name: rename, 3 graph layers


In [1]:
import pandas as pd
data = {'first_name': ['Sigrid', 'Joe', 'Theodoric','Kennedy', 'Beatrix', 'Olimpia', 'Grange', 'Sallee'],
        'last_name': ['Mannock', 'Hinners', 'Rivers', 'Donnell', 'Parlett', 'Guenther', 'Douce', 'Johnstone'],
        'age': [27, 31, 36, 53, 48, 36, 40, 34],
        'amount_1': [7.17, 1.90, 1.11, 1.41, 6.69, 4.62, 1.01, 4.88],
        'amount_2': [8.06,  "?", 5.90,  "?",  "?", 7.48, 4.37,  "?"]}
datosDataFrame = pd.DataFrame(data)
print(datosDataFrame)
datosDataFrame.to_csv('example.csv')

  first_name  last_name  age  amount_1 amount_2
0     Sigrid    Mannock   27      7.17     8.06
1        Joe    Hinners   31      1.90        ?
2  Theodoric     Rivers   36      1.11      5.9
3    Kennedy    Donnell   53      1.41        ?
4    Beatrix    Parlett   48      6.69        ?
5    Olimpia   Guenther   36      4.62     7.48
6     Grange      Douce   40      1.01     4.37
7     Sallee  Johnstone   34      4.88        ?
