In [35]:
import Sloth as sl
import pandas as pd
import numpy as np

In [36]:
# Read GOOG.csv using its first column as the index and parsing that index as dates.
# `infer_datetime_format` is intentionally not passed: it is deprecated since
# pandas 2.0 (a strict version of that inference is now the default) and passing
# it only emits a deprecation warning.
pdf = pd.read_csv(r"GOOG.csv", index_col=0, parse_dates=True)
# Sloth counterpart of the same data, built from the pandas frame.
sdf = sl.DataFrame.from_pandas(pdf)

In [4]:
%load_ext Cython

In [34]:
%%cython
from Sloth.frame cimport Series, DataFrame

cdef Series h_str_1(DataFrame self, str arg):
    """Return column ``arg`` of ``self`` as a Series by slicing ``self.values``.

    The column position comes from ``self.columns.get_item(arg)`` and is used
    to take ``self.values[:, position]``. The frame's index is reused as the
    Series index and the Series is named ``arg``.
    """
    position = self.columns.get_item(arg)
    # Presumably a 1d numpy array (all rows of one column) -- depends on the type of self.values.
    column = self.values[:, position]
    return Series(values=column, index=self.index, name=arg)

def get_1(df, arg):
    """Python-callable entry point that forwards to the ``cdef`` function ``h_str_1``."""
    column = h_str_1(df, arg)
    return column

In [28]:
%%cython
from Sloth.frame cimport Series, DataFrame
cimport numpy as np
import numpy as np

cdef Series h_2(DataFrame self, str arg):
    """Return column ``arg`` of ``self`` as a Series, copied element by element.

    A new float64 array with one slot per row of ``self.values`` is allocated
    and filled in a loop from ``self.values[row, column]``. The frame's index
    is reused and the Series is named ``arg``.
    """
    cdef int row
    cdef int column = self.columns.get_item(arg)
    cdef int n_rows = self.values.shape[0]
    # Typed buffer so that the stores in the loop are plain C writes.
    cdef np.ndarray[np.float64_t, ndim=1] copied = np.empty(n_rows, dtype=np.float64)

    for row in range(n_rows):
        copied[row] = self.values[row, column]

    return Series(values=copied, index=self.index, name=arg)

def get_2(df, arg):
    """Python-callable entry point that forwards to the ``cdef`` function ``h_2``."""
    column = h_2(df, arg)
    return column

In [32]:
%%cython
from Sloth.frame cimport Series, DataFrame
cimport numpy as np
import numpy as np
from libc.stdlib cimport malloc, free


cdef Series h_3(self, arg):
    """Return column ``arg`` of ``self`` as a Series, staged through a C buffer.

    The column is copied element by element from ``self.values[i, col_idx]``
    into a ``malloc``-allocated ``double`` buffer, which is then copied into a
    NumPy array owned by the returned Series.

    Parameters
    ----------
    self : object exposing ``columns.get_item``, ``values`` and ``index``
    arg : column key passed to ``self.columns.get_item``

    Returns
    -------
    Series
        Built with ``values`` = the copied column, ``index`` = ``self.index``
        and ``name`` = ``arg``.

    Raises
    ------
    MemoryError
        If the temporary buffer cannot be allocated.
    """
    cdef int col_idx = self.columns.get_item(arg)
    cdef int nrows = self.values.shape[0]
    cdef double* col_values = NULL
    cdef int i

    if nrows <= 0:
        # Nothing to copy: avoid malloc(0) (which may legitimately return NULL)
        # and a zero-length pointer-to-memoryview cast.
        col_values_np = np.empty(0, dtype=np.float64)
    else:
        col_values = <double*>malloc(<size_t>nrows * sizeof(double))
        if col_values == NULL:
            raise MemoryError("could not allocate buffer for %d doubles" % nrows)
        try:
            # Accessing array elements using pointers
            for i in range(nrows):
                col_values[i] = self.values[i, col_idx]

            # Convert the pointer array to a numpy array; .copy() detaches the
            # result from the malloc'ed memory before it is released.
            col_values_np = np.asarray(<double[:nrows]>col_values).copy()
        finally:
            # Always release the buffer, including when indexing raises.
            free(col_values)

    return Series(
        values=col_values_np,
        index=self.index,
        name=arg
    )

def get_3(df, arg):
    """Python-callable entry point that forwards to the ``cdef`` function ``h_3``."""
    column = h_3(df, arg)
    return column

In [23]:
# Benchmark get_1: the column is obtained by slicing self.values in a single step.
%timeit get_1(sdf, "Open")

3.68 µs ± 82.6 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [29]:
# Benchmark get_2: the column is copied element by element into a typed float64 array.
%timeit get_2(sdf, "Open")

624 µs ± 13.6 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [33]:
# Benchmark get_3: the column is copied element by element into a malloc'ed buffer,
# which is then converted to a NumPy array.
%timeit get_3(sdf, "Open")

761 µs ± 332 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [48]:
%%cython 

import numpy as np

# A stray `int[]` line used to sit here; it is not valid Cython/Python syntax
# and prevented the cell from compiling, so it has been removed.

# .view() returns a new array object over the same data as the array it is called on.
a = np.array([1, 2, 3]).view()
print(a)

# Assign through the view, then print again to show the updated contents.
a[0] = 5

print(a)

In [47]:
%%cython

from libc.stdio cimport printf

# A C pointer aliases its target: an assignment to the variable is visible
# when reading through the pointer.
cdef int value = 5
cdef int* value_ptr = &value
value = 6

# Both reads yield the updated value because value_ptr holds the address of value.
print(value, value_ptr[0])


6 6


In [52]:
%%cython
import numpy as np
cimport numpy as np

def modify_array(np.ndarray[np.float64_t, ndim=1] arr not None):
    """
    Create a new NumPy array with modified elements from the input array.

    Each element of the result is the corresponding input element doubled.
    The input array itself is not modified.

    Parameters
    ----------
    arr : numpy.ndarray of float64, 1-D
        Values to double. May be non-contiguous (e.g. a strided slice).
        ``None`` is rejected with ``TypeError``.

    Returns
    -------
    numpy.ndarray of float64, 1-D
        Newly allocated array of the same length holding ``arr[i] * 2``.
    """
    # The raw-pointer loop below walks memory with unit stride, so it must run
    # over a C-contiguous buffer. np.ascontiguousarray returns the input
    # unchanged when it is already contiguous, so the common case is not copied.
    cdef np.ndarray[np.float64_t, ndim=1, mode="c"] src = np.ascontiguousarray(arr)
    # Py_ssize_t matches NumPy's shape type and avoids truncating large lengths.
    cdef Py_ssize_t n = src.shape[0]
    cdef np.ndarray[np.float64_t, ndim=1, mode="c"] new_arr = np.empty(n, dtype=np.float64)  # Allocate new array
    cdef double* input_ptr = <double*> src.data  # Pointer to input array data
    cdef double* new_ptr = <double*> new_arr.data  # Pointer to new array data
    cdef Py_ssize_t i

    for i in range(n):
        new_ptr[i] = input_ptr[i] * 2  # Example operation: doubling each element

    return new_arr


In [55]:
import numpy as np

# Small float64 input vector for the demo
arr = np.asarray([1.0, 2.0, 3.0, 4.0], dtype=np.float64)
print("Before modification:", arr)

# Run the Cython function and keep its return value under a separate name
a2 = modify_array(arr)
print("After modification:", a2)


Before modification: [1. 2. 3. 4.]
After modification: [2. 4. 6. 8.]


In [56]:
# At the Python level, ndarray.data displays as a memory (buffer) object, as the output below shows.
arr.data

<memory at 0x0000027A4B220DC0>

In [54]:
# Compare the Cython pointer loop with NumPy's vectorized multiply on the same input.
# NOTE(review): arr has only 4 elements, so these timings likely reflect per-call
# overhead more than per-element throughput -- confirm with a larger array.
%timeit modify_array(arr)
%timeit arr * 2

597 ns ± 16.5 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)
1.33 µs ± 28.8 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)
