# Numba optimization

Numba works by generating optimized machine code using the LLVM compiler infrastructure at import time, runtime, or statically (using the included pycc tool). Numba supports compilation of Python to run on either CPU or GPU hardware, and is designed to integrate with the Python scientific software stack.

In [10]:
from typing import List

import numba
import numpy as np
import pandas as pd



In [11]:
# original code from C.E.
def calc_slope(data: pd.DataFrame, 
               key_columns: List[str], 
               slope_column: str, 
               fact_name: str) -> pd.DataFrame:
    if not data.empty:
        pdf_with_slope = (
            data.sort_values(by="period_seq").groupby(key_columns).apply(slope, slope_column).reset_index()
        )
        if pdf_with_slope.empty:
            data[fact_name] = 0.0
        else:
            pdf_with_slope.columns = key_columns + [fact_name]
            data = data.merge(pdf_with_slope, on=key_columns)
            return data
    else:
        return pd.DataFrame(columns=data.columns.to_list() + [fact_name])

def slope(data: pd.DataFrame, sales_column: str):
    num_periods = list(range(data.shape[0]))
    sales = data[sales_column].to_numpy()

    sum_num_periods = sum(num_periods)
    sum_sales = sum(sales)

    sum_num_periods_sales = np.dot(num_periods, sales)
    sum_num_periods_square = np.dot(num_periods, num_periods)

    square_of_sum_num_periods = sum_num_periods * sum_num_periods
    slope_num = len(num_periods) * sum_num_periods_sales - sum_num_periods * sum_sales
    slope_den = len(num_periods) * sum_num_periods_square - square_of_sum_num_periods

    return slope_num / slope_den


In CE we use `calc_slope` function twice inside the rule_53 implementation.

```python
output_df = calc_slope(output_df, ['item_id', 'competitor_item_id'], 'salesunits_own', 'slope_own')
#...
output_df = calc_slope(output_df, ['item_id', 'competitor_item_id'], 'salesunits_competitor', 'slope_competitor')
```

Let's check how expensive this function is, and if we can improve the execution time with numba.


In [12]:
# Prepare a sample data frame
#    We need the cell's data and configuration as they are used in CE:

configuration = {
    "country_code": "CN",
    "item_group_code": "PTV_FLAT",
    "market_configuration": {
      "ce": {
        "low_price_percentage": 0.1,
        "high_price_percentage": 0.1,
        "medium_price_percentage": 0.1,
        "lower_price_range_threshold": 0,
        "upper_price_range_threshold": 999999999
      }
    }
  }
  
df = pd.read_json('PTV_FLAT-CN.py', orient='columns')
df.shape

(1505, 25)

In [13]:
df.head()

Unnamed: 0,item_id,item_group_code,country_code,period_seq,competitor_item_id,loc_distance_euclidean,distance_euclidean,brand,price_own,loc_price_own,...,price_competitor,loc_price_competitor,salesunits_competitor,loc_salesunits_competitor,wgt_distr_competitor,loc_wgt_distr_competitor,my_rank,period_rank,wgt_distr_competitor_within_wgt_distr_own_,wgt_distr_competitor_within_wgt_distr_own_sum
75,102708799,PTV_FLAT,CN,2643,155616288,1,0.093826,SAMSUNG,2574.79,1,...,2641.6,1,331.0,1,0.290766,1,4.0,1,1,5
76,102708799,PTV_FLAT,CN,2644,155616288,1,0.093826,SAMSUNG,2754.86,1,...,2734.11,1,123.0,1,0.305386,1,4.0,2,1,5
77,102708799,PTV_FLAT,CN,2645,155616288,1,0.093826,SAMSUNG,2429.0,1,...,2789.94,1,85.0,1,0.480029,1,4.0,3,1,5
78,102708799,PTV_FLAT,CN,2646,155616288,1,0.093826,SAMSUNG,2582.08,1,...,2715.714286,1,84.0,1,0.205709,1,4.0,4,1,5
79,102708799,PTV_FLAT,CN,2647,155616288,1,0.093826,SAMSUNG,2658.5,1,...,2682.61,1,157.0,1,0.337947,1,4.0,5,1,5


In [14]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1505 entries, 75 to 18659
Data columns (total 25 columns):
 #   Column                                         Non-Null Count  Dtype  
---  ------                                         --------------  -----  
 0   item_id                                        1505 non-null   int64  
 1   item_group_code                                1505 non-null   object 
 2   country_code                                   1505 non-null   object 
 3   period_seq                                     1505 non-null   int64  
 4   competitor_item_id                             1505 non-null   int64  
 5   loc_distance_euclidean                         1505 non-null   int64  
 6   distance_euclidean                             1505 non-null   float64
 7   brand                                          1505 non-null   object 
 8   price_own                                      1505 non-null   float64
 9   loc_price_own                                  150

Verify the code works:

In [15]:
calc_slope(df, ['item_id', 'competitor_item_id'], 'salesunits_own', 'slope_own')

Unnamed: 0,item_id,item_group_code,country_code,period_seq,competitor_item_id,loc_distance_euclidean,distance_euclidean,brand,price_own,loc_price_own,...,loc_price_competitor,salesunits_competitor,loc_salesunits_competitor,wgt_distr_competitor,loc_wgt_distr_competitor,my_rank,period_rank,wgt_distr_competitor_within_wgt_distr_own_,wgt_distr_competitor_within_wgt_distr_own_sum,slope_own
0,102708799,PTV_FLAT,CN,2643,155616288,1,0.093826,SAMSUNG,2574.790000,1,...,1,331.00,1,0.290766,1,4.0,1,1,5,9.600
1,102708799,PTV_FLAT,CN,2644,155616288,1,0.093826,SAMSUNG,2754.860000,1,...,1,123.00,1,0.305386,1,4.0,2,1,5,9.600
2,102708799,PTV_FLAT,CN,2645,155616288,1,0.093826,SAMSUNG,2429.000000,1,...,1,85.00,1,0.480029,1,4.0,3,1,5,9.600
3,102708799,PTV_FLAT,CN,2646,155616288,1,0.093826,SAMSUNG,2582.080000,1,...,1,84.00,1,0.205709,1,4.0,4,1,5,9.600
4,102708799,PTV_FLAT,CN,2647,155616288,1,0.093826,SAMSUNG,2658.500000,1,...,1,157.00,1,0.337947,1,4.0,5,1,5,9.600
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1500,165186250,PTV_FLAT,CN,2643,143860451,1,0.099616,PANDA,1098.987705,1,...,1,313.97,1,0.320317,1,5.0,1,1,5,2.524
1501,165186250,PTV_FLAT,CN,2644,143860451,1,0.099616,PANDA,1099.000000,1,...,1,190.94,1,0.125848,1,5.0,2,1,5,2.524
1502,165186250,PTV_FLAT,CN,2645,143860451,1,0.099616,PANDA,930.639227,1,...,1,303.00,1,0.126760,1,5.0,3,1,5,2.524
1503,165186250,PTV_FLAT,CN,2646,143860451,1,0.099616,PANDA,994.497630,1,...,1,536.00,1,0.160636,1,5.0,4,1,5,2.524


In [16]:
%timeit calc_slope(df, ['item_id', 'competitor_item_id'], 'salesunits_own', 'slope_own')

42.6 ms ± 2.67 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


Let’s take a look and see where the time is spent during this operation (limited to the most time consuming four calls) using the prun ipython magic function:

In [17]:
%prun -l 4 calc_slope(df, ['item_id', 'competitor_item_id'], 'salesunits_own', 'slope_own')

 

         54989 function calls (54272 primitive calls) in 0.079 seconds

   Ordered by: internal time
   List reduced from 682 to 4 due to restriction <4>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
  664/656    0.006    0.000    0.006    0.000 {built-in method numpy.core._multiarray_umath.implement_array_function}
      301    0.004    0.000    0.039    0.000 <ipython-input-11-d98fa803f074>:19(slope)
        1    0.002    0.002    0.049    0.049 {pandas._libs.reduction.apply_frame_axis0}
      925    0.002    0.000    0.004    0.000 generic.py:5138(__setattr__)

# Numba

> NOTE: As of Numba version 0.20, pandas objects cannot be passed directly to Numba-compiled functions. Instead, one must pass the NumPy array underlying the pandas object to the Numba-compiled function.



- Pandas is not understood by Numba and as a result Numba would simply run the code via the interpreter but with the added cost of the Numba internal overheads!
- To use JIT compile with Numba, we need to write code based on vectorizationa & broadcasting technique.
- Vectorizing the code only plays well with Numpy and simple Python syntax.
- Instead of using a Pandas `apply`, separate out numerical calculations into a Numba sub-function.
- `pyyaml` - enables configuration of Numba via a YAML config file.
- The parallel option for jit() can produce diagnostic information about the transforms undertaken in automatically parallelizing the decorated code. This information can be accessed in two ways, the first is by setting the environment variable `NUMBA_PARALLEL_DIAGNOSTICS`, the second is by calling `parallel_diagnostics()`, both methods give the same information and print to STDOUT. The level of verbosity in the diagnostic information is controlled by an integer argument of value between 1 and 4 inclusive, 1 being the least verbose and 4 the most.



In [18]:

@numba.jit
def f_plain(x):
    return x * (x - 1)


@numba.jit
def integrate_f_numba(a, b, N):
    s = 0
    dx = (b - a) / N
    for i in range(N):
        s += f_plain(a + i * dx)
    return s * dx


@numba.jit
def apply_integrate_f_numba(col_a, col_b, col_N):
#     n = len(col_N)
    n = col_N.shape[0]
    result = np.empty(n, dtype=np.float64)
    assert len(col_a) == len(col_b) == n
    for i in range(n):
        result[i] = integrate_f_numba(col_a[i], col_b[i], col_N[i])
    return result


def compute_numba(df):
    result = apply_integrate_f_numba(df['a'].to_numpy(),
                                     df['b'].to_numpy(),
                                     df['N'].to_numpy())
    return pd.Series(result, index=df.index, name='result')

In [19]:
rand_df = pd.DataFrame({'a': np.random.randn(1000),
                   'b': np.random.randn(1000),
                   'N': np.random.randint(100, 1000, (1000)),
                   'x': 'x'})



In [20]:
%timeit compute_numba(rand_df)

1.16 ms ± 186 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [44]:
# Numba optimized code
def calc_optimized_slope(data: pd.DataFrame, 
                         key_columns: List[str], 
                         slope_column: str, 
                         fact_name: str) -> pd.DataFrame:
    if not data.empty:
        pdf_with_slope = (
            data.sort_values(by="period_seq").\
                 groupby(key_columns)[slope_column].\
                 apply(apply_optimized_slope, raw=true).\
                 reset_index()
        )
        if pdf_with_slope.empty:
            data[fact_name] = 0.0
        else:
            pdf_with_slope.columns = key_columns + [fact_name]
            data = data.merge(pdf_with_slope, on=key_columns)
            return data
    else:
        return pd.DataFrame(columns=data.columns.to_list() + [fact_name])


# def apply_optimized_slope(data: np.ndarray, sales_column: str):
#     return the_real_calculation(data[sales_column].to_numpy(), 
#                                 np.array(data.shape[0]))

@numba.jit
def apply_optimized_slope(sales: np.ndarray)-> float:

    num_periods = np.arange(float(size))
    print(size)
    print(sales.shape)
    print(sales)

#     sum_num_periods = np.sum(num_periods)
#     sum_sales = np.sum(sales)

#     sum_num_periods_sales = np.dot(num_periods, sales)
#     sum_num_periods_square = np.dot(num_periods, num_periods)

#     square_of_sum_num_periods = sum_num_periods * sum_num_periods
#     slope_num = size * sum_num_periods_sales - sum_num_periods * sum_sales
#     slope_den = size * sum_num_periods_square - square_of_sum_num_periods

#     square_of_sum_num_periods = np.multiply(sum_num_periods, sum_num_periods)
#     slope_num = np.subtract((size * sum_num_periods_sales), (sum_num_periods * sum_sales))
#     slope_den = np.subtract((size * sum_num_periods_square), square_of_sum_num_periods)

#     return np.divide(slope_num, slope_den)
    return 0.0


In [40]:
np.arange(5.0)

array([0., 1., 2., 3., 4.])

In [45]:
calc_optimized_slope(df, ['item_id', 'competitor_item_id'], 'salesunits_own', 'slope_own')

Compilation is falling back to object mode WITH looplifting enabled because Function "the_real_calculation" failed type inference due to: No implementation of function Function(<class 'float'>) found for signature:
 
 >>> float(array(int64, 0d, C))
 
There are 2 candidate implementations:
      - Of which 2 did not match due to:
      Overload in function 'Float.generic': File: numba/core/typing/builtins.py: Line 941.
        With argument(s): '(array(int64, 0d, C))':
       Rejected as the implementation raised a specific error:
         TypeError: float() only support for numbers
  raised from /Users/jean.metz/workspace/GFK/consulting-engine/.venv/lib/python3.7/site-packages/numba/core/typing/builtins.py:947

During: resolving callee type: Function(<class 'float'>)
During: typing of call at <ipython-input-44-a8b9f0e7d5c4> (30)


File "<ipython-input-44-a8b9f0e7d5c4>", line 30:
def the_real_calculation(sales: np.ndarray, size: int)-> float:
    <source elided>

    num_periods = np.ar

5
(5,)
[19. 27. 17. 13. 74.]
5
(5,)
[19. 27. 17. 13. 74.]
5
(5,)
[3. 6. 3. 9. 5.]
5
(5,)
[3. 6. 3. 9. 5.]
5
(5,)
[19. 19. 82. 19. 35.]
5
(5,)
[ 8393.  1819.  6615. 11739.  1870.]
5
(5,)
[1502.    805.   1172.    594.94  510.  ]
5
(5,)
[1502.    805.   1172.    594.94  510.  ]
5
(5,)
[43. 27. 35. 29. 55.]
5
(5,)
[18. 19. 20. 14. 17.]
5
(5,)
[80. 63. 69. 77. 45.]
5
(5,)
[9605.   4547.   5444.97 2314.94 3656.  ]
5
(5,)
[9605.   4547.   5444.97 2314.94 3656.  ]
5
(5,)
[9605.   4547.   5444.97 2314.94 3656.  ]
5
(5,)
[ 5. 14.  7. 14. 20.]
5
(5,)
[22. 19. 34. 32. 56.]
5
(5,)
[22. 19. 34. 32. 56.]
5
(5,)
[8. 2. 4. 7. 4.]
5
(5,)
[4633. 4011. 3760. 2755. 2199.]
5
(5,)
[4633. 4011. 3760. 2755. 2199.]
5
(5,)
[4633. 4011. 3760. 2755. 2199.]
5
(5,)
[21. 15. 37. 25.  7.]
5
(5,)
[21. 15. 37. 25.  7.]
5
(5,)
[1. 4. 4. 6. 6.]
5
(5,)
[7. 4. 4. 3. 1.]
5
(5,)
[7. 4. 4. 3. 1.]
5
(5,)
[ 4. 12. 12.  9. 13.]
5
(5,)
[1. 2. 3. 1. 1.]
5
(5,)
[109.  59.  48.  11.   4.]
5
(5,)
[3. 2. 8. 8. 1.]
5
(5,)
[2342. 1037. 

Unnamed: 0,item_id,item_group_code,country_code,period_seq,competitor_item_id,loc_distance_euclidean,distance_euclidean,brand,price_own,loc_price_own,...,loc_price_competitor,salesunits_competitor,loc_salesunits_competitor,wgt_distr_competitor,loc_wgt_distr_competitor,my_rank,period_rank,wgt_distr_competitor_within_wgt_distr_own_,wgt_distr_competitor_within_wgt_distr_own_sum,slope_own
0,102708799,PTV_FLAT,CN,2643,155616288,1,0.093826,SAMSUNG,2574.790000,1,...,1,331.00,1,0.290766,1,4.0,1,1,5,0.0
1,102708799,PTV_FLAT,CN,2644,155616288,1,0.093826,SAMSUNG,2754.860000,1,...,1,123.00,1,0.305386,1,4.0,2,1,5,0.0
2,102708799,PTV_FLAT,CN,2645,155616288,1,0.093826,SAMSUNG,2429.000000,1,...,1,85.00,1,0.480029,1,4.0,3,1,5,0.0
3,102708799,PTV_FLAT,CN,2646,155616288,1,0.093826,SAMSUNG,2582.080000,1,...,1,84.00,1,0.205709,1,4.0,4,1,5,0.0
4,102708799,PTV_FLAT,CN,2647,155616288,1,0.093826,SAMSUNG,2658.500000,1,...,1,157.00,1,0.337947,1,4.0,5,1,5,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1500,165186250,PTV_FLAT,CN,2643,143860451,1,0.099616,PANDA,1098.987705,1,...,1,313.97,1,0.320317,1,5.0,1,1,5,0.0
1501,165186250,PTV_FLAT,CN,2644,143860451,1,0.099616,PANDA,1099.000000,1,...,1,190.94,1,0.125848,1,5.0,2,1,5,0.0
1502,165186250,PTV_FLAT,CN,2645,143860451,1,0.099616,PANDA,930.639227,1,...,1,303.00,1,0.126760,1,5.0,3,1,5,0.0
1503,165186250,PTV_FLAT,CN,2646,143860451,1,0.099616,PANDA,994.497630,1,...,1,536.00,1,0.160636,1,5.0,4,1,5,0.0


In [23]:
%timeit calc_optimized_slope(df, ['item_id', 'competitor_item_id'], 'salesunits_own', 'slope_own')

5
(5,)
[19. 27. 17. 13. 74.]
5
(5,)
[19. 27. 17. 13. 74.]
5
(5,)
[3. 6. 3. 9. 5.]
5
(5,)
[3. 6. 3. 9. 5.]
5
(5,)
[19. 19. 82. 19. 35.]
5
(5,)
[ 8393.  1819.  6615. 11739.  1870.]
5
(5,)
[1502.    805.   1172.    594.94  510.  ]
5
(5,)
[1502.    805.   1172.    594.94  510.  ]
5
(5,)
[43. 27. 35. 29. 55.]
5
(5,)
[18. 19. 20. 14. 17.]
5
(5,)
[80. 63. 69. 77. 45.]
5
(5,)
[9605.   4547.   5444.97 2314.94 3656.  ]
5
(5,)
[9605.   4547.   5444.97 2314.94 3656.  ]
5
(5,)
[9605.   4547.   5444.97 2314.94 3656.  ]
5
(5,)
[ 5. 14.  7. 14. 20.]
5
(5,)
[22. 19. 34. 32. 56.]
5
(5,)
[22. 19. 34. 32. 56.]
5
(5,)
[8. 2. 4. 7. 4.]
5
(5,)
[4633. 4011. 3760. 2755. 2199.]
5
(5,)
[4633. 4011. 3760. 2755. 2199.]
5
(5,)
[4633. 4011. 3760. 2755. 2199.]
5
(5,)
[21. 15. 37. 25.  7.]
5
(5,)
[21. 15. 37. 25.  7.]
5
(5,)
[1. 4. 4. 6. 6.]
5
(5,)
[7. 4. 4. 3. 1.]
5
(5,)
[7. 4. 4. 3. 1.]
5
(5,)
[ 4. 12. 12.  9. 13.]
5
(5,)
[1. 2. 3. 1. 1.]
5
(5,)
[109.  59.  48.  11.   4.]
5
(5,)
[3. 2. 8. 8. 1.]
5
(5,)
[2342. 1037. 