In [None]:
from featuretools.primitives import TransformPrimitive
from featuretools.tests.testing_utils import make_ecommerce_entityset
from featuretools.variable_types import DatetimeTimeIndex, NaturalLanguage, Numeric
import featuretools as ft
import numpy as np
import re

In [None]:
class StringCount(TransformPrimitive):
    '''Count the case-insensitive occurrences of a string in each text.

    Args:
        string (str): The substring to look for. Matching ignores case:
            both the substring and each text are lower-cased before
            counting. Defaults to None, in which case the generated
            function fails its assertion when it is called.
    '''
    name = 'string_count'
    input_types = [NaturalLanguage]
    return_type = Numeric

    def __init__(self, string=None):
        self.string = string

    def get_function(self):
        '''Return the function that counts `self.string` in every text of a column.'''
        def string_count(column):
            assert self.string is not None, "string to count needs to be defined"
            # Every text is lower-cased before counting, so the search string
            # has to be lower-cased too; otherwise a value such as "The"
            # could never match. Done once per call, not once per text.
            needle = self.string.lower()
            # this is a naive implementation used for clarity
            # (str.count counts non-overlapping occurrences)
            return [text.lower().count(needle) for text in column]

        return string_count

In [None]:
# Build the e-commerce EntitySet from the featuretools testing utilities;
# the later ft.dfs calls in this notebook reuse `es`.
es = make_ecommerce_entityset()

# Run ft.dfs with "sessions" as the target entity, using the sum/mean/std
# aggregation primitives together with the custom StringCount primitive
# configured to count the string "the".
feature_matrix, features = ft.dfs(
    entityset=es,
    target_entity="sessions",
    agg_primitives=["sum", "mean", "std"],
    trans_primitives=[StringCount(string="the")],
)

# Display only the STD, SUM and MEAN aggregations of STRING_COUNT applied
# to the `comments` column of the `log` entity.
feature_matrix[[
    'STD(log.STRING_COUNT(comments, string=the))',
    'SUM(log.STRING_COUNT(comments, string=the))',
    'MEAN(log.STRING_COUNT(comments, string=the))',
]]

In [None]:
class CaseCount(TransformPrimitive):
    '''Count the upper case and the lower case letters of each text.

    Produces two output features per input column: the number of
    `[A-Z]` matches first, the number of `[a-z]` matches second.
    '''
    name = 'case_count'
    input_types = [NaturalLanguage]
    return_type = Numeric
    number_output_features = 2

    def get_function(self):
        '''Return the function computing both letter counts for a column.'''
        def matches_per_text(pattern, texts):
            # One count per text: how many times `pattern` matches in it.
            return np.array([len(re.findall(pattern, text)) for text in texts])

        def case_count(array):
            # this is a naive implementation used for clarity
            uppercase_counts = matches_per_text('[A-Z]', array)
            lowercase_counts = matches_per_text('[a-z]', array)
            return uppercase_counts, lowercase_counts

        return case_count

In [None]:
# Run ft.dfs on the "sessions" entity with no aggregation primitives and
# the multi-output CaseCount primitive (passed as the class itself).
# Note: this rebinds `feature_matrix` and `features`.
feature_matrix, features = ft.dfs(
    entityset=es,
    target_entity="sessions",
    agg_primitives=[],
    trans_primitives=[CaseCount],
)

# Display the two CASE_COUNT outputs for customers.favorite_quote.
# Presumably [0] is the upper-case count and [1] the lower-case count,
# following the (upper, lower) return order of case_count -- TODO confirm.
feature_matrix[[
    'customers.CASE_COUNT(favorite_quote)[0]',
    'customers.CASE_COUNT(favorite_quote)[1]',
]]

In [None]:
class HourlySineAndCosine(TransformPrimitive):
    '''Compute the sine and the cosine of a datetime column's hour.

    Produces two output features, which `generate_names` labels with
    `[sine]` and `[cosine]` suffixes.
    '''
    name = 'hourly_sine_and_cosine'
    input_types = [DatetimeTimeIndex]
    return_type = Numeric
    number_output_features = 2

    def get_function(self):
        '''Return the function computing (sine, cosine) of the hour.'''
        def hourly_sine_and_cosine(column):
            # NOTE(review): the raw hour value is passed to sin/cos unscaled,
            # i.e. it is not mapped onto a 24-hour period -- confirm that
            # this is the intended encoding.
            hour = column.dt.hour
            return np.sin(hour), np.cos(hour)

        return hourly_sine_and_cosine

    def generate_names(self, base_feature_names):
        '''Return the two output names: the base name plus a suffix each.'''
        name = self.generate_name(base_feature_names)
        sine_name = f'{name}[sine]'
        cosine_name = f'{name}[cosine]'
        return sine_name, cosine_name

In [None]:
# Run ft.dfs on the "log" entity with no aggregation primitives and the
# HourlySineAndCosine primitive (passed as the class itself).
# Note: this rebinds `feature_matrix` and `features`.
feature_matrix, features = ft.dfs(
    entityset=es,
    target_entity="log",
    agg_primitives=[],
    trans_primitives=[HourlySineAndCosine],
)

# Display the first rows of the two outputs; the [sine]/[cosine] suffixes
# are the ones produced by HourlySineAndCosine.generate_names.
feature_matrix.head()[[
    'HOURLY_SINE_AND_COSINE(datetime)[sine]',
    'HOURLY_SINE_AND_COSINE(datetime)[cosine]',
]]