In [None]:
# Environment Import
from experiment_graph.execution_environment import ExecutionEnvironment
from experiment_graph.graph.operations import UserDefinedFunction



In [None]:
class ClipDataset(UserDefinedFunction):
    """User-defined operation that clips the values of a dataset.

    The operation is declared with a ``'Dataset'`` return type and delegates
    the actual work to the ``clip`` method of the underlying pandas dataframe.
    """

    def __init__(self, lower=None, upper=None, axis=None):
        """Remember the clipping configuration for later use in ``run``.

        Args:
            lower: Value forwarded as ``lower`` to ``clip``.
            upper: Value forwarded as ``upper`` to ``clip``.
            axis: Value forwarded as ``axis`` to ``clip``.
        """
        super().__init__(return_type='Dataset')
        self.lower, self.upper, self.axis = lower, upper, axis

    def run(self, underlying_data):
        """Clip ``underlying_data`` using the stored bounds and axis.

        Args:
            underlying_data: The pandas dataframe wrapped by the dataset this
                operation is applied to.

        Returns:
            Whatever ``underlying_data.clip`` returns for the stored settings.
        """
        # Collect the stored settings and hand them straight to pandas' clip.
        clip_kwargs = {
            'lower': self.lower,
            'upper': self.upper,
            'axis': self.axis,
        }
        return underlying_data.clip(**clip_kwargs)

In [None]:
class CustomProject(UserDefinedFunction):
    """User-defined operation that selects a single column from a dataset.

    The operation is declared with a ``'Feature'`` return type: it receives a
    pandas dataframe and is expected to hand back a feature (a data series).
    """

    def __init__(self, column):
        """Remember which column ``run`` should select.

        Args:
            column: Key used to index the underlying dataframe.
        """
        super().__init__(return_type='Feature')
        self.column = column

    def run(self, underlying_data):
        """Return the configured column of ``underlying_data``.

        Args:
            underlying_data: The pandas dataframe wrapped by the dataset this
                operation is applied to.

        Returns:
            The result of indexing ``underlying_data`` with ``self.column``.
        """
        selected = underlying_data[self.column]
        return selected

In [None]:
class MedianFeature(UserDefinedFunction):
    """User-defined aggregation that computes the median of a feature.

    The operation is declared with an ``'Agg'`` return type and relies on the
    ``median`` method of the underlying pandas data series.
    """

    def __init__(self):
        """Register the operation with the ``'Agg'`` return type."""
        super().__init__(return_type='Agg')

    def run(self, underlying_data):
        """Return the median of ``underlying_data``.

        Args:
            underlying_data: The pandas data series wrapped by the feature
                this operation is applied to.

        Returns:
            The value produced by ``underlying_data.median()``.
        """
        median = underlying_data.median()
        return median

In [None]:
# Create the execution environment and load the sample CSV through it.
# The resulting `data` object is what the following cells run the UDFs on.
execution_environment = ExecutionEnvironment()
data = execution_environment.load('./data/sample.csv')

In [None]:
# Build a ClipDataset UDF with lower bound 2 and upper bound 4, then run it
# on the loaded data.
clip_oper = ClipDataset(lower=2,upper=4)
clipped = data.run_udf(clip_oper)

In [None]:
# Show the result of the clipping step.
# NOTE(review): presumably .data() returns the underlying pandas object — confirm.
clipped.data()

In [None]:
# Build a CustomProject UDF for column 'a' and run it on the clipped result.
projection = CustomProject('a')
feature_a = clipped.run_udf(projection)

In [None]:
# Show the result of the projection step.
# NOTE(review): presumably .data() returns the underlying pandas object — confirm.
feature_a.data()

In [None]:
# Build a MedianFeature UDF and run it on the projected feature.
median_feature=MedianFeature()
median_value = feature_a.run_udf(median_feature)

In [None]:
# Show the result of the median aggregation.
# NOTE(review): presumably .data() returns the computed value — confirm.
median_value.data()