In [1]:
import pandas as pd

# Raise pandas' display limits (rows, columns, line width) for the tables
# printed later in this notebook.
for option_name, option_value in (
    ("display.max_rows", 500),
    ("display.max_columns", 500),
    ("display.width", 1000),
):
    pd.set_option(option_name, option_value)

# Custom Functions
You can supply your own custom functions to `row_apply` and `column_apply`.
The examples below show how.


## Row Apply

With `row_apply`, the `row_func` is called once per row, and that row's values from
the columns named in `list_of_values` are passed in as its arguments.

### Lambda Functions
In most cases, you can rely on a lambda function if you wish to do this on the fly:


In [2]:
from data_fabricator.v0.core.fabricator import MockDataGenerator
import yaml

# YAML instructions describing a single 10-row table, `sample_table`.
# Both `row_apply` columns supply their `row_func` as lambda source text.
config_str = """
sample_table:
  num_rows: 10
  columns:
    id:
      type: generate_unique_id
      seed: 1 # defaults to None
    id_plus_one:
      type: row_apply
      list_of_values: sample_table.id
      row_func: "lambda x: int(x)+1"
    id_plus_id:
      type: row_apply
      list_of_values: [sample_table.id, sample_table.id_plus_one]
      row_func: "lambda *args: int(args[0]) + args[1]"
"""
# Parse the YAML text into a plain dict and echo it so the structure is visible.
example_config = yaml.safe_load(config_str)
print(example_config)

# Hand the parsed instructions to the generator and build the table(s).
mock_generator = MockDataGenerator(instructions=example_config)
mock_generator.generate_all()

# Generated tables are looked up by table name in `all_dataframes`.
print(mock_generator.all_dataframes["sample_table"])

{'sample_table': {'num_rows': 10, 'columns': {'id': {'type': 'generate_unique_id', 'seed': 1}, 'id_plus_one': {'type': 'row_apply', 'list_of_values': 'sample_table.id', 'row_func': 'lambda x: int(x)+1'}, 'id_plus_id': {'type': 'row_apply', 'list_of_values': ['sample_table.id', 'sample_table.id_plus_one'], 'row_func': 'lambda *args: int(args[0]) + args[1]'}}}}
   id  id_plus_one  id_plus_id
0   1            2           3
1   2            3           5
2   3            4           7
3   4            5           9
4   5            6          11
5   6            7          13
6   7            8          15
7   8            9          17
8   9           10          19
9  10           11          21


  from data_fabricator.v0.core.fabricator import MockDataGenerator


Notice that we can also pass the row values of multiple columns into our custom function
by using the list syntax in `list_of_values`. Also note that `generate_unique_id` returns a
string, so we need to convert it to an integer before doing any arithmetic.

### Custom Functions
If you need more involved logic, you can define a regular function and reference it by its path:

In [3]:
from data_fabricator.v0.core.fabricator import MockDataGenerator
import yaml


def my_custom_row_func1(x):
    """Return the row value ``x`` plus one.

    ``x`` is converted with ``int`` first, so numeric strings are accepted.
    Each call is traced with a ``print`` so the invocations can be counted.
    """
    # Any arbitrary logic could go here; this example just traces and increments.
    print(f"In my_custom_row_func1, x: {x}")
    incremented = int(x) + 1
    return incremented


def my_custom_row_func2(x, y):
    """Return ``int(x) + y`` for one row.

    Shows a row function taking more than one argument: ``x`` and ``y`` are
    the two values passed in for the current row. Only ``x`` is converted
    with ``int``; ``y`` is used as given. Both arguments are traced with
    ``print`` before the sum is computed.
    """
    print(f"In my_custom_row_func2, x: {x}")
    print(f"In my_custom_row_func2, y: {y}")
    x_as_int = int(x)
    return x_as_int + y


# Same table layout as the lambda example, but each `row_func` is given as a
# dotted path to a function defined in this notebook (under `__main__`).
config_str = """
sample_table:
  num_rows: 10
  columns:
    id:
      type: generate_unique_id
      seed: 1 # defaults to None
    id_plus_one:
      type: row_apply
      list_of_values: sample_table.id
      row_func: "__main__.my_custom_row_func1"
    id_plus_id:
      type: row_apply
      list_of_values: [sample_table.id, sample_table.id_plus_one]
      row_func: "__main__.my_custom_row_func2"
"""
# Parse the YAML text into a plain dict of instructions.
example_config = yaml.safe_load(config_str)

# Build the table(s); the custom functions print a trace line on every call.
mock_generator = MockDataGenerator(instructions=example_config)
mock_generator.generate_all()

# Generated tables are looked up by table name in `all_dataframes`.
print(mock_generator.all_dataframes["sample_table"])

In my_custom_row_func1, x: 1
In my_custom_row_func1, x: 2
In my_custom_row_func1, x: 3
In my_custom_row_func1, x: 4
In my_custom_row_func1, x: 5
In my_custom_row_func1, x: 6
In my_custom_row_func1, x: 7
In my_custom_row_func1, x: 8
In my_custom_row_func1, x: 9
In my_custom_row_func1, x: 10
In my_custom_row_func2, x: 1
In my_custom_row_func2, y: 2
In my_custom_row_func2, x: 2
In my_custom_row_func2, y: 3
In my_custom_row_func2, x: 3
In my_custom_row_func2, y: 4
In my_custom_row_func2, x: 4
In my_custom_row_func2, y: 5
In my_custom_row_func2, x: 5
In my_custom_row_func2, y: 6
In my_custom_row_func2, x: 6
In my_custom_row_func2, y: 7
In my_custom_row_func2, x: 7
In my_custom_row_func2, y: 8
In my_custom_row_func2, x: 8
In my_custom_row_func2, y: 9
In my_custom_row_func2, x: 9
In my_custom_row_func2, y: 10
In my_custom_row_func2, x: 10
In my_custom_row_func2, y: 11
   id  id_plus_one  id_plus_id
0   1            2           3
1   2            3           5
2   3            4           7
3 

In this particular case the functions are defined under `__main__`, but you can just as
well give the full dotted path of any function in your project, for example
`row_func: my_project.module.function_file.function`.

This pattern allows you to unit test specific functions if the logic is more involved.

## Column Apply
In `column_apply`, the entire column is passed into the function. This gives
you access to values across rows, which can be useful for operations such as
grouping or sorting.

In [4]:
from data_fabricator.v0.core.fabricator import MockDataGenerator
import yaml


def my_custom_col_func1(x):
    """Trace the whole column ``x`` and hand it back untouched.

    The same object that was passed in is returned; nothing is copied or
    converted.
    """
    print(f"In my_custom_col_func1, x: {x}")
    column = x
    return column


def my_custom_col_func2(x, y):
    """Trace two whole columns and return the first one untouched.

    Shows a column function taking more than one column. ``y`` is only
    printed; the returned object is ``x`` itself.
    """
    print(f"In my_custom_col_func2, x: {x}")
    print(f"In my_custom_col_func2, y: {y}")
    first_column = x
    return first_column


def reverse_values(x):
    """Return the column's values as integers, sorted from largest to smallest.

    Every element of ``x`` is converted with ``int``. Despite the name, the
    result is ordered by value (descending) rather than by reversing the
    input positions; the two coincide only when the input is already
    ascending. A new list is returned and ``x`` itself is not modified.
    """
    return sorted(map(int, x), reverse=True)


# One generated `id` column plus three `column_apply` columns, each naming
# its `column_func` by dotted path to a function defined in this notebook.
config_str = """
sample_table:
  num_rows: 10
  columns:
    id:
      type: generate_unique_id
      seed: 1 # defaults to None
    example_column_apply1:
      type: column_apply
      list_of_values: sample_table.id
      column_func: "__main__.my_custom_col_func1"
    example_column_apply2:
      type: column_apply
      list_of_values: [sample_table.id, sample_table.example_column_apply1]
      column_func: "__main__.my_custom_col_func2"
    example_column_apply3:
      type: column_apply
      list_of_values: sample_table.id
      column_func: "__main__.reverse_values"
"""
# Parse the YAML text into a plain dict of instructions.
example_config = yaml.safe_load(config_str)

# Build the table(s); the tracing column functions print what they receive.
mock_generator = MockDataGenerator(instructions=example_config)
mock_generator.generate_all()

# Generated tables are looked up by table name in `all_dataframes`.
print(mock_generator.all_dataframes["sample_table"])

In my_custom_col_func1, x: ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10']
In my_custom_col_func2, x: ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10']
In my_custom_col_func2, y: ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10']
   id example_column_apply1 example_column_apply2  example_column_apply3
0   1                     1                     1                     10
1   2                     2                     2                      9
2   3                     3                     3                      8
3   4                     4                     4                      7
4   5                     5                     5                      6
5   6                     6                     6                      5
6   7                     7                     7                      4
7   8                     8                     8                      3
8   9                     9                     9                      2
9  10                    10      

Notice the difference between `column_apply` and `row_apply`. With `row_apply`, the `row_func` is
called 10 times (once per row), whereas with `column_apply` the `column_func` is called only once per column.
