In [15]:
import sys
import os

os.environ['JAVA_HOME'] = '/usr/lib/jvm/java-1.8.0-openjdk-amd64'
os.environ['PYSPARK_PYTHON'] = '/home/aadi/miniconda3/envs/pyspark_env/bin/python' 
os.environ['PYSPARK_DRIVER_PYTHON'] = '/home/aadi/miniconda3/envs/pyspark_env/bin/python' 

# Decorators
*Why use decorators instead of (e.g.) asserts*
1. Clutters functions with error-checking logic
2. If validation logic needs to change, many inline copies need to be found and updated 

## Anatomy of a Decorator

```python
def decorator(input_fn):
    def _decorate(*args, **kwargs):
        print('decorating')

        return input_fn(*args, **kwargs)

    return _decorate
```

In [16]:
def fndecorator(input_fn):
    def decorator():
        print('This is from decorator')

        return input_fn()

    return decorator 


@fndecorator
def new_fn():
    print('from original function')

In [17]:
new_fn()

This is from decorator
from original function


In [18]:
def manager_albany(*args):
    BLUE = '\033[94m'
    BOLD = '\33[5m'
    SELECT = '\33[7m'
    for arg in args:
        print(BLUE + BOLD + SELECT + str(arg))

In [19]:
def function_with_input(*args):
    for arg in args:
        print(arg)

def add_line_function(function_with_input):
    def add_line(*args):
        print('ADDED LINE')
        return function_with_input(*args)
    return add_line

In [20]:
@add_line_function
def fn(*args):
    for arg in args:
        print(arg)

In [21]:
fn('a', 'b', 'c')

ADDED LINE
a
b
c


In [22]:
def decorator(input_fn):
    def _decorate(*args, **kwargs):
        print('decorating')

        return input_fn(*args, **kwargs)

    return _decorate

In [23]:
@decorator
def fn(input_arg):
    print(input_arg)
    

In [24]:
def datefixer(fn):
    import datetime
    def decorator(*args):
        newargs = []
        for arg in args:
            if isinstance(arg, datetime.date):
                arg = arg.weekday(), arg.day, arg.month, arg.year
            newargs.append(arg)
        return fn(*newargs)
    return decorator


In [25]:
@datefixer
def set_holidays(*args):
    return args[0]

In [26]:
from datetime import datetime as dt

some_date = dt.strptime('2022-12-25', '%Y-%m-%d')
some_date

datetime.datetime(2022, 12, 25, 0, 0)

In [27]:
set_holidays(some_date)

(6, 25, 12, 2022)

In [28]:
import pandas as pd
from pyspark.sql import SparkSession

sc = SparkSession.builder.appName('decorators').getOrCreate()

data = pd.DataFrame({
    'american': ['06/07/2022'],
    'monthname': ['06/July/2022'],
    'julian': ['1997/310'],
    'inversejulian': ['310/1997'],

})

data

22/12/28 18:16:03 WARN Utils: Your hostname, debian resolves to a loopback address: 127.0.1.1; using 192.168.100.213 instead (on interface wlp5s0)
22/12/28 18:16:03 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address


Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).


22/12/28 18:16:03 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


Unnamed: 0,american,monthname,julian,inversejulian
0,06/07/2022,06/July/2022,1997/310,310/1997


In [29]:
df = sc.createDataFrame(data)
df.show()

  for column, series in pdf.iteritems():
  for column, series in pdf.iteritems():
                                                                                

+----------+------------+--------+-------------+
|  american|   monthname|  julian|inversejulian|
+----------+------------+--------+-------------+
|06/07/2022|06/July/2022|1997/310|     310/1997|
+----------+------------+--------+-------------+



In [14]:
from pyspark.sql import functions as F 
df.withColumn('american', F.to_date('american', 'dd/MM/yyyy')).show()

NameError: name 'df' is not defined

In [205]:

df.withColumn('julian', F.to_date('julian', 'yyyy/DDD')).show()

+----------+------------+----------+-------------+
|  american|   monthname|    julian|inversejulian|
+----------+------------+----------+-------------+
|06/07/2022|06/July/2022|1997-11-06|     310/1997|
+----------+------------+----------+-------------+



In [206]:

df.withColumn('inversejulian', F.to_date('inversejulian', 'DDD/yyyy')).show()

+----------+------------+--------+-------------+
|  american|   monthname|  julian|inversejulian|
+----------+------------+--------+-------------+
|06/07/2022|06/July/2022|1997/310|   1997-11-06|
+----------+------------+--------+-------------+



In [207]:
df.withColumn('monthname', F.to_date('monthname', 'dd/LL/yyyy')).show()

+----------+---------+--------+-------------+
|  american|monthname|  julian|inversejulian|
+----------+---------+--------+-------------+
|06/07/2022|     null|1997/310|     310/1997|
+----------+---------+--------+-------------+



In [127]:
import functools


date_cols = {
    'american': ['american', 'american1'],
    'julian': ['julian']
}


def datefixer(dateconf):
    import pyspark
    def _datefixer(func):

        @functools.wraps(func)
        def wrapper(df, *args, **kwargs):
            df_dateconf = {}
            for key, values in dateconf.items():
                df_dateconf[key] = [i for i in df.columns if i in values]


            for dateformat in df_dateconf.keys():
                for datecolumn in df_dateconf[dateformat]:
                    print('converting', dateformat)
                    if dateformat == 'american':
                        df = df.withColumn(datecolumn, F.to_date(datecolumn, 'dd/MM/yyyy'))
                    if dateformat == 'julian':
                        df = df.withColumn(datecolumn, F.to_date(datecolumn, 'yyyy/DDD'))
            return func(df, *args, **kwargs)

        return wrapper

    return _datefixer

In [128]:
@datefixer(dateconf=date_cols)
def test(df):

    return df 

        

In [129]:
df.show()
test(df=df).show()

+----------+--------+-------------+
|  american|  julian|inversejulian|
+----------+--------+-------------+
|06/07/2022|1997/310|     310/1997|
+----------+--------+-------------+

converting american
converting julian
+----------+----------+-------------+
|  american|    julian|inversejulian|
+----------+----------+-------------+
|2022-07-06|1997-11-06|     310/1997|
+----------+----------+-------------+



In [154]:
import pyspark 
def fn(*args, **kwargs):
    for k, v in kwargs.items():
        if isinstance(v, pyspark.sql.DataFrame):
            kwargs[k] = kwargs[k].withColumn('new', F.lit(0))
    return kwargs 
    print(kwargs)


In [179]:
def datefixer(conf):
    import pyspark
    def _datefixer(func):

        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            for k, v in kwargs.items():
                if isinstance(v, pyspark.sql.DataFrame):
                    print('found a dataframe')
                    kwargs[k] = kwargs[k].withColumn('col', F.lit(0))

            return func(*args, **kwargs)

        return wrapper

    return _datefixer
        

In [180]:
@datefixer(conf=None)
def process(df):
    return df

In [181]:
process(df=df).show()

found a dataframe
+----------+--------+-------------+---+
|  american|  julian|inversejulian|col|
+----------+--------+-------------+---+
|06/07/2022|1997/310|     310/1997|  0|
+----------+--------+-------------+---+



## Decorators with Args

this 
```python
@decorator(args)
def func():
    pass
```

is essentially this
```python
func = decorator(args)(func)
```

In [3]:
from functools import wraps

def debug(prefix=''): # this outer function provides an "environment" for the inner functions
    def decorate(func):
        msg = prefix + func.__qualname__

        @wraps(func)
        def wrapper(*args, **kwargs):
            print(msg)
            return func(*args, **kwargs)

        return wrapper

    return decorate


In [4]:
# usage
@debug('***')
def foo(a=1):
    return a**2

In [5]:
foo()

***foo


1

In [None]:
# reducing repetition
from functools import wraps, partial

def debug(func=None, *, prefix=''):
    if func is None:
        return partial(debug, prefix=prefix)

In [32]:
def fndecorator(input_fn):

    df = pd.DataFrame()
    def decorator(*args, **kwargs):
        result = input_fn(*args, **kwargs)

        return result
    return decorator 


def process(*args, **kwargs):
    print(locals())
    return 0

In [33]:
process = fndecorator(process)

In [37]:
def say_hello(name):
    return f'Hello {name}'


def say_yo(name):
    return f'Yo {name}'

In [38]:
def greet(greeter):
    return greeter('Bob')

In [40]:
greet(say_hello) 
greet(say_yo) 

'Yo Bob'

# structure for slides

In [48]:
# first-class objects
# being able to pass in and return functions
# passing it in two ways, with () calls the function, without is a reference
def run_tf_model(data):
    return f'tf model run on {data}' 

def run_torch_model(data):
    return f'torch model run on {data}' 

def run_model_on_data(runner):
    return runner(data=[1, 2, 3])

run_model_on_data(run_torch_model), run_model_on_data(run_tf_model)

# being able to return functions
def parent(num):
    def first_child():
        return "Hi, I am Emma"

    def second_child():
        return "Call me Liam"

    if num == 1:
        return first_child
    else:
        return second_child

first = parent(1)
second = parent(2)

first(), second()

('Hi, I am Emma', 'Call me Liam')

In [54]:
from functools import wraps
# decorators are not inherently complex
# they just take functions in, modify the behaviour in-place, and return the function
# we define an inner function wrapper which adds functionality to f
#   we can intercept the inputs and the outputs of the function
#   and return it, so now, foo is not being called, but wrapper is!
#   show with and without wraps

def decorator(f):

    # @wraps(f)
    def wrapper(*args, **kwargs):
        print('within decorator')
        result = f(*args, **kwargs)

        return result

    return wrapper

def foo(*args, **kwargs):
    print('foo')
    return 0

print(foo.__name__)
foo = decorator(foo)
print(foo.__name__)

foo
foo


In [55]:
def decorator(conf):

    def _wrapper(f):

        @wraps(f)
        def _wrapped(*args, **kwargs):

            return f(*args, **kwargs)

        return _wrapped

    return _wrapper

In [58]:
@decorator(None)
def baz(*args, **kwargs):
    print('baz')

In [59]:
baz()

baz
