In [2]:
#Creating my own partial function
def add(a, b):
    """Return the result of ``a + b``."""
    total = a + b
    return total

def partial(func, *args, **kwargs):
    """Return a callable that invokes ``func`` with some arguments pre-bound.

    Args:
        func: The callable to wrap.
        *args: Positional arguments placed before the ones supplied at call time.
        **kwargs: Keyword arguments supplied on every call; a keyword given at
            call time overrides the pre-bound value of the same name.

    Returns:
        A function accepting the remaining positional and keyword arguments
        and returning whatever ``func`` returns.
    """
    def inner(*inner_args, **inner_kwargs):
        # Later entries win in the merged dict, so call-time keywords take
        # precedence over the pre-bound ones.
        merged_kwargs = {**kwargs, **inner_kwargs}
        return func(*args, *inner_args, **merged_kwargs)

    return inner

# Pre-bind 2 as the first argument of add; add_two then only needs the second one.
add_two = partial(add,2)
print(add_two(7))


9


In [3]:
#Decorator examples 

def catch_error(a_func):
    """Decorator that returns exceptions raised by ``a_func`` instead of raising them.

    Args:
        a_func: The callable to wrap.

    Returns:
        A wrapper that forwards positional and keyword arguments to ``a_func``
        and returns its result; if ``a_func`` raises an ``Exception`` the
        exception instance is returned to the caller.
    """
    # Function-scope import so the decorator works without a separate import cell.
    from functools import wraps

    @wraps(a_func)  # keep the wrapped function's __name__ and __doc__
    def inner_func(*args, **kwargs):
        try:
            return a_func(*args, **kwargs)
        except Exception as e:
            # Deliberately hand the exception back rather than propagating it.
            return e
    return inner_func

@catch_error
def throws_error():
    """Always raise an ``Exception``, used to demonstrate ``catch_error``."""
    raise Exception("throws_error()")
    
# catch_error returns the caught exception, so this prints the exception's message.
print(throws_error())


throws_error()


In [2]:
#Decorator with another example
def logger(func):
    """Decorator that prints a function's name and arguments before calling it.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper that prints the function name and the positional arguments
        (plus the keyword arguments, when any are given), then returns the
        result of calling ``func`` with those arguments.
    """
    # Function-scope import so the decorator works without a separate import cell.
    from functools import wraps

    @wraps(func)  # keep the wrapped function's __name__ and __doc__
    def inner(*args, **kwargs):
        print("Function name:{}".format(func.__name__))
        print("args: {}".format(args))
        if kwargs:
            # Only printed for keyword calls, so positional-only calls still
            # log exactly the two lines above.
            print("kwargs: {}".format(kwargs))
        return func(*args, **kwargs)
    return inner

@logger
def multiply(a,b):
    """Return the product ``a * b``; calls are reported by the ``logger`` decorator."""
    return a * b

# The logger wrapper prints the call details first; print then shows the product.
print(multiply(2,2))

Function name:multiply
args: (2, 2)
4


In [23]:
#Creating Pipeline class and its tasks

class Pipeline:
    """Ordered list of single-argument tasks that are run as a chain.

    Tasks are registered with the ``add_task`` decorator and executed by
    ``run_tasks``, which feeds each task's return value into the next task.
    """

    def __init__(self):
        # Task callables, kept in execution order.
        self.tasks = []

    def add_task(self, depends_on=None):
        """Return a decorator that registers a function as a pipeline task.

        Args:
            depends_on: An already registered task. The decorated function is
                inserted immediately after it. When omitted (``None``), the
                function is inserted at the front of the task list.

        Returns:
            A decorator that inserts the function into ``self.tasks`` and
            returns the function unchanged.

        Raises:
            ValueError: If ``depends_on`` is given but is not a registered task.
        """
        # Compare against None instead of relying on truthiness, so a callable
        # object that evaluates as falsy (e.g. one defining __len__ or
        # __bool__) is still honoured as a dependency.
        if depends_on is None:
            position = 0
        else:
            position = self.tasks.index(depends_on) + 1

        def inner(f):
            self.tasks.insert(position, f)
            return f

        return inner

    def run_tasks(self, inpt):
        """Run every task in order and return the final result.

        Args:
            inpt: Value passed to the first task.

        Returns:
            The value returned by the last task, or ``inpt`` itself when no
            task is registered.
        """
        out = inpt
        for task in self.tasks:
            out = task(out)
        return out
    
# Module-level Pipeline instance that the example below would register tasks on.
pipeline = Pipeline()
# The example is wrapped in a string literal, so none of it executes and no task
# is registered here; the cell's output is simply the repr of this string.
'''
@pipeline.add_task()
def first_task(x):
    return x+1

@pipeline.add_task(depends_on = first_task)
def second_task(x):
    return x*2

@pipeline.add_task(depends_on = second_task)
def last_task(x):
    return x - 4

print(pipeline.run_tasks(20))
'''

'\n@pipeline.add_task()\ndef first_task(x):\n    return x+1\n\n@pipeline.add_task(depends_on = first_task)\ndef second_task(x):\n    return x*2\n\n@pipeline.add_task(depends_on = second_task)\ndef last_task(x):\n    return x - 4\n\nprint(pipeline.run_tasks(20))\n'

In [24]:
import io
import itertools
import csv
from datetime import datetime

def parse_time(time_str):
    """
    Convert a bracketed timestamp written as
    [day/abbreviated-month/year:HH:MM:SS +####] into a datetime object
    carrying the given UTC offset.
    """
    bracketed_format = '[%d/%b/%Y:%H:%M:%S %z]'
    return datetime.strptime(time_str, bracketed_format)

def strip_quotes(s):
    """Return ``s`` with every double-quote character removed."""
    pieces = s.split('"')
    return ''.join(pieces)


def parse_log(log):
    """Lazily parse access-log lines into tuples of fields.

    Each line is split on whitespace and the fields are read by position:
    token 0 is the remote address, tokens 3-4 the bracketed timestamp,
    token 5 the request type, token 6 the request path, token 8 the status,
    token 9 the number of bytes sent, token 10 the referrer and everything
    from token 11 onwards the user agent. Tokens 1, 2 and 7 are not used.

    Lines with fewer than 11 tokens (blank or truncated lines) are skipped,
    because the positional reads below would otherwise raise ``IndexError``.

    Args:
        log: Iterable of log lines, for example an open text file.

    Yields:
        Tuples ``(remote_addr, time_local, request_type, request_path,
        status, body_bytes_sent, http_referrer, http_user_agent)``.
        ``time_local`` is the value returned by ``parse_time``; the other
        fields are strings, with double quotes stripped from the request
        type, referrer and user agent.
    """
    # The highest index read below is 10 (the referrer), so a usable line
    # needs at least 11 whitespace-separated tokens.
    min_fields = 11

    for line in log:
        split_line = line.split()
        if len(split_line) < min_fields:
            continue
        remote_addr = split_line[0]

        # The timestamp contains a space, so it spans two tokens.
        time_local = parse_time(split_line[3] + " " + split_line[4])

        request_type = strip_quotes(split_line[5])
        request_path = split_line[6]
        status = split_line[8]
        body_bytes_sent = split_line[9]
        http_referrer = strip_quotes(split_line[10])

        # The user agent may contain spaces; re-join the remaining tokens.
        http_user_agent = strip_quotes(" ".join(split_line[11:]))

        yield (
            remote_addr, time_local, request_type, request_path,
            status, body_bytes_sent, http_referrer, http_user_agent
        )
        
def build_csv(lines, header=None, file=None):
    """Write rows as CSV into a file-like object and rewind it.

    Args:
        lines: Iterable of rows (each row an iterable of values).
        header: Optional row written before ``lines``; skipped when empty
            or ``None``.
        file: Writable, seekable text file-like object to write into. When
            omitted, a new in-memory ``io.StringIO`` buffer is created.

    Returns:
        The file object that was written to, positioned at its start so it
        can be read immediately.
    """
    if file is None:
        # csv.writer requires an object with a write() method; passing None
        # through would raise TypeError, so fall back to an in-memory buffer.
        file = io.StringIO()
    if header:
        lines = itertools.chain([header], lines)
    writer = csv.writer(file, delimiter=',')
    writer.writerows(lines)
    file.seek(0)
    return file

def count_unique_request(csv_file):
    """Count how often each ``request_type`` value occurs in a CSV file.

    The first row is read as the header and must contain a
    ``request_type`` column; every following row contributes one count for
    the value found in that column.

    Returns a generator of ``(request_type, count)`` pairs, in the order
    the request types were first encountered.
    """
    rows = csv.reader(csv_file)
    column = next(rows).index('request_type')

    counts = {}
    for row in rows:
        request_type = row[column]
        counts[request_type] = counts.get(request_type, 0) + 1
    return ((name, total) for name, total in counts.items())


class Pipeline:
    """Ordered list of single-argument tasks that are run as a chain.

    Tasks are registered with the ``add_task`` decorator and executed by
    ``run_tasks``, which feeds each task's return value into the next task.
    """

    def __init__(self):
        # Task callables, kept in execution order.
        self.tasks = []

    def add_task(self, depends_on=None):
        """Return a decorator that registers a function as a pipeline task.

        Args:
            depends_on: An already registered task. The decorated function is
                inserted immediately after it. When omitted (``None``), the
                function is inserted at the front of the task list.

        Returns:
            A decorator that inserts the function into ``self.tasks`` and
            returns the function unchanged.

        Raises:
            ValueError: If ``depends_on`` is given but is not a registered task.
        """
        # Compare against None instead of relying on truthiness, so a callable
        # object that evaluates as falsy (e.g. one defining __len__ or
        # __bool__) is still honoured as a dependency.
        if depends_on is None:
            position = 0
        else:
            position = self.tasks.index(depends_on) + 1

        def inner(f):
            self.tasks.insert(position, f)
            return f

        return inner

    def run_tasks(self, inpt):
        """Run every task in order and return the final result.

        Args:
            inpt: Value passed to the first task.

        Returns:
            The value returned by the last task, or ``inpt`` itself when no
            task is registered.
        """
        out = inpt
        for task in self.tasks:
            out = task(out)
        return out

# Fresh pipeline for the tasks registered below, so they do not depend on (or
# pile up in) a Pipeline instance left over from an earlier cell or an earlier
# run of this cell.
pipeline = Pipeline()


@pipeline.add_task()
def log_parse(logs):
    """Pipeline task that hands the log lines to ``parse_log``.

    Registered without a dependency, so it is inserted at the front of the
    pipeline's task list.
    """
    return parse_log(logs)

@pipeline.add_task(depends_on = log_parse)
def build_csv_file(lines):
    """Pipeline task, placed right after ``log_parse``, that writes the rows to CSV.

    The rows are written by ``build_csv`` into a new in-memory ``io.StringIO``
    buffer, preceded by a fixed eight-column header.
    """
    return build_csv(lines,  header = [
    'ip', 'time_local', 'request_type',
        'request_path', 'status', 'bytes_sent',
        'http_referrer', 'http_user_agent'
    ],
    file = io.StringIO())


@pipeline.add_task(depends_on = build_csv_file)
def uq_count(csv_file):
    """Pipeline task, placed right after ``build_csv_file``, that delegates to ``count_unique_request``."""
    return count_unique_request(csv_file)

@pipeline.add_task(depends_on = uq_count)
def summ_csv(lines):
    """Pipeline task, placed right after ``uq_count``, that writes the summary CSV.

    The rows are written by ``build_csv`` into a new in-memory ``io.StringIO``
    buffer under the header ``request_type, count``.
    """
    return build_csv(lines, header = ['request_type', 'count'], file=io.StringIO())


# Open the log inside a context manager so the file handle is always closed.
# The log is fully consumed while run_tasks executes (build_csv writes every
# parsed row), so the result can safely be read after the file is closed.
with open('example_log.txt') as logs:
    summarized_file = pipeline.run_tasks(logs)
print(summarized_file.readlines())

['request_type,count\r\n', 'PUT,3367\r\n', 'POST,3299\r\n', 'GET,3334\r\n']
