In [2]:
# !pip install doit import-ipynb

In [3]:
import os

import hvplot.pandas
import import_ipynb
import pandas as pd
from doit import load_ipython_extension

load_ipython_extension()

# Building Workflows in Jupyter Notebooks

## Functions

Functions are reusable blocks of code that perform a specific task. 
They help organize code, make it more readable, and allow you to avoid repetition by encapsulating logic that can be called multiple times throughout a program.

A function is defined using the `def` keyword followed by the function name and parentheses `()`. 
The code that performs the task is placed inside the function body, indented under the function definition.

```python
def function_name(parameters):  # definition, name, parameters
    # body
    return value  # return (optional)
```

**Example** Make a function called `download_data` that replaces the code below:

```python
url = "https://uni-bonn.sciebo.de/s/FYJPmdTyPo1qwRX/download"
output_csv = "data/data.csv"
df = pd.read_csv(url)
df.to_csv(output_csv, index=False)
```

In [6]:
def download_data():
    """Download the example dataset and store it locally as a CSV file.

    Reads the CSV served at the hard-coded URL into a DataFrame and writes it
    to ``data/data.csv`` without the index column. The ``data`` directory is
    created first if it does not exist yet.
    """
    url = "https://uni-bonn.sciebo.de/s/FYJPmdTyPo1qwRX/download"
    output_csv = "data/data.csv"
    df = pd.read_csv(url)
    # to_csv cannot write into a missing directory (e.g. on a fresh checkout),
    # so make sure the parent folder exists before saving.
    os.makedirs(os.path.dirname(output_csv), exist_ok=True)
    df.to_csv(output_csv, index=False)

In [8]:
download_data()

Make a function called `calculate_stats` that replaces the below code

```python
csv_file = "data/data.csv"
output_csv = "data/data_stats.csv"
df = pd.read_csv(csv_file)
df_stat = df.describe().reset_index()
df_stat.to_csv(output_csv, index=False)
```

In [9]:
def calculate_stats():
    """Summarise ``data/data.csv`` and save the result to ``data/data_stats.csv``.

    The summary is ``DataFrame.describe()`` with the statistic names moved
    out of the index into a regular column; it is written without an index.
    """
    summary = pd.read_csv("data/data.csv").describe().reset_index()
    summary.to_csv("data/data_stats.csv", index=False)

In [14]:
calculate_stats()

Make a function called `create_visualization` that replaces the below code

```python
csv_file = "data/data.csv"
hist_col_name = 'response_time'
df = pd.read_csv(csv_file)
df[hist_col_name].hvplot.hist()
```

In [16]:
def create_vizualization():
    """Plot a histogram of the ``response_time`` column of ``data/data.csv``.

    Returns:
        The object produced by ``hvplot.hist()``. Returning it lets the plot
        render when the call is the last expression of a notebook cell;
        without the ``return`` the plot was built and silently discarded.
    """
    csv_file = "data/data.csv"
    hist_col_name = 'response_time'
    df = pd.read_csv(csv_file)
    return df[hist_col_name].hvplot.hist()

**Example** Make `url` a parameter in `download_data` function

In [17]:
def download_data(url):
    """Download a CSV from ``url`` and store it as ``data/data.csv``.

    Args:
        url: Location of the CSV to read; passed straight to ``pd.read_csv``.

    The ``data`` directory is created if it does not exist yet, and the file
    is written without the index column.
    """
    output_csv = "data/data.csv"
    df = pd.read_csv(url)
    # to_csv cannot write into a missing directory, so create it on demand.
    os.makedirs(os.path.dirname(output_csv), exist_ok=True)
    df.to_csv(output_csv, index=False)

In [18]:
download_data("https://uni-bonn.sciebo.de/s/FYJPmdTyPo1qwRX/download")

Make `csv_file` a parameter in `calculate_stats` function

In [19]:
def calculate_stats(csv_file):
    """Write descriptive statistics of ``csv_file`` to ``data/data_stats.csv``.

    Args:
        csv_file: CSV to summarise; passed straight to ``pd.read_csv``.

    The statistics come from ``DataFrame.describe()``; the statistic names are
    turned into a regular column and the file is saved without an index.
    """
    stats_table = pd.read_csv(csv_file).describe().reset_index()
    stats_table.to_csv("data/data_stats.csv", index=False)

In [21]:
calculate_stats("data/data.csv")

Make `csv_file` a parameter in `create_visualization` function

In [None]:
def create_vizualization(csv_file):
    """Plot a histogram of the ``response_time`` column of ``csv_file``.

    Args:
        csv_file: CSV to read; passed straight to ``pd.read_csv``.

    Returns:
        The object produced by ``hvplot.hist()``. Returning it lets the plot
        render when the call is the last expression of a notebook cell;
        without the ``return`` the plot was built and silently discarded.
    """
    hist_col_name = 'response_time'
    df = pd.read_csv(csv_file)
    return df[hist_col_name].hvplot.hist()

In [None]:
# create_vizualization("data/data.csv")

**Example** Make `url` and `output_csv` as parameters in `download_data` function

In [None]:
def download_data(url, output_csv):
    """Download a CSV from ``url`` and store it at ``output_csv``.

    Args:
        url: Location of the CSV to read; passed straight to ``pd.read_csv``.
        output_csv: Destination handed to ``DataFrame.to_csv``. When it is a
            filesystem path, missing parent directories are created first.

    The file is written without the index column.
    """
    df = pd.read_csv(url)
    # Only filesystem paths have a parent directory to prepare; anything else
    # (e.g. an open buffer) is handed to to_csv untouched.
    if isinstance(output_csv, (str, os.PathLike)):
        parent_dir = os.path.dirname(output_csv)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
    df.to_csv(output_csv, index=False)

Make `csv_file` and `output_csv` as parameters in `calculate_stats` function

In [None]:
def calculate_stats(csv_file, output_csv):
    """Write descriptive statistics of ``csv_file`` to ``output_csv``.

    Args:
        csv_file: CSV to summarise; passed straight to ``pd.read_csv``.
        output_csv: Destination handed to ``DataFrame.to_csv``.

    The statistics come from ``DataFrame.describe()``; the statistic names are
    turned into a regular column and the result is saved without an index.
    """
    described = pd.read_csv(csv_file).describe()
    described.reset_index().to_csv(output_csv, index=False)

Make `csv_file` and `hist_col_name` as parameters in `create_visualization` function

In [None]:
def create_vizualization(csv_file, hist_col_name):
    """Plot a histogram of one column of a CSV file.

    Args:
        csv_file: CSV to read; passed straight to ``pd.read_csv``.
        hist_col_name: Name of the column whose values are histogrammed.

    Returns:
        The object produced by ``hvplot.hist()``. Returning it lets the plot
        render when the call is the last expression of a notebook cell;
        without the ``return`` the plot was built and silently discarded.
    """
    df = pd.read_csv(csv_file)
    return df[hist_col_name].hvplot.hist()

## doit Workflows

**Example** Add a doit task called `download` that implements the below code

```python
def download_data():
    url = "https://uni-bonn.sciebo.de/s/FYJPmdTyPo1qwRX/download"
    output_csv = "data/data.csv"
    df = pd.read_csv(url)
    df.to_csv(output_csv, index=False)
```

In [None]:
def task_download():
    """doit task ``download``: fetch the example dataset into ``data/data.csv``.

    Returns:
        A doit task dictionary whose single action is the nested
        ``download_data`` function.
    """
    def download_data():
        """Read the CSV from the hard-coded URL and save it without the index."""
        url = "https://uni-bonn.sciebo.de/s/FYJPmdTyPo1qwRX/download"
        output_csv = "data/data.csv"
        df = pd.read_csv(url)
        # to_csv cannot write into a missing directory (e.g. on a fresh
        # checkout), so make sure the parent folder exists before saving.
        os.makedirs(os.path.dirname(output_csv), exist_ok=True)
        df.to_csv(output_csv, index=False)

    return {
        'actions': [download_data],
    }

In [None]:
%doit list

Add a doit task called `stats` that implements the below code

```python
def calculate_stats():
    csv_file = "data/data.csv"
    output_csv = "data/data_stats.csv"
    df = pd.read_csv(csv_file)
    df_stat = df.describe().reset_index()
    df_stat.to_csv(output_csv, index=False)  
```

In [None]:
def task_stats():
    """doit task ``stats``: summarise ``data/data.csv`` into ``data/data_stats.csv``.

    Returns:
        A doit task dictionary whose single action is the nested
        ``calculate_stats`` function.
    """
    def calculate_stats():
        """Save ``describe()`` of the data, statistic names as a column, no index."""
        summary = pd.read_csv("data/data.csv").describe().reset_index()
        summary.to_csv("data/data_stats.csv", index=False)

    return {'actions': [calculate_stats]}

In [None]:
%doit list

Add a doit task called `plot` that implements the below code

```python
def create_vizualization():
    csv_file = "data/data.csv"
    hist_col_name = 'response_time'
    df = pd.read_csv(csv_file)
    df[hist_col_name].hvplot.hist()
```

In [None]:
def task_plot():
    """doit task ``plot``: histogram of ``response_time`` from ``data/data.csv``.

    Returns:
        A doit task dictionary whose single action is the nested
        ``create_vizualization`` function.

    NOTE(review): the histogram object is neither returned nor saved, so
    running the task produces no visible output. doit expects python-actions
    to return None/True/str/dict, so persisting the plot to a file would be
    the way to keep it; confirm the intended behaviour.
    """
    def create_vizualization():
        """Build the histogram; the resulting plot object is discarded."""
        response_times = pd.read_csv("data/data.csv")['response_time']
        response_times.hvplot.hist()

    return {'actions': [create_vizualization]}

In [None]:
%doit list

**Example** Change `download` task so that it accepts `url` parameter

In [None]:
def task_download_data():
    """doit task ``download_data``: fetch the example dataset into ``data/data.csv``.

    Returns:
        A doit task dictionary whose single action calls the nested
        ``download_data`` function with the dataset URL as its only
        positional argument.
    """
    def download_data(url):
        """Read the CSV at ``url`` and save it to ``data/data.csv`` without index."""
        output_csv = "data/data.csv"
        df = pd.read_csv(url)
        # to_csv cannot write into a missing directory, so create it on demand.
        os.makedirs(os.path.dirname(output_csv), exist_ok=True)
        df.to_csv(output_csv, index=False)

    return {
        'actions': [(download_data, ["https://uni-bonn.sciebo.de/s/FYJPmdTyPo1qwRX/download"])],
    }

In [None]:
%doit download_data

Change `stats` task so that it accepts `csv_file` argument

In [None]:
def task_stats():
    """doit task ``stats``: summarise a CSV into ``data/data_stats.csv``.

    Returns:
        A doit task dictionary whose single action calls the nested
        ``calculate_stats`` function with ``"data/data.csv"``.
    """
    def calculate_stats(csv_file):
        """Save ``describe()`` of ``csv_file``, statistic names as a column, no index."""
        stats_table = pd.read_csv(csv_file).describe().reset_index()
        stats_table.to_csv("data/data_stats.csv", index=False)

    stats_args = ["data/data.csv"]
    return {'actions': [(calculate_stats, stats_args)]}

In [None]:
%doit stats

Change `plot` task so that it accepts `csv_file` argument

In [None]:
def task_plot():
    """doit task ``plot``: histogram of ``response_time`` from a CSV file.

    Returns:
        A doit task dictionary whose single action calls the nested
        ``create_vizualization`` function with ``"data/data.csv"``.

    NOTE(review): the histogram object is neither returned nor saved, so
    running the task produces no visible output. doit expects python-actions
    to return None/True/str/dict, so the plot is deliberately not returned.
    """
    def create_vizualization(csv_file):
        """Build a histogram of the ``response_time`` column of ``csv_file``."""
        # Same column as every other version of this plot; the previous
        # 'count' did not match the rest of the notebook.
        hist_col_name = 'response_time'
        df = pd.read_csv(csv_file)
        df[hist_col_name].hvplot.hist()
    return {
        'actions': [(create_vizualization, ["data/data.csv"])],
    }

In [None]:
%doit plot

**Example** Change `download` task so that it accepts both `url` and `output_csv` parameters

In [None]:
def task_download():
    """doit task ``download``: fetch the example dataset into ``data/data.csv``.

    Returns:
        A doit task dictionary whose single action calls the nested
        ``download_data`` function with the dataset URL and the output path.
    """
    def download_data(url, output_csv):
        """Read the CSV at ``url`` and save it to ``output_csv`` without index."""
        df = pd.read_csv(url)
        # Only filesystem paths have a parent directory to prepare; anything
        # else (e.g. an open buffer) is handed to to_csv untouched.
        if isinstance(output_csv, (str, os.PathLike)):
            parent_dir = os.path.dirname(output_csv)
            if parent_dir:
                os.makedirs(parent_dir, exist_ok=True)
        df.to_csv(output_csv, index=False)

    return {
        'actions': [(download_data, ["https://uni-bonn.sciebo.de/s/FYJPmdTyPo1qwRX/download", "data/data.csv"])],
    }

In [None]:
%doit

Change `stats` task so that it accepts both `csv_file` and `output_csv` parameters

In [None]:
def task_stats():
    """doit task ``stats``: summarise ``data/data.csv`` into ``data/data_stats.csv``.

    Returns:
        A doit task dictionary whose single action calls the nested
        ``calculate_stats`` function with the input and output paths.
    """
    def calculate_stats(csv_file, output_csv):
        """Save ``describe()`` of ``csv_file`` to ``output_csv`` without index."""
        described = pd.read_csv(csv_file).describe()
        described.reset_index().to_csv(output_csv, index=False)

    io_paths = ["data/data.csv", "data/data_stats.csv"]
    return {'actions': [(calculate_stats, io_paths)]}

In [None]:
%doit

Change `plot` task so that it accepts both `csv_file` and `hist_col_name` as parameters

In [None]:
def task_plot():
    """doit task ``plot``: histogram of one column of a CSV file.

    Returns:
        A doit task dictionary whose single action calls the nested
        ``create_vizualization`` function with ``"data/data.csv"`` and
        ``'response_time'``.

    NOTE(review): the histogram object is neither returned nor saved, so
    running the task produces no visible output; confirm the intended
    behaviour.
    """
    def create_vizualization(csv_file, hist_col_name):
        """Build a histogram of ``hist_col_name``; the plot object is discarded."""
        column = pd.read_csv(csv_file)[hist_col_name]
        column.hvplot.hist()

    plot_args = ["data/data.csv", 'response_time']
    return {'actions': [(create_vizualization, plot_args)]}

In [None]:
%doit

## Using Notebooks To Make Documented Functions

Create a `data_access.ipynb` for the download task such that the below code works

Hints: 
1. Add function in a cell and use markdown to add details around it.
2. Use markdown to add more details about the function (for example, column descriptions, the kind of data expected, which experiment it is for, etc.)

In [None]:
from data_access import download_steinmetz_data

def task_download():
    """doit task ``download`` backed by the ``data_access`` notebook.

    Returns:
        A doit task dictionary whose single action calls
        ``download_steinmetz_data`` with the dataset URL and ``"data/data.csv"``.

    NOTE(review): the argument order is assumed to be (url, output_csv);
    confirm against ``data_access.ipynb``.
    """
    source_url = "https://uni-bonn.sciebo.de/s/FYJPmdTyPo1qwRX/download"
    destination = "data/data.csv"
    return {'actions': [(download_steinmetz_data, [source_url, destination])]}

Create a `stats.ipynb` for the `stats` task such that the below code works

In [None]:
from stats import calculate_steinmetz_stats

def task_stats():
    """doit task ``stats`` backed by the ``stats`` notebook.

    Returns:
        A doit task dictionary whose single action calls
        ``calculate_steinmetz_stats`` with ``"data/data.csv"`` and
        ``"data/data_stats.csv"``.

    NOTE(review): the argument order is assumed to be (csv_file, output_csv);
    confirm against ``stats.ipynb``.
    """
    stats_action = (calculate_steinmetz_stats, ["data/data.csv", "data/data_stats.csv"])
    return {'actions': [stats_action]}

Create a `visualization.ipynb` notebook such that the below code works

In [None]:
from visualization import plot_steinmetz

def task_plot():
    """doit task ``plot`` backed by the ``visualization`` notebook.

    Returns:
        A doit task dictionary whose single action calls ``plot_steinmetz``
        with ``"data/data.csv"`` and ``'response_time'``.

    NOTE(review): the argument order is assumed to be (csv_file,
    hist_col_name); confirm against ``visualization.ipynb``.
    """
    plot_args = ["data/data.csv", 'response_time']
    return dict(actions=[(plot_steinmetz, plot_args)])