In [23]:
# %pip install doit

In [24]:
from pathlib import Path

import pandas as pd
from doit import load_ipython_extension

# NOTE(review): the plotting cells use the `.hvplot` accessor, which presumably
# needs `import hvplot.pandas` to be registered -- confirm and add it here.
load_ipython_extension()

# Building Workflows in Jupyter Notebooks

## Functions

Functions are reusable blocks of code that perform a specific task. 
They help organize code, make it more readable, and allow you to avoid repetition by encapsulating logic that can be called multiple times throughout a program.

A function is defined using the `def` keyword followed by the function name and parentheses `()`. 
The code that performs the task is placed inside the function body, indented under the function definition.

```python
def function_name(parameters):  # definition, name, parameters
    # body
    return value  # return (optional)
```

**Example** Make a function called `download_data` that replaces the code below:

```python
url = "https://uni-bonn.sciebo.de/s/FYJPmdTyPo1qwRX/download"
output_csv = "data/data.csv"
df = pd.read_csv(url)
df.to_csv(output_csv, index=False)
```

In [8]:
def download_data():
    """Download the CSV behind the sciebo share link to ``data/data.csv``.

    The file is read with ``pd.read_csv`` and written back without the
    index column. Returns ``None``.
    """
    url = "https://uni-bonn.sciebo.de/s/FYJPmdTyPo1qwRX/download"
    output_csv = "data/data.csv"
    df = pd.read_csv(url)
    # ``to_csv`` fails if the target directory is missing, so create
    # ``data/`` first (a no-op when it already exists).
    Path(output_csv).parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(output_csv, index=False)

In [None]:
# download_data()

Make a function called `calculate_stats` that replaces the below code

```python
csv_file = "data/data.csv"
output_csv = "data/data_stats.csv"
df = pd.read_csv(csv_file)
df_stat = df.describe().reset_index()
df_stat.to_csv(output_csv, index=False)
```

In [9]:
def calculate_stats():
    """Summarise ``data/data.csv`` and save the result to ``data/data_stats.csv``.

    The summary is ``DataFrame.describe()`` with its index turned into a
    regular column; it is written without a row index.
    """
    source_path, stats_path = "data/data.csv", "data/data_stats.csv"
    summary = pd.read_csv(source_path).describe().reset_index()
    summary.to_csv(stats_path, index=False)

In [None]:
# calculate_stats()

Make a function called `create_visualization` that replaces the below code

```python
csv_file = "data/data.csv"
hist_col_name = 'response_time'
df = pd.read_csv(csv_file)
df[hist_col_name].hvplot.hist()
```

In [10]:
def create_vizualization():
    """Build a histogram of ``response_time`` from ``data/data.csv``.

    Returns:
        The object produced by ``.hvplot.hist()``. Returning it (instead of
        discarding it) lets a notebook cell that ends with a call to this
        function display the plot.
    """
    csv_file = "data/data.csv"
    hist_col_name = 'response_time'
    df = pd.read_csv(csv_file)
    return df[hist_col_name].hvplot.hist()

**Example** Make `url` a parameter in `download_data` function

In [16]:
def download_data(url):
    """Read a CSV from ``url`` and store it as ``data/data.csv``.

    Args:
        url: Anything ``pd.read_csv`` accepts as a source.

    The file is written without the index column. Returns ``None``.
    """
    output_csv = "data/data.csv"
    df = pd.read_csv(url)
    # ``to_csv`` fails if the target directory is missing, so create
    # ``data/`` first (a no-op when it already exists).
    Path(output_csv).parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(output_csv, index=False)

In [None]:
# download_data("https://uni-bonn.sciebo.de/s/FYJPmdTyPo1qwRX/download")

Make `csv_file` a parameter in `calculate_stats` function

In [17]:
def calculate_stats(csv_file):
    """Summarise the CSV at ``csv_file`` into ``data/data_stats.csv``.

    Args:
        csv_file: Source passed to ``pd.read_csv``.

    The ``describe()`` table is saved with its index as a regular column
    and without a row index.
    """
    summary = pd.read_csv(csv_file).describe().reset_index()
    summary.to_csv("data/data_stats.csv", index=False)

In [19]:
# calculate_stats("data/data.csv")

Make `csv_file` a parameter in `create_visualization` function

In [18]:
def create_vizualization(csv_file):
    """Build a histogram of the ``response_time`` column of ``csv_file``.

    Args:
        csv_file: Source passed to ``pd.read_csv``.

    Returns:
        The object produced by ``.hvplot.hist()``. Returning it (instead of
        discarding it) lets a notebook cell that ends with a call to this
        function display the plot.
    """
    hist_col_name = 'response_time'
    df = pd.read_csv(csv_file)
    return df[hist_col_name].hvplot.hist()

In [20]:
# create_vizualization("data/data.csv")

**Example** Make `url` and `output_csv` as parameters in `download_data` function

In [11]:
def download_data(url, output_csv):
    """Read a CSV from ``url`` and write it to ``output_csv``.

    Args:
        url: Anything ``pd.read_csv`` accepts as a source.
        output_csv: Destination path; missing parent directories are created.

    The file is written without the index column. Returns ``None``.
    """
    df = pd.read_csv(url)
    # ``to_csv`` fails if the target directory is missing, so create it
    # first (a no-op when it already exists).
    Path(output_csv).parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(output_csv, index=False)

Make `csv_file` and `output_csv` as parameters in `calculate_stats` function

In [12]:
def calculate_stats(csv_file, output_csv):
    """Write the ``describe()`` summary of ``csv_file`` to ``output_csv``.

    Args:
        csv_file: Source passed to ``pd.read_csv``.
        output_csv: Destination of the summary table.

    The summary's index becomes a regular column and no row index is saved.
    """
    (
        pd.read_csv(csv_file)
        .describe()
        .reset_index()
        .to_csv(output_csv, index=False)
    )

Make `csv_file` and `hist_col_name` as parameters in `create_visualization` function

In [15]:
def create_vizualization(csv_file, hist_col_name):
    """Build a histogram of one column of a CSV file.

    Args:
        csv_file: Source passed to ``pd.read_csv``.
        hist_col_name: Name of the column to plot.

    Returns:
        The object produced by ``.hvplot.hist()``. Returning it (instead of
        discarding it) lets a notebook cell that ends with a call to this
        function display the plot.
    """
    df = pd.read_csv(csv_file)
    return df[hist_col_name].hvplot.hist()

## Building Workflows Within A Jupyter Notebook

**Example** no argument

In [21]:
def task_download_data():
    """doit task ``download_data``: fetch the CSV into ``data/data.csv``.

    Returns the task dictionary; its single action is the nested
    ``download_data`` function, called without arguments.
    """
    def download_data():
        url = "https://uni-bonn.sciebo.de/s/FYJPmdTyPo1qwRX/download"
        output_csv = "data/data.csv"
        df = pd.read_csv(url)
        # ``to_csv`` fails if the target directory is missing, so create
        # ``data/`` first (a no-op when it already exists).
        Path(output_csv).parent.mkdir(parents=True, exist_ok=True)
        df.to_csv(output_csv, index=False)

    return {
        'actions': [download_data],
    }

In [25]:
%doit list

download_data   


In [26]:
def task_stats():
    """doit task ``stats``: summarise ``data/data.csv``.

    Returns the task dictionary; its single action writes the
    ``describe()`` table to ``data/data_stats.csv``.
    """
    def calculate_stats():
        source_path, stats_path = "data/data.csv", "data/data_stats.csv"
        summary = pd.read_csv(source_path).describe().reset_index()
        summary.to_csv(stats_path, index=False)

    return {'actions': [calculate_stats]}

In [27]:
%doit list

download_data   
stats           


In [28]:
def task_plot():
    """doit task ``plot``: histogram of ``response_time`` in ``data/data.csv``.

    Returns the task dictionary with the nested function as its single
    action. The action builds the plot but neither returns nor saves it.
    """
    def create_vizualization():
        source_path, column = "data/data.csv", 'response_time'
        frame = pd.read_csv(source_path)
        frame[column].hvplot.hist()

    return {'actions': [create_vizualization]}

In [29]:
%doit list

download_data   
plot            
stats           


**Example** single argument

In [30]:
def task_download_data():
    """doit task ``download_data``: fetch a CSV into ``data/data.csv``.

    Returns the task dictionary. The action is a ``(callable, args)`` tuple,
    so the nested ``download_data`` receives the URL as its one positional
    argument.
    """
    def download_data(url):
        output_csv = "data/data.csv"
        df = pd.read_csv(url)
        # ``to_csv`` fails if the target directory is missing, so create
        # ``data/`` first (a no-op when it already exists).
        Path(output_csv).parent.mkdir(parents=True, exist_ok=True)
        df.to_csv(output_csv, index=False)

    return {
        # The URL must be passed as a plain string: the previous value had
        # literal double quotes inside the string, which made it invalid.
        'actions': [(download_data, ["https://uni-bonn.sciebo.de/s/FYJPmdTyPo1qwRX/download"])],
    }

In [32]:
# 

In [35]:
def task_stats():
    """doit task ``stats``: summarise a CSV into ``data/data_stats.csv``.

    Returns the task dictionary. The action is a ``(callable, args)`` tuple
    that passes ``"data/data.csv"`` as the nested function's ``csv_file``.
    """
    def calculate_stats(csv_file):
        summary = pd.read_csv(csv_file).describe().reset_index()
        summary.to_csv("data/data_stats.csv", index=False)

    stats_action = (calculate_stats, ["data/data.csv"])
    return {'actions': [stats_action]}

In [33]:
# 

In [36]:
def task_plot():
    """doit task ``plot``: histogram of ``response_time`` for a CSV file.

    Returns the task dictionary. The action is a ``(callable, args)`` tuple
    that passes ``"data/data.csv"`` as ``csv_file``. The action builds the
    plot but neither returns nor saves it.
    """
    def create_vizualization(csv_file):
        column = 'response_time'
        frame = pd.read_csv(csv_file)
        frame[column].hvplot.hist()

    plot_action = (create_vizualization, ["data/data.csv"])
    return {'actions': [plot_action]}

In [34]:
# 

**Example** multiple arguments

In [41]:
def task_download_data():
    """doit task ``download_data``: fetch a CSV to a given output path.

    Returns the task dictionary. The action is a ``(callable, args)`` tuple
    that passes the sciebo URL and ``"data/data.csv"`` positionally to the
    nested ``download_data``.
    """
    def download_data(url, output_csv):
        df = pd.read_csv(url)
        # ``to_csv`` fails if the target directory is missing, so create it
        # first (a no-op when it already exists).
        Path(output_csv).parent.mkdir(parents=True, exist_ok=True)
        df.to_csv(output_csv, index=False)

    return {
        'actions': [(download_data, ["https://uni-bonn.sciebo.de/s/FYJPmdTyPo1qwRX/download", "data/data.csv"])],
    }

In [37]:
# 

In [42]:
def task_stats():
    """doit task ``stats``: write the ``describe()`` summary of a CSV.

    Returns the task dictionary. The action is a ``(callable, args)`` tuple
    that passes ``"data/data.csv"`` and ``"data/data_stats.csv"`` as input
    and output paths.
    """
    def calculate_stats(csv_file, output_csv):
        summary = pd.read_csv(csv_file).describe().reset_index()
        summary.to_csv(output_csv, index=False)

    stats_action = (calculate_stats, ["data/data.csv", "data/data_stats.csv"])
    return {'actions': [stats_action]}

In [38]:
# 

In [43]:
def task_plot():
    """doit task ``plot``: histogram of one column of a CSV file.

    Returns the task dictionary. The action is a ``(callable, args)`` tuple
    that passes ``"data/data.csv"`` and ``'response_time'`` positionally.
    The action builds the plot but neither returns nor saves it.
    """
    def create_vizualization(csv_file, hist_col_name):
        frame = pd.read_csv(csv_file)
        frame[hist_col_name].hvplot.hist()

    plot_action = (create_vizualization, ["data/data.csv", 'response_time'])
    return {'actions': [plot_action]}

In [40]:
# 

## File Dependency

```python

def task_create_csv():
    """doit task: write ``data.csv`` with three columns of random integers.

    Returns the task dictionary with the nested function as its only action.
    """
    def create_file():
        # Columns A, B, C are drawn in that order: 10 integers each from
        # np.random.randint(1, 100, 10).
        columns = {name: np.random.randint(1, 100, 10) for name in ('A', 'B', 'C')}
        pd.DataFrame(columns).to_csv('data.csv', index=False)

    return {'actions': [create_file]}

def task_stats():
    """doit task: describe ``data.csv``, save to ``stats.csv`` and print it.

    The task dictionary lists ``data.csv`` under ``file_dep``.
    """
    def calculate_stats():
        summary = pd.read_csv('data.csv').describe()
        summary.to_csv('stats.csv')
        print(summary)

    task_spec = {'actions': [calculate_stats]}
    task_spec['file_dep'] = ['data.csv']
    return task_spec

def task_plot():
    """doit task: bar-plot ``data.csv`` and save the figure as ``plot.png``.

    The task dictionary lists ``data.csv`` under ``file_dep``.
    """
    def create_plot():
        frame = pd.read_csv('data.csv')
        frame.plot(kind='bar')
        plt.savefig('plot.png')
        print("Plot saved as plot.png")

    task_spec = {'actions': [create_plot]}
    task_spec['file_dep'] = ['data.csv']
    return task_spec
```

## Targets

```python
def task_create_csv():
    """doit task: write ``data.csv`` with three columns of random integers.

    The task dictionary declares ``data.csv`` under ``targets``.
    """
    def create_file():
        # Columns A, B, C are drawn in that order: 10 integers each from
        # np.random.randint(1, 100, 10).
        columns = {name: np.random.randint(1, 100, 10) for name in ('A', 'B', 'C')}
        pd.DataFrame(columns).to_csv('data.csv', index=False)
        print("CSV file created")

    task_spec = {'actions': [create_file]}
    task_spec['targets'] = ['data.csv']
    return task_spec

def task_stats():
    """doit task: describe ``data.csv``, save to ``stats.csv`` and print it.

    The task dictionary lists ``data.csv`` under ``file_dep`` and
    ``stats.csv`` under ``targets``.
    """
    def calculate_stats():
        summary = pd.read_csv('data.csv').describe()
        summary.to_csv('stats.csv')
        print(summary)

    task_spec = {'actions': [calculate_stats]}
    task_spec['file_dep'] = ['data.csv']
    task_spec['targets'] = ['stats.csv']
    return task_spec

def task_plot():
    """doit task: bar-plot ``data.csv`` and save the figure as ``plot.png``.

    The task dictionary lists ``data.csv`` under ``file_dep`` and
    ``plot.png`` under ``targets``.
    """
    def create_plot():
        frame = pd.read_csv('data.csv')
        frame.plot(kind='bar')
        plt.savefig('plot.png')
        print("Plot saved as plot.png")

    task_spec = {'actions': [create_plot]}
    task_spec['file_dep'] = ['data.csv']
    task_spec['targets'] = ['plot.png']
    return task_spec
```