# Scheduling

In [None]:
import dask
from datetime import datetime
import os
import time
from pandas import DataFrame
from boto3 import resource

from grizly.scheduling.registry import Job
from grizly import Email, S3
import logging

## Register jobs

Before you register a job you have to define the tasks that your job will run. Let's define a function that returns the last modified date of a file in S3.

In [None]:
@dask.delayed
def get_last_modified_date(s3_key, file_name):
    """Return the ``last_modified`` value of an S3 file as a string.

    Args:
        s3_key: S3 key passed through to ``S3``.
        file_name: Name of the file passed through to ``S3``.

    Returns:
        ``str()`` of the ``last_modified`` attribute of the ``S3`` object.
    """
    s3_file = S3(s3_key=s3_key, file_name=file_name)
    return str(s3_file.last_modified)

In [None]:
# Build the task that checks the file the listener job will watch.
task = get_last_modified_date(
    s3_key="grizly/",
    file_name="test_scheduling.csv",
)

Jobs that listen for changes are called **listener jobs**. A good practice is to start their names with the `listener` prefix so that they are easy to list.

In [None]:
# Create a handle for the listener job and register it with the task
# defined above.
job = Job("listener_s3_grizly_test_scheduling")
job.register(tasks=[task], if_exists="replace")

We just registered a job called `listener_s3_grizly_test_scheduling`. The name of a job is unique, and you can always check its details with the `info()` method.

In [None]:
# Look the job up again by its name and show its details.
job = Job("listener_s3_grizly_test_scheduling")
job.info()

As you can see, this job is not scheduled yet: it is not a cron job, and it has no upstream jobs or triggers. You can pass these parameters during registration or overwrite them later using the `crons`, `upstream` or `triggers` attributes.

## Add cron string

Let's now add a cron string to our job so that it runs every minute.

In [None]:
# Cron expression with every field set to "*": run every minute.
job.crons = "* * * * *"

# Show the job details again, now that `crons` has been set.
job.info()

## Register jobs with upstream job

Let's now register two jobs whose upstream job is `listener_s3_grizly_test_scheduling`. One will send an email whenever the upstream job finishes with status `success`, and the other will send an email whenever the upstream job's result changes.

In [None]:
@dask.delayed
def send_email(subject, body, to):
    """Build an ``Email`` with the given subject and body and send it.

    An ``email`` child of the ``distributed.worker`` logger is handed to
    ``Email`` as its logger.

    Args:
        subject: Subject passed to ``Email``.
        body: Body passed to ``Email``.
        to: Recipient passed to ``Email.send``.
    """
    worker_logger = logging.getLogger("distributed.worker")
    message = Email(
        subject=subject,
        body=body,
        logger=worker_logger.getChild("email"),
    )
    message.send(to=to)

In [None]:
# Task: email a notification about the listener job's success.
task = send_email(
    subject="Job success",
    body="Job `listener_s3_grizly_test_scheduling` finished with status success.",
    to="katarzyna.malina@te.com",
)

# NOTE: the job name below is spelled with three c's ("succcess"); the
# unregister cell at the end of the notebook uses the same spelling.
job = Job("email_upstream_succcess")

# Register with the listener job as upstream, keyed on "success".
job.register(
    tasks=[task],
    if_exists="replace",
    upstream={"listener_s3_grizly_test_scheduling": "success"},
)

job.info()

In [None]:
# Task: email a notification that the watched file changed.
task = send_email(
    subject="File changed",
    body="Somebody changed 'grizly/test_scheduling.csv' file!",
    to="katarzyna.malina@te.com",
)

job = Job("email_upstream_result_change")

# Register with the listener job as upstream, keyed on "result_change".
job.register(
    tasks=[task],
    if_exists="replace",
    upstream={"listener_s3_grizly_test_scheduling": "result_change"},
)

job.info()

You can see now that `listener_s3_grizly_test_scheduling` has two downstream jobs.

In [None]:
# Show the listener job's details again after registering the two email jobs.
job = Job("listener_s3_grizly_test_scheduling")
job.info()

After the first run (in this case, after one minute) you will be able to access the `last_run` property, which holds information about the last run of your job.

## Unregister jobs

In [None]:
# Unregister every job created in this notebook, listener first, passing
# `remove_job_runs=True` for each.
for job_name in (
    "listener_s3_grizly_test_scheduling",
    "email_upstream_succcess",
    "email_upstream_result_change",
):
    Job(job_name).unregister(remove_job_runs=True)

In [None]:
# Display the listener job's `downstream` attribute.
# NOTE(review): this cell comes after the unregister cell above — confirm that
# looking up an unregistered job is intended here.
Job("listener_s3_grizly_test_scheduling").downstream