# Introduction to TaskChain via an example project

In [None]:
# enable IPython's autoreload extension; mode 2 reloads all modules before each cell is executed
%load_ext autoreload
%autoreload 2

In [None]:
from movie_ratings import config
from taskchain import Config

from pathlib import Path

###  Let's see our source data

In [None]:
# show the directory tree of the source data (uses the `tree` shell command)
!tree {config.DATA_DIR}/source_data

In [None]:
# path where TaskChain stores all data
task_data_path = Path('/tmp/task_data')

## Explore IMDB movies

### 1. start with a config

In [None]:
# list all configs in the project
!tree {config.CONFIGS_DIR}

In [None]:
# config used in this section: movies/imdb.filtered.yaml inside the configs directory
config_path = config.CONFIGS_DIR / 'movies' / 'imdb.filtered.yaml'

In [None]:
# read the whole config file, then print its content
with config_path.open() as config_file:
    config_text = config_file.read()
print(config_text)

#### content of the config

- declares tasks which will be part of the chain
- and some parameters
    - where to take the source data from
    - movies will be filtered to those older than 1945 and with at least 1000 ratings


### 2. Create chain 

In [None]:
# create a Config from the config file and build a chain from it
chain = Config(
    task_data_path, # where the data should be stored
    config_path,
    global_vars=config,  # set global variables
).chain()
# log at DEBUG level
chain.set_log_level('DEBUG')

# show tasks
chain

In [None]:
# draw the chain
chain.draw()

### 3. Compute and get data 

In [None]:
# access a task as an attribute of the chain
chain.duration_histogram

In [None]:
# access the task's data through its `value` attribute
chain.duration_histogram.value

In [None]:
# some data has been computed now!
chain

In [None]:
# show some details about the task's computation
chain.duration_histogram.run_info

## Explore models

In [None]:
from taskchain import MultiChain

# with MultiChain, we can create multiple chains at once
chains = MultiChain.from_dir(
    task_data_path, 
    config.CONFIGS_DIR / 'rating_model' / 'all_features', 
    global_vars=config,
)
chains

In [None]:
# get a chain - note that some tasks already have data computed from exploring movies
chains['baseline']

In [None]:
# draw the baseline chain
chains['baseline'].draw()

In [None]:
# request test_metrics and trigger computation of all necessary tasks (feature preparation, model training etc.)
chains['baseline'].test_metrics.value

In [None]:
# Now get metrics for all models - only the training of the new models has to be
# done here; features are already computed and are just loaded.
# The loop variable is deliberately not named `chain`, so the movies chain
# created earlier in this notebook is not overwritten.
for name, model_chain in chains.items():
    _ = model_chain.test_metrics.value

# print one right-aligned row per model: name, RMSE, MAE
for name, model_chain in chains.items():
    metrics = model_chain.test_metrics.value
    print(f'{name:>20}: {metrics["RMSE"]:.3f} {metrics["MAE"]:.3f}')


## Exercise 

- add own config
    - for movies with different filtering - task `all_movies` should not be recomputed
    - for models trained only on personal data
        - set `user_rating_file` parameter 
- add own tasks
    - e.g. to analyse ratings per genre
        - avg
        - histograms
        - the best genre combination
- add own pipeline
    - analyze personal ratings
    - movies as prerequisite to get more data about movies
    - use your imagination on what analysis is interesting
