# Introduction to TaskChain via Example project

In [None]:
# Load IPython's autoreload extension; mode 2 reloads imported modules
# before code is executed, so edits to project code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from movie_ratings import config
from taskchain import Config

from pathlib import Path

In [None]:
# Shell out to `tree` to list the contents of the `source_data` directory.
!tree {config.DATA_DIR}/source_data

In [None]:
# Directory passed to Config / MultiChain below as the place where data is stored.
task_data_path = Path('/tmp/task_data')

## Explore IMDB movies

In [None]:
# Shell out to `tree` to list the contents of the configs directory.
!tree {config.CONFIGS_DIR}

In [None]:
# Path to the `imdb.filtered.yaml` config inside the `movies` config directory.
config_path = config.CONFIGS_DIR / 'movies' / 'imdb.filtered.yaml'

In [None]:
# Print the config file line by line.
# Only the line terminator is stripped: slicing with `line[:-1]` would also
# cut the last real character of a final line that has no trailing newline.
with config_path.open() as f:
    for line in f:
        print(line.rstrip('\n'))

In [None]:
# Build a Config from the config file and create a chain from it.
conf = Config(
    task_data_path, # where the data should be stored
    config_path,
    global_vars=config,  # set global variables
)
chain = conf.chain()
chain.set_log_level('DEBUG')
# last expression of the cell: display the chain
chain

In [None]:
# Draw the chain.
# NOTE(review): presumably renders the task dependency graph — confirm in TaskChain docs.
chain.draw()

In [None]:
# access tasks: a task is reached as an attribute of the chain
chain.duration_histogram

In [None]:
# access data: the `value` attribute of a task gives its data
chain.duration_histogram.value

In [None]:
# Some data has been computed now; display the chain again to see it.
chain

In [None]:
# Read the value of the `genres` task (discarded into `_`), then show the
# task's run info.
# NOTE(review): presumably the value is requested first so that the task
# has actually been run — confirm in TaskChain docs.
_ = chain.genres.value
chain.genres.run_info

## Explore models

In [None]:
from taskchain import MultiChain

# Create chains from the `rating_model/all_features` config directory,
# using the same data directory and global variables as the single chain above.
# NOTE(review): presumably one chain per config file in that directory — confirm.
chains = MultiChain.from_dir(
    task_data_path, 
    config.CONFIGS_DIR / 'rating_model' / 'all_features', 
    global_vars=config,
)
# last expression of the cell: display the chains
chains

In [None]:
# Look up a single chain by its name.
chains['baseline']

In [None]:
# Draw the `baseline` chain.
chains['baseline'].draw()

In [None]:
# Value of the `test_metrics` task of the `baseline` chain.
chains['baseline'].test_metrics.value

In [None]:
# Print RMSE and MAE of every chain, one line per chain.
# The loop variable is deliberately not named `chain`: that name is already
# bound to the IMDB movies chain created earlier in this notebook, and
# reusing it here would silently replace it with the last model chain.
for name, model_chain in chains.items():
    # read the metrics once per chain instead of once per printed number
    metrics = model_chain.test_metrics.value
    print(f'{name:>20}: {metrics["RMSE"]:.3f} {metrics["MAE"]:.3f}')


## Exercise 

- add own config
    - for movies with different filtering - task `all_movies` should not be recomputed
    - for models trained only on personal data
        - set `user_rating_file` parameter 
- add own tasks
    - e.g. to analyse ratings per genre
        - avg
        - histograms
        - the best genre combination
- add own pipeline
    - analyze personal ratings
    - movies as prerequisite to get more data about movies
    - use your imagination to decide what analysis is interesting
