In [None]:
# default_exp adapters

# Data Adapters

In [None]:
#hide
from nbdev.showdoc import *

In [None]:
import dataclasses
import typing

In [None]:
#export
import collections
import functools

import mezzala.parameters

## Basic adapters

In [None]:
#export


class KeyAdapter:
    """
    Get data from subscriptable objects.

    Every configured accessor name (``home_goals``, ``away_goals`` and any
    extra keyword argument) is exposed as an attribute holding a function
    ``row -> value``.

    Each accessor is configured with either a single key, looked up as
    ``row[key]``, or a list of keys, applied one after another to reach
    nested data (``row[k0][k1]...``).

    Accessing a name that was not configured raises ``AttributeError``, so
    ``hasattr`` and ``copy``/``pickle`` behave as they do for ordinary objects.
    """

    def __init__(self, home_goals, away_goals, **kwargs):
        # Maps accessor name -> key (or list of keys) used to subscript a row
        self._lookup = {
            'home_goals': home_goals,
            'away_goals': away_goals,
            **kwargs
        }

    def _get_in(self, row, item):
        """Return ``row[item]``, or follow each key in turn if `item` is a list."""
        if isinstance(item, list):
            return functools.reduce(lambda d, i: d[i], item, row)
        return row[item]

    def __getattr__(self, key):
        # __getattr__ only runs once normal attribute lookup has failed.
        # Read `_lookup` through __dict__ so that an instance created without
        # __init__ (e.g. while being copied or unpickled) does not recurse
        # back into this method.
        lookup = self.__dict__.get('_lookup', {})
        if key not in lookup:
            raise AttributeError(
                f'{type(self).__name__!r} object has no attribute {key!r}'
            )

        def getter(row):
            return self._get_in(row, self._lookup[key])
        return getter

Anything subscriptable can be used with this type of adapter. For example,
you might have input data as a list of tuples (e.g. using Python's
built-in `csv` library).

In [None]:
# Integer positions act as keys for list- or tuple-shaped rows
index_adapter = KeyAdapter(home_goals=0, away_goals=1)

assert 1 == index_adapter.home_goals([1, 2])
assert 2 == index_adapter.away_goals([1, 2])

Or, you might be using a list of dicts.

In [None]:
# A dict-shaped row: column names are the keys
example_dict = dict(home='Team 1', away='Team 2', hg=4, ag=3)

# Map each accessor name onto the dict key that holds its value
dict_adapter = KeyAdapter(
    home_goals='hg',
    away_goals='ag',
    home_team='home',
    away_team='away',
)

assert (
    dict_adapter.home_goals(example_dict),
    dict_adapter.away_goals(example_dict),
) == (4, 3)
assert (
    dict_adapter.home_team(example_dict),
    dict_adapter.away_team(example_dict),
) == ('Team 1', 'Team 2')

Nested values can be reached by supplying a list of keys, which are applied in order.

In [None]:
# The scoreline is stored as a [home, away] list under a single key
example_nested_dict = {'scoreline': [1, 1]}

# Each list is a path: first the 'scoreline' key, then the position within it
nested_dict_adapter = KeyAdapter(['scoreline', 0], ['scoreline', 1])

assert 1 == nested_dict_adapter.home_goals(example_nested_dict)
assert 1 == nested_dict_adapter.away_goals(example_nested_dict)

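`KeyAdapter` can also be used with rows from `pd.DataFrame.iterrows`; however, iterating with `pd.DataFrame.itertuples` is much faster, and the named tuples it yields expose columns as attributes rather than string keys.

Likewise, you can't use a `KeyAdapter` with custom objects that are not subscriptable (e.g. dataclasses).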

In this case, you need an `AttributeAdapter`.

In [None]:
#export


class AttributeAdapter:
    """
    Get data from object attributes.

    Every configured accessor name (``home_goals``, ``away_goals`` and any
    extra keyword argument) is exposed as an attribute holding a function
    ``row -> value``.

    Each accessor is configured with either a single attribute name, read as
    ``getattr(row, name)``, or a list of attribute names, followed one after
    another to reach nested objects.

    Accessing a name that was not configured raises ``AttributeError``, so
    ``hasattr`` and ``copy``/``pickle`` behave as they do for ordinary objects.
    """

    def __init__(self, home_goals, away_goals, **kwargs):
        # Maps accessor name -> attribute name (or list of names) read from a row
        self._lookup = {
            'home_goals': home_goals,
            'away_goals': away_goals,
            **kwargs
        }

    def _get_in(self, row, item):
        """Return ``getattr(row, item)``, or follow each name in turn if `item` is a list."""
        if isinstance(item, list):
            return functools.reduce(getattr, item, row)
        return getattr(row, item)

    def __getattr__(self, key):
        # __getattr__ only runs once normal attribute lookup has failed.
        # Read `_lookup` through __dict__ so that an instance created without
        # __init__ (e.g. while being copied or unpickled) does not recurse
        # back into this method.
        lookup = self.__dict__.get('_lookup', {})
        if key not in lookup:
            raise AttributeError(
                f'{type(self).__name__!r} object has no attribute {key!r}'
            )

        def getter(row):
            return self._get_in(row, self._lookup[key])
        return getter

In [None]:
@dataclasses.dataclass
class ExampleData:
    """Flat example record with goal counts and team names as attributes."""

    # Goal counts
    hg: int
    ag: int
    # Team names
    home: str
    away: str


# An object-shaped row: values live on attributes rather than keys
example_attr = ExampleData(
    hg=5,
    ag=1,
    home='Another home team',
    away='Another away team',
)

# Map each accessor name onto the attribute that holds its value
attr_adapter = AttributeAdapter(
    home_goals='hg',
    away_goals='ag',
    home_team='home',
    away_team='away',
)

assert (
    attr_adapter.home_goals(example_attr),
    attr_adapter.away_goals(example_attr),
) == (5, 1)
assert (
    attr_adapter.home_team(example_attr),
    attr_adapter.away_team(example_attr),
) == ('Another home team', 'Another away team')

As with `KeyAdapter`, nested attributes can also be fetched using lists

In [None]:
@dataclasses.dataclass
class Scoreline:
    """Example pair of integer scores, one per side."""

    home: int
    away: int


@dataclasses.dataclass
class ExampleNestedData:
    """Example record whose scores are nested inside a `Scoreline` attribute."""

    # Nested object carrying the two scores
    scoreline: Scoreline
    # Team names
    home: str
    away: str


# The goal counts sit one level down, on the `scoreline` attribute
example_nested_attr = ExampleNestedData(
    scoreline=Scoreline(2, 5),
    home='Another home team',
    away='Another away team',
)

# Each list is a path of attribute names, followed from the row downwards
nested_attr_adapter = AttributeAdapter(
    ['scoreline', 'home'],
    ['scoreline', 'away'],
    home_team='home',
    away_team='away',
)

assert 2 == nested_attr_adapter.home_goals(example_nested_attr)
assert 5 == nested_attr_adapter.away_goals(example_nested_attr)

## Composite adapters

In [None]:
#export


class LumpedAdapter:
    """
    Lump rarely-observed teams into one placeholder term.

    Wraps another adapter. A team whose combined number of home and away
    appearances in the fitted data is at or below `min_observations`
    (defaults to 10) is reported as `placeholder` instead of its own name.
    Teams never seen in the fitted data have a count of zero and are
    therefore also reported as `placeholder`.

    Args:
        base_adapter: adapter providing `home_team`, `away_team`,
            `home_goals` and `away_goals` accessors for a row.
        data: iterable of rows used to count team appearances. It is
            consumed in a single pass, so a generator is acceptable.
        placeholder: value returned in place of a lumped team.
        min_observations: appearance count at or below which a team is
            lumped.
    """

    def __init__(self, base_adapter, data, placeholder, min_observations=10):
        self.base_adapter = base_adapter
        # Stored under `min_matches`, the attribute name the accessors read
        self.min_matches = min_observations
        self.placeholder = placeholder

        self.match_count = None
        self.fit(data)

    def _lump(self, team):
        """Return `team`, or the placeholder if it appeared too rarely."""
        # `match_count` is a Counter, so unseen teams count as 0
        if self.match_count[team] <= self.min_matches:
            return self.placeholder
        return team

    def home_team(self, row):
        """Home team of `row`, replaced by the placeholder when lumped."""
        return self._lump(self.base_adapter.home_team(row))

    def away_team(self, row):
        """Away team of `row`, replaced by the placeholder when lumped."""
        return self._lump(self.base_adapter.away_team(row))

    def home_goals(self, row):
        """Home goals of `row`, passed through from the base adapter."""
        return self.base_adapter.home_goals(row)

    def away_goals(self, row):
        """Away goals of `row`, passed through from the base adapter."""
        return self.base_adapter.away_goals(row)

    def fit(self, data):
        """Count each team's home and away appearances in `data`."""
        # One pass over `data` so that one-shot iterables are counted fully
        match_count = collections.Counter()
        for row in data:
            match_count[self.base_adapter.home_team(row)] += 1
            match_count[self.base_adapter.away_team(row)] += 1
        self.match_count = match_count