In [None]:
import ipytest
import numpy as np
import pytest
import pandas as pd

In [None]:
# Command-line options passed to pytest when this notebook's tests are run.
PYTEST_OPTIONS = [
    '-v',           # verbose: report every test individually
    '--color=yes',  # always emit coloured output
]

In [None]:
@pytest.fixture
def expected_gender():
    """Return the integer series ``[1, 0]`` named ``gender``."""
    values = [1, 0]
    return pd.Series(values, name='gender')


@pytest.fixture
def expected_years_lived_dead():
    """Return a one-element series named ``years_lived`` holding 87."""
    return pd.Series(data=87, name='years_lived')


@pytest.fixture
def expected_years_lived_alive():
    """Return a one-element series named ``years_lived`` holding 55.

    NOTE(review): the test consuming this fixture supplies a birth date of
    1963-05-07 together with a missing death date.  If ``_build_years_lived``
    measures age against the current date, this constant is only valid for
    a one-year window -- confirm against the implementation.
    """
    return pd.Series(data=55, name='years_lived')


@pytest.fixture
def expected_theoretical_physicist_categories():
    """Return the integer series ``[1, 0]`` named ``theoretical_physicist``."""
    flags = [1, 0]
    return pd.Series(flags, name='theoretical_physicist')


@pytest.fixture
def expected_theoretical_physicist_field():
    """Return the integer series ``[1, 0, 0]`` named ``theoretical_physicist``."""
    flags = [1, 0, 0]
    return pd.Series(flags, name='theoretical_physicist')


@pytest.fixture
def expected_experimental_physicist_description():
    """Return the integer series ``[1, 0, 0]`` for the experimental-physicist test.

    The series is labelled ``experimental_physicist`` to match the sub-field
    this fixture describes (it previously carried the copy-pasted label
    ``theoretical_physicist``).  ``Series.equals`` does not compare names, so
    the label is informational for tests that use ``.equals``.
    """
    flags = [1, 0, 0]
    return pd.Series(flags, name='experimental_physicist')


@pytest.fixture
def expected_astronomer_comment():
    """Return the integer series ``[1, 0, 0]`` for the astronomer test.

    The series is labelled ``astronomer`` to match the sub-field this fixture
    describes (it previously carried the copy-pasted label
    ``theoretical_physicist``).  ``Series.equals`` does not compare names, so
    the label is informational for tests that use ``.equals``.
    """
    flags = [1, 0, 0]
    return pd.Series(flags, name='astronomer')


@pytest.fixture
def expected_num_laureates():
    """Return the integer series ``[2, 0, 0]`` named
    ``num_physics_laureate_academic_advisors``."""
    counts = [2, 0, 0]
    return pd.Series(counts, name='num_physics_laureate_academic_advisors')


@pytest.fixture
def expected_num_laureates_name():
    """Return the integer series ``[1, 0]`` named
    ``num_chemistry_laureate_spouses``."""
    counts = [1, 0]
    return pd.Series(counts, name='num_chemistry_laureate_spouses')


@pytest.fixture
def expected_places_codes():
    """Return a series of alpha-3 code lists named ``workplaces_alpha_3_codes``.

    Each element is a list of country codes; two of the six elements are
    empty lists.
    """
    codes_per_row = [
        ['DNK'],
        [],
        ['GBR', 'IND', 'PAK'],
        ['AZE', 'GEO', 'RUS'],
        ['DEU', 'UKR', 'USA'],
        [],
    ]
    return pd.Series(codes_per_row, name='workplaces_alpha_3_codes')


@pytest.fixture
def expected_citizenship_codes_citizenship():
    """Return the expected citizenship frame for the ``citizenship`` test.

    Each row holds a list of country alpha-3 codes, its length, a list of
    continent codes and its length.
    """
    column_names = [
        'citizenship_country_alpha_3_codes',
        'num_citizenship_country_alpha_3_codes',
        'citizenship_continent_codes',
        'num_citizenship_continent_codes',
    ]
    rows = [
        [['USA'], 1, ['NA'], 1],
        [['USA'], 1, ['NA'], 1],
        [['DEU', 'GBR', 'USA'], 3, ['EU', 'NA'], 2],
    ]
    return pd.DataFrame(rows, columns=column_names)


@pytest.fixture
def expected_citizenship_codes_nationality():
    """Return the expected citizenship frame for the ``nationality`` test.

    Each row holds a list of country alpha-3 codes, its length, a list of
    continent codes and its length.
    """
    column_names = [
        'citizenship_country_alpha_3_codes',
        'num_citizenship_country_alpha_3_codes',
        'citizenship_continent_codes',
        'num_citizenship_continent_codes',
    ]
    rows = [
        [['IND'], 1, ['AS'], 1],
        [['RUS'], 1, ['EU'], 1],
        [['IRN', 'USA'], 2, ['AS', 'NA'], 2],
    ]
    return pd.DataFrame(rows, columns=column_names)


@pytest.fixture
def expected_citizenship_codes_description():
    """Return the expected citizenship frame for the ``description`` test.

    Each row holds a list of country alpha-3 codes, its length, a list of
    continent codes and its length.
    """
    column_names = [
        'citizenship_country_alpha_3_codes',
        'num_citizenship_country_alpha_3_codes',
        'citizenship_continent_codes',
        'num_citizenship_continent_codes',
    ]
    rows = [
        [['GBR'], 1, ['EU'], 1],
        [['GBR'], 1, ['EU'], 1],
        [['DEU', 'USA'], 2, ['EU', 'NA'], 2],
    ]
    return pd.DataFrame(rows, columns=column_names)


@pytest.fixture
def expected_citizenship_codes_multiple():
    """Return the expected citizenship frame for the multi-column test.

    Each row holds a list of country alpha-3 codes, its length, a list of
    continent codes and its length.
    """
    column_names = [
        'citizenship_country_alpha_3_codes',
        'num_citizenship_country_alpha_3_codes',
        'citizenship_continent_codes',
        'num_citizenship_continent_codes',
    ]
    rows = [
        [['AUS', 'GBR'], 2, ['EU', 'OC'], 2],
        [['FRA', 'GBR'], 2, ['EU'], 1],
        [['RUS', 'UKR'], 2, ['EU'], 1],
        [['COL', 'ESP', 'USA'], 3, ['EU', 'NA', 'SA'], 3],
    ]
    return pd.DataFrame(rows, columns=column_names)


@pytest.fixture
def expected_citizenship_codes_not_found():
    """Return the expected citizenship frame when nothing is matched.

    The single row holds two empty lists and two zero counts.
    """
    column_names = [
        'citizenship_country_alpha_3_codes',
        'num_citizenship_country_alpha_3_codes',
        'citizenship_continent_codes',
        'num_citizenship_continent_codes',
    ]
    rows = [
        [[], 0, [], 0],
    ]
    return pd.DataFrame(rows, columns=column_names)

In [None]:
# Remove test functions left over from earlier executions of this cell so
# that renamed or deleted tests are not run again.  Every name prefix used
# by the tests below needs a pattern here; the citizenship tests
# (``test_build_citizenship_features_*``) were previously not covered.
ipytest.clean_tests("test_build_features*")
ipytest.clean_tests("test_build_num_laureates*")
ipytest.clean_tests("test_build_places_codes*")
ipytest.clean_tests("test_build_citizenship_features*")



def test_build_features_gender(expected_gender):
    """Check ``_build_gender`` on a male/female pair against the fixture."""
    raw_gender = pd.Series(['male', 'female'])
    encoded = _build_gender(raw_gender)
    assert encoded.equals(expected_gender)
    

def test_build_features_years_lived_dead(expected_years_lived_dead):
    """Check ``_build_years_lived`` when both birth and death dates are given."""
    born = pd.Series('1908-11-12')
    died = pd.Series('1996-1-19')
    years = _build_years_lived(born, died)
    assert years.equals(expected_years_lived_dead)
    
    
def test_build_features_years_lived_alive(expected_years_lived_alive):
    """Check ``_build_years_lived`` when the death date is missing (NaN)."""
    born = pd.Series('1963-05-07')
    died = pd.Series(np.nan)
    years = _build_years_lived(born, died)
    assert years.equals(expected_years_lived_alive)
    

def test_build_features_physics_subfield_categories(
        expected_theoretical_physicist_categories):
    """Check ``_build_physics_subfield`` when only categories are supplied.

    The remaining three text inputs are passed as empty string-typed series.
    """
    search_terms = {'categories': 'Theoretical physicists',
                    'others': 'theoretical physic'}
    categories = pd.Series(['Nuclear Physicist|Theoretical physicists',
                            '1932 deaths|American physicists'])
    flags = _build_physics_subfield(
        categories,
        pd.Series(dtype=str),
        pd.Series(dtype=str),
        pd.Series(dtype=str),
        search_terms)
    assert flags.equals(expected_theoretical_physicist_categories)
    
    
def test_build_features_physics_subfield_field(
        expected_theoretical_physicist_field):
    """Check ``_build_physics_subfield`` when only the field input has text.

    Categories are an empty-valued series on the same index; the last two
    inputs are all-NaN series of matching length.
    """
    search_terms = {'categories': 'Theoretical physicists',
                    'others': 'theoretical physic'}
    field = pd.Series(['physics|Theoretical physics',
                       'Philosophy|Mathematics|Quantum field theory',
                       np.nan])
    n_rows = len(field)
    flags = _build_physics_subfield(
        pd.Series(dtype=str, index=field.index),
        field,
        pd.Series(np.full(n_rows, np.nan)),
        pd.Series(np.full(n_rows, np.nan)),
        search_terms)
    assert flags.equals(expected_theoretical_physicist_field)
                     

def test_build_features_physics_subfield_description(
        expected_experimental_physicist_description):
    """Check ``_build_physics_subfield`` when only the description has text.

    Categories are an empty-valued series on the same index; the second and
    fourth inputs are all-NaN series of matching length.
    """
    search_terms = {'categories': 'Experimental physicists',
                    'others': 'experimental physic'}
    description = pd.Series(['Marie Curie was chair of experimental physics',
                             'Lise Meitner was a nuclear physicist',
                             np.nan])
    n_rows = len(description)
    flags = _build_physics_subfield(
        pd.Series(dtype=str, index=description.index),
        pd.Series(np.full(n_rows, np.nan)),
        description,
        pd.Series(np.full(n_rows, np.nan)),
        search_terms)
    assert flags.equals(expected_experimental_physicist_description)
    

def test_build_features_physics_subfield_comment(
        expected_astronomer_comment):
    """Check ``_build_physics_subfield`` when only the comment has text.

    The positional order used here -- categories, field, description,
    comment, search terms -- follows the slots the sibling subfield tests
    fill for their respective inputs.  The comment series therefore goes in
    the fourth slot; it was previously passed in the third (description)
    slot, which left the comment input untested.
    """
    search_terms = {'categories': 'astronomers',
                    'others': 'astronom'}
    comment = pd.Series(['Antony Hewish was an astronomer',
                         'Jocelyn Bell Burnell is an Astrophysicist',
                         np.nan])
    n_rows = len(comment)
    flags = _build_physics_subfield(
        pd.Series(dtype=str, index=comment.index),  # categories
        pd.Series(np.full(n_rows, np.nan)),         # field
        pd.Series(np.full(n_rows, np.nan)),         # description
        comment,                                    # comment
        search_terms)
    assert flags.equals(expected_astronomer_comment)
    
    
def test_build_num_laureates(expected_num_laureates):
    """Check ``_build_num_laureates`` on pipe-separated academic advisors.

    The lookup columns come from the ``nobel_physicists`` frame.
    """
    academic_advisors = pd.Series(['J. J. Thomson|William Henry Bragg',
                                   'Joe Bloggs',
                                   np.nan])
    counts = _build_num_laureates(academic_advisors,
                                  nobel_physicists.Laureate,
                                  nobel_physicists.name)
    assert counts.equals(expected_num_laureates)
    

def test_build_num_laureates_name(expected_num_laureates_name):
    """Check ``_build_num_laureates`` on spouse names.

    The lookup columns come from the ``nobel_chemists`` frame.
    """
    spouses = pd.Series(['Marie Skłodowska-Curie',
                         'Pierre Curie'])
    counts = _build_num_laureates(spouses,
                                  nobel_chemists.Laureate,
                                  nobel_chemists.name)
    assert counts.equals(expected_num_laureates_name)

    
def test_build_places_codes(expected_places_codes):
    """Check ``_build_places_codes`` on pipe-separated place names.

    Inputs include a single place, a missing value, several multi-place
    strings and a string written so that it should match nothing.  The
    lookup columns come from the ``places`` frame.
    """
    place_names = pd.Series([
        'Copenhagen',
        np.nan,
        'Jhang|Presidencies and provinces of British India|Punjab Province (British India)',
        'Elisabethpol Governorate|Ganja, Azerbaijan|Russian Empire|Tbilisi',
        'California Institute of Technology|Institute for Advanced Study|'
        'Kharkiv Polytechnic Institute|Leipzig University|'
        'Los Angeles Bureau of Power and Light|University of Cincinnati|Xavier University',
        'No possible chance of being found in places dataframe',
    ])
    codes = _build_places_codes(place_names,
                                places.fullName,
                                places.countryAlpha3Code)
    assert codes.equals(expected_places_codes)


def test_build_citizenship_features_citizenship(
        expected_citizenship_codes_citizenship):
    """Check ``_build_citizenship_features`` when only ``citizenship`` is set.

    The function is called for its effect on the initially empty
    ``features`` frame, which is then compared with the fixture.
    """
    records = [
        ['American', np.nan, np.nan],
        ['United States', np.nan, np.nan],
        ['British nationality law|Citizenship of United States|Nazi Germany',
         np.nan,
         np.nan],
    ]
    physicists = pd.DataFrame(
        records, columns=['citizenship', 'nationality', 'description'])
    features = pd.DataFrame()

    _build_citizenship_features(features, physicists, nationalities)

    assert features.equals(expected_citizenship_codes_citizenship)
    

def test_build_citizenship_features_nationality(
        expected_citizenship_codes_nationality):
    """Check ``_build_citizenship_features`` when only ``nationality`` is set.

    The function is called for its effect on the initially empty
    ``features`` frame, which is then compared with the fixture.
    """
    records = [
        [np.nan, 'Indian people', np.nan],
        [np.nan, 'Soviet Union', np.nan],
        [np.nan,
         'Iranian Americans|No chance in hell',
         np.nan],
    ]
    physicists = pd.DataFrame(
        records, columns=['citizenship', 'nationality', 'description'])
    features = pd.DataFrame()

    _build_citizenship_features(features, physicists, nationalities)

    assert features.equals(expected_citizenship_codes_nationality)
    
    
def test_build_citizenship_features_description(
        expected_citizenship_codes_description):
    """Check ``_build_citizenship_features`` when only ``description`` is set.

    The function is called for its effect on the initially empty
    ``features`` frame, which is then compared with the fixture.
    """
    records = [
        [np.nan, np.nan, 'Scottish physicist'],
        [np.nan, np.nan, 'British physicist and discovered the atom.'],
        [np.nan,
         np.nan,
         'German-American physicist and founder of relativity theory.'],
    ]
    physicists = pd.DataFrame(
        records, columns=['citizenship', 'nationality', 'description'])
    features = pd.DataFrame()

    _build_citizenship_features(features, physicists, nationalities)

    assert features.equals(expected_citizenship_codes_description)
    
    
def test_build_citizenship_features_multiple(
        expected_citizenship_codes_multiple):
    """Check ``_build_citizenship_features`` with several columns populated.

    The function is called for its effect on the initially empty
    ``features`` frame, which is then compared with the fixture.
    """
    records = [
        ['Australia', 'Australian-British theoretical physicist', np.nan],
        ['British', np.nan, 'French born mathematician'],
        [np.nan, 'Soviet Union', 'Soviet-Ukrainian optical physicist'],
        ['Colombian', 'Colombian-Spanish', 'Colombian born physicist living in '
         'the United States of America'],
    ]
    physicists = pd.DataFrame(
        records, columns=['citizenship', 'nationality', 'description'])
    features = pd.DataFrame()

    _build_citizenship_features(features, physicists, nationalities)

    assert features.equals(expected_citizenship_codes_multiple)
    
    
def test_build_citizenship_features_not_found(
        expected_citizenship_codes_not_found):
    """Check ``_build_citizenship_features`` on a row expected to match nothing.

    The function is called for its effect on the initially empty
    ``features`` frame, which is then compared with the fixture.
    """
    records = [
        [np.nan, 'Wakanda', 'physicist'],
    ]
    physicists = pd.DataFrame(
        records, columns=['citizenship', 'nationality', 'description'])
    features = pd.DataFrame()

    _build_citizenship_features(features, physicists, nationalities)

    assert features.equals(expected_citizenship_codes_not_found)


# Run the tests through ipytest, pointing it at the test notebook and
# passing along the shared pytest options.
ipytest.run_pytest(pytest_options=PYTEST_OPTIONS,
                   filename='../../tests/test_3.0-build-features.ipynb')