# Test summarize function

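This notebook checks that the `summarize` function (which classifies a repo as Python 2 only, Python 3 only, either, neither, a mix, or empty) gives intuitive results. It does so by evaluating `summarize`, together with the companion `select` function, on every 0/1 combination of the four counts `either`, `neither`, `python2`, and `python3`, and displaying the results as a table.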

This notebook is not part of the main data analysis workflow.

In [1]:
import pandas as pd
import numpy as np
import sys
from os.path import join
import itertools

In [2]:
def summarize(either, neither, python2, python3):
    """Return a one-word label describing a set of per-category counts.

    Parameters
    ----------
    either, neither, python2, python3 : number
        Tallies for each category (presumably per-repo counts; confirm
        against the caller).

    Returns
    -------
    str
        The first matching label, checked in this order:

        * ``'neither'`` - ``neither`` is positive (takes precedence over
          everything else);
        * ``'only2'``   - ``python2`` is positive and ``python3`` is zero;
        * ``'only3'``   - ``python3`` is positive and ``python2`` is zero;
        * ``'either'``  - ``python2`` and ``python3`` are both zero and
          ``either`` is positive;
        * ``'mix'``     - ``python2`` and ``python3`` are both positive;
        * ``'empty'``   - none of the above.
    """
    # Any "neither" count overrides all other information.
    if neither > 0:
        return 'neither'

    no_py2, no_py3 = python2 == 0, python3 == 0
    has_py2, has_py3 = python2 > 0, python3 > 0

    if no_py3 and has_py2:
        return 'only2'
    if no_py2 and has_py3:
        return 'only3'
    if no_py2 and no_py3 and either > 0:
        return 'either'
    return 'mix' if (has_py2 and has_py3) else 'empty'


# Element-wise wrapper so `summarize` can be applied to array-like arguments.
summarize_vec = np.vectorize(summarize)

def select(either, neither, python2, python3):
    """Decide whether a set of per-category counts is selected.

    Returns ``True`` when ``either + python3`` is positive and ``python2``
    does not exceed ``python3``; otherwise ``False``.

    NOTE(review): ``neither`` is accepted for signature parity with
    ``summarize`` but is never read here, so a positive ``neither`` count
    does not prevent selection. Confirm that this is intended.
    """
    has_py3_compatible = either + python3 > 0
    py2_not_dominant = python2 <= python3
    # bool() keeps the return type a plain Python bool for any numeric input.
    return bool(has_py3_compatible and py2_not_dominant)


# Element-wise wrapper so `select` can be applied to array-like arguments.
select_vec = np.vectorize(select)

In [3]:
# Every 0/1 assignment to four positions; itertools.product already emits
# them in lexicographic order for an ascending input.
combs = list(itertools.product((0, 1), repeat=4))

In [4]:
combs

[(0, 0, 0, 0),
 (0, 0, 0, 1),
 (0, 0, 1, 0),
 (0, 0, 1, 1),
 (0, 1, 0, 0),
 (0, 1, 0, 1),
 (0, 1, 1, 0),
 (0, 1, 1, 1),
 (1, 0, 0, 0),
 (1, 0, 0, 1),
 (1, 0, 1, 0),
 (1, 0, 1, 1),
 (1, 1, 0, 0),
 (1, 1, 0, 1),
 (1, 1, 1, 0),
 (1, 1, 1, 1)]

In [5]:
# One row per combination; the column order matches the positional
# parameters of `summarize` and `select`.
example_df = pd.DataFrame(
    data=combs,
    columns=['either', 'neither', 'python2', 'python3'],
)
example_df

Unnamed: 0,either,neither,python2,python3
0,0,0,0,0
1,0,0,0,1
2,0,0,1,0
3,0,0,1,1
4,0,1,0,0
5,0,1,0,1
6,0,1,1,0
7,0,1,1,1
8,1,0,0,0
9,1,0,0,1


In [6]:
# Call the vectorized helpers once on whole columns rather than once per row
# through `DataFrame.apply(axis=1)`: the row-wise form passed scalars to
# np.vectorize, which is slow and makes each call return a 0-d array
# instead of a plain value.
example_df['summary'] = summarize_vec(
    example_df['either'], example_df['neither'],
    example_df['python2'], example_df['python3'])
example_df['select'] = select_vec(
    example_df['either'], example_df['neither'],
    example_df['python2'], example_df['python3'])

In [7]:
example_df

Unnamed: 0,either,neither,python2,python3,summary,select
0,0,0,0,0,empty,False
1,0,0,0,1,only3,True
2,0,0,1,0,only2,False
3,0,0,1,1,mix,True
4,0,1,0,0,neither,False
5,0,1,0,1,neither,True
6,0,1,1,0,neither,False
7,0,1,1,1,neither,True
8,1,0,0,0,either,True
9,1,0,0,1,only3,True
