In [1]:
from cse6040_devkit.sampler_testing import CaseManager
from cse6040_devkit import sampler_testing
import dill
import pandas as pd
import numpy as np
import re
from pprint import pprint
from cse6040_devkit.assignment import execute_tests
from cse6040_devkit import utils
from solutions import char_df, spec_df

# Shared CaseManager: the cells below use it to load the generated test cases
# and to run alternate implementations against them.
cm = CaseManager()

Inverted Dictionary

- Sort check: not a concern because the return type is unordered
- Edge case and distribution check: load the test cases into a DataFrame and augment it with edge-case flags
    

In [2]:
# One row per generated `inverted_dictionary` test case.
case_df = cm.load_cases_into_df('inverted_dictionary')

# Keep only the entries of `d` whose value is not a list/dict/set, so the duplicate
# check below looks at hashable values only.
# NOTE(review): presumably `prefilter` is applied to each dict before the duplicate
# check -- confirm in sampler_testing.
exclude_unhash_prefilter = lambda d: {k:v for k,v in d.items() if not isinstance(v, (list, dict, set))}
case_df['has_dup_hashable'] = case_df['d'].apply(sampler_testing.dict_has_duplicate_values,
                                                 prefilter=exclude_unhash_prefilter)

# Keep only the entries of `d` whose value is a list/dict/set (the unhashable values).
exclude_hash_prefilter = lambda d: {k:v for k,v in d.items() if isinstance(v, (list, dict, set))}
case_df['has_dup_unhashable'] = case_df['d'].apply(sampler_testing.dict_has_duplicate_values,
                                                   prefilter=exclude_hash_prefilter)

case_df['has_unhash'] = case_df['d'].apply(sampler_testing.dict_has_unhashable_values)

# Fixed seed so that Restart & Run All always displays the same five rows.
display(case_df.sample(5, random_state=0))

relavent_cols = ['error_raised', 'allow_dup_vals', 'allow_unhashable_vals',
                 'has_dup_hashable', 'has_dup_unhashable', 'has_unhash']

# Number of cases for every observed combination of parameters, outcome and edge-case flags.
case_summary_df = case_df.groupby(relavent_cols)['d'].count().reset_index().rename(columns={'d': 'count'})

display(case_summary_df)

# Combinations that must never appear in the summary.
expected_zero_cases = [
    # an error although both duplicates and unhashable values are allowed
    case_summary_df['error_raised'] & case_summary_df['allow_dup_vals'] & case_summary_df['allow_unhashable_vals'],
    # an error with duplicates allowed although no unhashable value is present
    case_summary_df['error_raised'] & case_summary_df['allow_dup_vals'] & (case_summary_df['has_unhash'] == False),
    # an error with unhashable values allowed although no hashable value is duplicated
    case_summary_df['error_raised'] & case_summary_df['allow_unhashable_vals'] & (case_summary_df['has_dup_hashable'] == False),
    # no error although duplicates are disallowed and a hashable value is duplicated
    (case_summary_df['error_raised'] == False) & (case_summary_df['allow_dup_vals'] == False) & case_summary_df['has_dup_hashable'],
    # no error although unhashable values are disallowed and one is present
    (case_summary_df['error_raised'] == False) & (case_summary_df['allow_unhashable_vals'] == False) & case_summary_df['has_unhash']
]

for zero_case in expected_zero_cases:
    # On failure, show the offending summary rows instead of a bare AssertionError.
    assert sum(zero_case) == 0, (
        "unexpected parameter/outcome combination(s) in the generated cases:\n"
        f"{case_summary_df[zero_case.astype(bool)]}"
    )

initializing <class 'cse6040_devkit.test_case.test_case_gen.TestCaseGenerator'>


Unnamed: 0,allow_unhashable_vals,allow_dup_vals,inverted_dictionary_output,d,error_raised,has_dup_hashable,has_dup_unhashable,has_unhash
64,True,True,"{7: 1, 22: 47, 25: 38, 37: 8, 47: 23, 19: 29, ...","{1: 7, 47: 22, 38: 25, 8: 37, 23: 47, 29: 19, ...",False,False,False,False
45,False,True,,"{11: {5}, 37: 23, 32: 19, 36: {2}, 49: 3, 16: ...",True,True,True,True
130,False,True,,"{35: [27], 43: {'value': 13}, 28: 9, 17: {13},...",True,True,True,True
326,False,False,"{22: 24, 4: 4, 37: 47, 45: 0, 15: 5, 18: 14, 1...","{24: 22, 4: 4, 47: 37, 0: 45, 5: 15, 14: 18, 2...",False,False,False,False
593,False,False,,"{44: 43, 11: {'value': True}, 34: 36, 17: 24, ...",True,True,False,True


Unnamed: 0,error_raised,allow_dup_vals,allow_unhashable_vals,has_dup_hashable,has_dup_unhashable,has_unhash,count
0,False,False,False,False,False,False,70
1,False,False,True,False,False,False,77
2,False,False,True,False,False,True,51
3,False,False,True,False,True,True,21
4,False,True,False,False,False,False,85
5,False,True,False,True,False,False,73
6,False,True,True,False,False,False,62
7,False,True,True,False,False,True,44
8,False,True,True,False,True,True,13
9,False,True,True,True,False,False,83


concatenate_quotes

Edge cases in SQL queries often come from nulls in the source tables or from the results of joins.

There are two sort requirements. The quote ordering used for concatenation is guaranteed to be sufficient (if there are no nulls).

In [3]:
# Load the visible and hidden test cases generated for `concatenate_quotes`.
visible, hidden = cm.load_cases('concatenate_quotes')

def null_quote_check(df):
    """Assert that `df` contains no null cells.

    If any cell is null, the assertion message shows the rows that contain one.
    """
    any_null = df.isna().any(axis=None)
    assert not any_null, f"{df[df.isna().any(axis=1)]=}"


# Apply both checks to every visible and hidden case.
for case in (*visible, *hidden):
    # sortability check on the expected result, using the two required sort keys
    sampler_testing.assert_frame_sortable(
        case['result'],
        by=['quote_count', 'name'],
    )
    # null check in the source `quotes` table
    null_quote_check(case['conn']['quotes'])

initializing <class 'cse6040_devkit.test_case.test_case_gen.TestCaseGenerator'>


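- Sort check: not a concern because sets are unordered
- Check which characters appear in the result words
- Check for pattern-matching (non-word) characters in the source articles
- Check for whitespace characters in the source articles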

In [4]:
# One row per generated `extract_article_words` test case.
cases_df = cm.load_cases_into_df('extract_article_words')

from functools import reduce
from collections import Counter

# Union of every expected result set. Unlike `reduce` without an initial value,
# `set().union(*...)` also works when there are no cases at all.
all_article_words = set().union(*cases_df['result'])

# Every expected word must consist solely of alphabetic characters.
for word in all_article_words:
    assert all(c.isalpha() for c in word), f"non-alphabetic character in expected word {word!r}"

all_articles = '\n'.join(record['Article'] for record in cases_df['record'].values)

# Characters that occur in the expected words.
print(Counter(c for word in all_article_words for c in word))

# Punctuation/symbols, digits and underscores that occur in the source articles.
print(Counter(re.findall(r'[^\w\s]|[\d_]', all_articles)))

# Whitespace characters that occur in the source articles.
print(Counter(re.findall(r'\s', all_articles)))

def alternate_extract_article_words(record):
    """Return the set of lower-cased alphabetic words in `record['Article']`.

    Any character that is neither a letter nor whitespace is treated as a
    word separator.
    """
    lowered = record['Article'].lower()
    separated = [ch if (ch.isalpha() or ch.isspace()) else ' ' for ch in lowered]
    return set(''.join(separated).split())

# Run the alternate implementation against the visible and hidden test cases.
visible_test_output, hidden_test_output  = cm.test_alternate_function(alternate_extract_article_words,
                                                                      'extract_article_words')

initializing <class 'cse6040_devkit.test_case.test_case_gen.TestCaseGenerator'>
Counter({'e': 1043, 'a': 795, 'i': 743, 'r': 677, 'n': 670, 's': 652, 't': 603, 'o': 553, 'l': 423, 'd': 407, 'c': 347, 'u': 282, 'g': 275, 'm': 260, 'p': 256, 'h': 238, 'b': 145, 'f': 137, 'y': 118, 'w': 114, 'v': 95, 'k': 93, 'j': 32, 'z': 20, 'q': 15, 'x': 15})
Counter({':': 112, ',': 90, '.': 85, '-': 75, '>': 71, '1': 56, '_': 51, '2': 50, '0': 50, '/': 35, '<': 34, '(': 26, ')': 26, '8': 21, '5': 21, '6': 21, '4': 19, '3': 17, "'": 17, '7': 14, '´': 14, '’': 10, '9': 9, '"': 9, '$': 8, '‘': 2, '–': 2, '“': 1})
Counter({' ': 3729, '\n': 39, '\xa0': 7})
extract_article_words test ran 99 iterations in 0.04 seconds
extract_article_words test ran 99 iterations in 0.03 seconds
visible tests pass
hidden tests pass


In [5]:
# Load the `count_articles` cases and combine visible and hidden into one list.
visible, hidden = cm.load_cases('count_articles') 
all_cases = [*visible, *hidden]

def any_count_one(c):
    """Return True if at least one key of `c` maps to a count of exactly 1.

    Args:
        c: Mapping-like object that yields its keys when iterated and
            supports `c[key]` lookup.

    Returns:
        True when some `c[key] == 1`; False otherwise, including when `c`
        is empty.
    """
    # `any` stops at the first match and needs no `reduce` import.
    return any(c[key] == 1 for key in c)


# Per case: does the expected result contain a count of exactly one, and is the
# `article_words` input empty?
df = pd.DataFrame(
    {
        'any_count_one': cm.map_param(all_cases, 'result', func=any_count_one),
        'empty_article_words': cm.map_param(
            all_cases,
            'article_words',
            func=lambda words: len(words) == 0,
        ),
    }
)
df

initializing <class 'cse6040_devkit.test_case.test_case_gen.TestCaseGenerator'>


Unnamed: 0,any_count_one,empty_article_words
0,True,False
1,True,False
2,False,False
3,True,False
4,False,False
5,True,False
6,False,False
7,True,False
8,True,False
9,False,True


In [6]:
# Load the `record_examiner` cases and combine visible and hidden into one list.
visible, hidden = cm.load_cases('record_examiner')
cases = [*visible, *hidden]

# The reducer / mapper callables are summarised by their `__name__`.
get_name = lambda func: func.__name__

# One row per case; `some_iter` is shown through `str`.
df = pd.DataFrame(
    {
        'red_func': cm.map_param(cases, 'red_func', func=get_name),
        'map_func': cm.map_param(cases, 'map_func', func=get_name),
        'initial': cm.map_param(cases, 'initial'),
        'some_iter': cm.map_param(cases, 'some_iter', func=str),
        'result': cm.map_param(cases, 'result'),
    },
    dtype='object',
)
utils.display_df_text_wrap(df)

initializing <class 'cse6040_devkit.test_case.test_case_gen.TestCaseGenerator'>


Unnamed: 0,red_func,map_func,initial,some_iter,result
0,val * itm,y = -2x + 5,0.0,"[0, 3, -8, 7, -6, -5, 0, 3, 2, 5, -9, -5, -4, 4, 1]",-99.0
1,val + itm,y = -2x + 12,,"[9, 4, -3, -2, 8, -3, 9, 1, -7, -3, 7, -1, -10, 6, -2]",154.0
2,val * itm,y = -2x + 5,,"[2, 9, 5, -6, -5, -4, -4, -9, 5, -5, 9, 9, 3, 3, 8]",-33.0
3,val + itm,y = -2x + 12,,"[-10, -3, -7, 8, 8, -2, 1, -9, -10, 9, 9, 0, -8, 8, -3]",198.0
4,val * itm,y = 3.0x + 5.0,,"[2, 1, -7, -1, 2, 8, -5, -10, -3, -9, -6, 3, 5, -6, 7]",4.0
5,val + itm,y = 3x + 5,,"[5, 0, 6, -5, -9, 6, 4, 9, 6, 6, -2, 6, 3, 5, -2]",189.0
6,val * itm,y = -2.0x + 12.0,,"[7, 8, -1, -9, -7, 1, -9, 2, 1, -9, -7, 2, -8, 0, -5]",-252.0
7,val + itm,y = 3x + 12,,"[-6, -3, -8, 2, 4, -1, 7, -2, -7, -7, -10, -3, -2, 3, -1]",78.0
8,val + itm,y = 3x + 5,4.0,"[-6, -3, 6, -1, -3, -2, -7, 1, 1, -3, -9, 8, -4, 5, -7]",7.0
9,val + itm,y = 3x + 12,-1.0,"[4, 2, -10, -3, -9, -2, -2, -7, 0, -1, 1, 0, 2, 9, -5]",116.0


In [7]:
# Load the `conn_to_df` cases and combine visible and hidden into one list.
visible, hidden = cm.load_cases('conn_to_df')
all_cases = [*visible, *hidden]

# Per case, summarise the set of Python value types found in each result column.
# Wrapped in `display(...)` because this is not the cell's last expression: a bare
# DataFrame here would be built and silently discarded.
display(pd.DataFrame(cm.map_param(all_cases, 'result',
                                  func=lambda df: df.apply(lambda col: str({type(v) for v in col})))))

def alternate_conn_to_df(conn):
    """Alternate `conn_to_df`: one row per character read from the `quotes` table.

    Each row carries the character name, the character's quotes joined with
    '|', and the number of quotes. Per the inline notes, the row ordering and
    column types used here are deliberately different.
    """
    query = '''
                        SELECT 
                            character_name AS name,
                            GROUP_CONCAT(quote, '|') AS quotes,
                            COUNT(*) AS quote_count
                        FROM (SELECT * FROM quotes order by quote)
                        GROUP BY character_name
                        ORDER BY quote_count, name -- different ordering
                     '''
    # different column types
    column_types = {'name': 'string',
                    'quotes': 'string',
                    'quote_count': 'float'}
    return pd.read_sql(query, conn).astype(column_types)

# Run the alternate implementation against the visible and hidden test cases.
visible_test_output, hidden_test_output = cm.test_alternate_function(alternate_conn_to_df,
                                                                     'conn_to_df')

initializing <class 'cse6040_devkit.test_case.test_case_gen.TestCaseGenerator'>
conn_to_df test ran 99 iterations in 0.51 seconds
conn_to_df test ran 99 iterations in 0.50 seconds
visible tests pass
hidden tests pass


In [8]:
# Load the `count_values` cases and combine visible and hidden into one list.
visible, hidden = cm.load_cases('count_values')
all_cases = [*visible, *hidden]

df = pd.DataFrame({
    # NOTE(review): presumably `map_param` forwards `pd.Series` as the second
    # argument to `isinstance` -- confirm in CaseManager.
    'is_instance_Series': cm.map_param(all_cases, 's', pd.Series, func=isinstance),
    # Expected results rendered as text.
    'result': cm.map_param(all_cases, 'result', func=str),
})
utils.display_df_text_wrap(df)

def alternate_count_values(s):
    """Count the occurrences of each distinct value in `s`, least frequent first."""
    counts = s.value_counts()
    return counts.sort_values(ascending=True)

# Run the alternate implementation against the visible and hidden test cases;
# the returned outputs are not needed here.
_, _ = cm.test_alternate_function(alternate_count_values,
                                  'count_values')

initializing <class 'cse6040_devkit.test_case.test_case_gen.TestCaseGenerator'>


Unnamed: 0,is_instance_Series,result
0,True,"cat 6 the 6 hat 4 two 4 mooney 3 in 2 fish 2 Name: count, dtype: int64"
1,True,"the 6 two 6 blue 6 in 4 marvin 2 one 2 hat 2 Name: count, dtype: int64"
2,True,"mooney 6 fish 6 two 6 cat 6 grinch 5 marvin 5 in 4 Name: count, dtype: int64"
3,True,"in 6 marvin 4 one 4 blue 4 grinch 4 fish 3 two 2 Name: count, dtype: int64"
4,True,"cat 6 mooney 5 one 4 two 4 in 4 the 2 blue 2 Name: count, dtype: int64"
5,True,"hat 6 mooney 6 blue 6 marvin 5 grinch 5 fish 4 one 3 Name: count, dtype: int64"
6,True,"cat 6 k 6 mooney 5 marvin 4 grinch 4 in 4 blue 3 Name: count, dtype: int64"
7,True,"two 6 hat 5 blue 4 marvin 3 one 2 k 2 cat 2 Name: count, dtype: int64"
8,True,"marvin 5 one 3 fish 3 blue 2 mooney 2 k 2 two 2 Name: count, dtype: int64"
9,True,"grinch 6 k 6 the 5 hat 5 two 4 fish 3 cat 3 Name: count, dtype: int64"


count_values test ran 99 iterations in 0.11 seconds
count_values test ran 99 iterations in 0.11 seconds
visible tests pass
hidden tests pass


In [9]:
# Load the `mat_vec_div` cases and combine visible and hidden into one list.
visible, hidden = cm.load_cases('mat_vec_div')
all_cases = [*visible, *hidden]

df = pd.DataFrame({
    'result': cm.map_param(all_cases, 'result'),
    # Smallest absolute entry of each expected result.
    'min_abs': cm.map_param(all_cases, 'result', func=lambda arr: np.min(np.abs(arr))),
})

def alternate_mat_vec_div(A, x):
    """Alternate `mat_vec_div`: return `A.dot(x / x.dot(x.T))`."""
    x_dot_x = x.dot(x.T)
    return A.dot(x / x_dot_x)

# Run the alternate implementation against the visible and hidden test cases,
# then display the summary frame as the cell's output.
_, _ = cm.test_alternate_function(alternate_mat_vec_div,
                                  'mat_vec_div')
df

initializing <class 'cse6040_devkit.test_case.test_case_gen.TestCaseGenerator'>
mat_vec_div test ran 99 iterations in 0.03 seconds
mat_vec_div test ran 99 iterations in 0.03 seconds
visible tests pass
hidden tests pass


Unnamed: 0,result,min_abs
0,"[0.853, 3.593, -3.438, 4.858, -0.07]",0.07
1,"[3.748, -1.289, 1.574, -1.713, 1.69]",1.289
2,"[-8.722, -2.872, -0.846, -2.12, 4.512, 10.767]",0.846
3,"[-3.518, -1.104, -3.466]",1.104
4,"[-1.662, 2.152, 1.542, 3.201, 1.763, 1.388, 0....",0.699
5,"[-2.449, -0.956, 0.636, 4.096, -4.292, -0.985,...",0.636
6,"[1.506, 4.748, -7.124]",1.506
7,"[3.736, 3.46, -1.492]",1.492
8,"[-0.568, 0.152, 1.578, -2.681, -0.673, 0.69, -...",0.152
9,"[0.717, 1.025, 0.662, -3.109]",0.662


In [10]:
# Load the `df_to_coo` cases and combine visible and hidden into one list.
visible, hidden = cm.load_cases('df_to_coo')
all_cases = [*visible, *hidden]

# Per case: is every "row, col" string of the input frame unique?
cm.map_param(
    all_cases,
    'df',
    func=lambda frame: (frame['row'].astype('str') + ', ' + frame['col'].astype('str')).is_unique,
)

initializing <class 'cse6040_devkit.test_case.test_case_gen.TestCaseGenerator'>


[False,
 True,
 False,
 False,
 True,
 True,
 True,
 True,
 False,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 False,
 False,
 True,
 True,
 True,
 True,
 True,
 False,
 True,
 True,
 False,
 False]