In [2]:
import os
import sys

# Resolve the sibling ``../src`` directory (relative to the current working
# directory) to an absolute path and register it on sys.path once, so the
# ``framework`` imports in later cells can be resolved from there.
module_path = os.path.abspath('../src')
already_registered = module_path in sys.path
if not already_registered:
    sys.path.append(module_path)

In [3]:
from framework.parser.pandas_parser import parse_dataframe_as_items
from framework.property import CategoricalProperty, NumericalProperty

import pandas as pd
# Load the raw table; the path is relative to the notebook's working directory.
frame = pd.read_csv("../../Datasets/Temp/train.csv")

# Columns handed to the parser. The mapping below is built in this order.
property_columns = [
    "MSSubClass",
    "MSZoning",
    "LotFrontage",
    "LotArea",
    "Street",
    "Alley",
    "LotShape",
    "LandContour",
    "Utilities",
    "LotConfig",
    "LandSlope",
    "Neighborhood",
    "Condition1",
    "Condition2",
    "BldgType",
    "HouseStyle",
]
# The only columns mapped to NumericalProperty; every other listed column is
# mapped to CategoricalProperty.
numerical_columns = {"LotFrontage", "LotArea"}

# Column name -> property class used for that column.
mapping = {
    column: (NumericalProperty if column in numerical_columns else CategoricalProperty)
    for column in property_columns
}

# Turn the frame into items, passing "Id" as the identifier column.
items_og = parse_dataframe_as_items(frame, mapping, id_col="Id")

In [4]:
import random as rng
from random import choice
from framework.critique.unit_critique_generator import generate_valid_critiques

# Simulate one critiquing session: start at a random anchor item and, round by
# round, apply a critique that the goal item satisfies, drop every item that
# fails it, and jump to a random surviving item until the anchor is the goal.

# Tunables for the simulation.
SEED = None        # None keeps the original behaviour of rng.seed(); set an int to reproduce a run
MAX_STEPS = 10000  # hard cap on the number of critique rounds

# Working copy; items_og itself is not reassigned or filtered in this cell.
items = items_og.copy()

rng.seed(SEED)

# Select random item as starting point and goal
anchor = choice(items)

# Draw the goal only from items that differ from the anchor, so the run no
# longer aborts at random when the same item happens to be drawn twice.
goal_candidates = [item for item in items if item != anchor]
if not goal_candidates:
    # Nothing other than the anchor exists, so a distinct goal is impossible.
    raise Exception("Goal and anchor the same")
goal = choice(goal_candidates)

print("Starting item: {}".format(anchor))
steps = 0
while anchor != goal and steps < MAX_STEPS:
    # Get "recommended" critiques for current anchor
    possible_critiques = generate_valid_critiques(anchor)
    # Temp shuffle since no logic to order critiques
    rng.shuffle(possible_critiques)

    # Atm pick top critique that goal item passes.
    # The search is bounded: if the list is empty or no critique is satisfied
    # by the goal we get None instead of indexing past the end of the list.
    chosen = next(
        (
            candidate
            for candidate in possible_critiques
            if candidate.critique.passes_critique(goal[candidate.prop_name])
        ),
        None,
    )
    if chosen is None:
        print("No critique for the current anchor is passed by the goal; stopping")
        break
    prop_name = chosen.prop_name
    current_crit = chosen.critique
    print("Chosen critique: {}".format(current_crit))

    # Remove all items that do not match the critique. The goal was just
    # checked against this critique, so (assuming passes_critique is
    # deterministic) it survives the filter and `items` stays non-empty.
    items = [item for item in items if current_crit.passes_critique(item[prop_name])]

    # ATM pick random other from remaining set but should use a recommender here
    anchor = choice(items)
    print("New anchor: {}".format(anchor))
    print("Items left: {}".format(len(items)))
    steps += 1

# Number of critique rounds that were applied.
print(steps)

Starting item: <id:837, properties:{'MSSubClass': '30', 'MSZoning': 'RM', 'LotFrontage': '90.0', 'LotArea': '8100.0', 'Street': 'Pave', 'Alley': 'Pave', 'LotShape': 'Reg', 'LandContour': 'Lvl', 'Utilities': 'AllPub', 'LotConfig': 'Inside', 'LandSlope': 'Gtl', 'Neighborhood': 'OldTown', 'Condition1': 'Norm', 'Condition2': 'Norm', 'BldgType': '1Fam', 'HouseStyle': '1Story'}>
Chosen critique: <NotCritique> (for Pave)
New anchor: <id:722, properties:{'MSSubClass': '120', 'MSZoning': 'RM', 'LotFrontage': 'nan', 'LotArea': '4426.0', 'Street': 'Pave', 'Alley': 'nan', 'LotShape': 'Reg', 'LandContour': 'Lvl', 'Utilities': 'AllPub', 'LotConfig': 'Inside', 'LandSlope': 'Gtl', 'Neighborhood': 'CollgCr', 'Condition1': 'Norm', 'Condition2': 'Norm', 'BldgType': 'TwnhsE', 'HouseStyle': '1Story'}>
Items left: 1419
Chosen critique: <NotCritique> (for CollgCr)
New anchor: <id:747, properties:{'MSSubClass': '60', 'MSZoning': 'RL', 'LotFrontage': 'nan', 'LotArea': '8795.0', 'Street': 'Pave', 'Alley': 'nan'

In [15]:
from framework.critique.critique_recommender import recommend_critiques, calc_anchor_crits_support_structure

# NOTE(review): generate_critiques is not imported in any cell visible here
# (only generate_valid_critiques is) — confirm it is in scope on a fresh kernel.
possible_critiques = generate_critiques(anchor)
# `support` is consumed below as a mapping: prop_name -> {critique: support value}.
support = recommend_critiques(possible_critiques, items_og)

# Flatten the nested mapping into (prop_name, critique, support value) triples
# and order them by ascending support value.
as_tuples = sorted(
    (
        (prop_name, critique, score)
        for prop_name, critiques in support.items()
        for critique, score in critiques.items()
    ),
    key=lambda entry: entry[2],
)

In [17]:
%matplotlib
import seaborn as sns
# Plot distribution of critiques support.
# The third field of every as_tuples entry is the support value; the x axis is
# simply the entry's position in as_tuples.
support_of_critis = [score for _, _, score in as_tuples]
sns.set(style="darkgrid")
ax = sns.lineplot(x=list(range(len(support_of_critis))), y=support_of_critis)
# Label the figure so it can be read on its own; the trailing ';' keeps the
# return value of ax.set() out of the cell output.
ax.set(
    xlabel="Critique (position in as_tuples)",
    ylabel="Support",
    title="Support per critique",
);

Using matplotlib backend: TkAgg


<matplotlib.axes._subplots.AxesSubplot at 0x7f1f70369fd0>