# Visualization scenario summary statistics

In [1]:
import csv
import json
import re

In [2]:
# Input CSV of offers; passed to read_csv further down in this file.
csv_data_file_path = './data/geocoded_sampledata_a.csv'
# Destination file for the JSON dump of the computed scenario summaries.
json_output_file_path = './output/scenarios_with_summary_grouped.json'
# Default options for read_csv; "delimiter" is forwarded to csv.reader.
read_csv_default_options = { "delimiter": "," }

# Offer column whose value get_fits_size compares against the size threshold.
size_column = "Qm2"
# Offer column whose value get_fits_price compares against the price threshold.
price_column = "Price"

In [3]:
# Household scenarios to evaluate. get_scenario_thresholds reads
# household["adults"], household["children"] and household["income"] from each entry.
scenarios = [
	{"household": { "adults": 1, "children": 2, "income": 1200 }},
	{"household": { "adults": 2, "children": 3, "income": 2000 }}
]

In [4]:
# Offer column used as the default grouping key by get_scenarios_with_summary_grouped.
default_group_by = "Municipali"

In [5]:
# Default multiplier on income for the rent price threshold (get_scenario_threshold_price_rent).
default_factor_threshold_price_rent_income = 0.3
# Default multiplier on income for the purchase price threshold (get_scenario_threshold_price_purchase).
default_factor_threshold_price_purchase_income = 50
# Default key into scenario["thresholds"]["price"] used by get_fits_price.
default_factor_threshold_price_option = "rent"

In [6]:
# Default inputs for get_scenario_threshold_size, which computes
# min_size + adults * factor_size_adults + children * factor_size_children.
# NOTE(review): units are presumably those of the size column ("Qm2") — confirm.
default_options_threshold_size = {
	"min_size": 20,
	"factor_size_adults": 15,
	"factor_size_children": 10
}

In [7]:
def read_csv(file_path, options=read_csv_default_options):

	"""
	Reads a csv file and parses every data row into a dictionary.

	The first non-empty row is taken as the header row; its values become
	the keys of the dictionaries built from all following rows.

	Arguments
	file_path: (relative) file path of the csv file
	options: set of options to pass to the reader / parser.
		delimiter: delimiter character for parsing the csv file.

	Returns
	list of dictionaries, one per data row (the header row is not included)
	"""

	# Open the file that was asked for (not the module-level default path).
	# newline="" lets the csv module handle line endings itself, as its
	# documentation requires for files passed to csv.reader.
	# NOTE(review): no explicit encoding is passed, so the platform default
	# applies — confirm the encoding of the data files.
	with open(file_path, newline="") as csv_file:
		rows = csv.reader(csv_file, delimiter=options["delimiter"])

		header_row = []
		offers = []

		for row in rows:
			# parse_row answers [False, row] while no header is known yet,
			# otherwise [parsed_dict, header_row].
			offer, reference_row = parse_row(row, header_row)

			if offer is False:
				header_row = reference_row
			else:
				offers.append(offer)

	return offers

In [8]:
def parse_row_to_dict_with_header(row, header_row):

	"""
	Builds a dictionary from a row of values and a header row of keys.

	Both lists are read in parallel: the value at position i of row
	is stored under the key at position i of header_row.
	Values beyond the length of header_row are ignored;
	a row shorter than header_row raises an IndexError.

	Arguments
	row: list of values
	header_row: list of keys
	"""

	return {key: row[position] for position, key in enumerate(header_row)}

In [9]:
def parse_row(row, header_row):

	"""
	Parses a row as a dictionary keyed by the values of the header_row.

	Arguments
	row: list to parse (values)
	header_row: reference list (keys)

	Returns a two-element list
	index=0: the parsed row, or False if the header_row was passed empty
	index=1: the header_row, or the original row if the header_row was passed empty
	"""

	has_header = len(header_row) > 0

	if has_header:
		return [parse_row_to_dict_with_header(row, header_row), header_row]

	# Without a header there is nothing to parse against:
	# hand the row back so the caller can use it as the header.
	return [False, row]

In [10]:
def get_scenario_threshold_price_rent(income, income_factor=default_factor_threshold_price_rent_income):

	"""
	Computes the rent price threshold of a household.

	Arguments
	income: income of the household
	income_factor: multiplier applied to the income

	Returns
	income multiplied by income_factor
	"""

	threshold = income * income_factor
	return threshold

def get_scenario_threshold_price_purchase(income, income_factor=default_factor_threshold_price_purchase_income):

	"""
	Computes the purchase price threshold of a household.

	Arguments
	income: income of the household
	income_factor: multiplier applied to the income

	Returns
	income multiplied by income_factor
	"""

	threshold = income * income_factor
	return threshold

In [11]:
def get_scenario_threshold_price(income):

	"""
	Computes both price thresholds for the given income.

	Returns
	dictionary with the keys "rent" and "purchase",
	each calculated with the default income factor of its helper function.
	"""

	return {
		"rent": get_scenario_threshold_price_rent(income),
		"purchase": get_scenario_threshold_price_purchase(income),
	}

In [12]:
def get_scenario_threshold_size(adults, children, options=default_options_threshold_size):

	"""
	Computes the minimum size required by a household.

	Arguments
	adults: number of adults
	children: number of children
	options: dictionary with the keys
		min_size: base size added for every household
		factor_size_adults: size added per adult
		factor_size_children: size added per child

	Returns
	min_size plus the size required by the adults and the children
	"""

	size_per_adult = options["factor_size_adults"]
	size_per_child = options["factor_size_children"]
	base_size = options["min_size"]

	return base_size + adults * size_per_adult + children * size_per_child

In [13]:
def get_scenario_thresholds(scenario):

	"""
	Returns a shallow copy of the scenario
	complemented with a "thresholds" entry
	holding the size threshold ("size") and the price thresholds ("price")
	calculated from the scenario's household.
	"""

	enriched = dict(scenario)
	household = enriched["household"]

	size_threshold = get_scenario_threshold_size(household["adults"], household["children"])
	price_thresholds = get_scenario_threshold_price(household["income"])

	enriched["thresholds"] = {"size": size_threshold, "price": price_thresholds}
	return enriched

In [14]:
def find_index(list, attribute, target):

	"""
	Looks up the first dictionary whose value stored under attribute equals target.

	Arguments
	list: list of dictionaries to search
	attribute: key to compare
	target: value to look for

	Returns
	the index of the first match, or -1 if there is no match
	"""

	matching_positions = (
		position
		for position, entry in enumerate(list)
		if entry[attribute] == target
	)
	return next(matching_positions, -1)

def cast_column_float(string):

	"""
	Converts the string value of a column to a float.

	All whitespace characters are removed before the conversion.
	The remaining characters have to form a valid float literal
	(digits with an optional decimal point), otherwise float() raises a ValueError.
	"""

	return float(re.sub(r'\s', '', string))

In [15]:
def get_fits_size(offer, scenario):

	"""
	Checks if the offer is large enough for the scenario.

	Returns True if the size of the offer (column size_column)
	is greater than or equal to the size threshold of the scenario.
	"""

	offer_size = cast_column_float(offer[size_column])
	required_size = scenario["thresholds"]["size"]
	return offer_size >= required_size

def get_fits_price(offer, scenario, price_option=default_factor_threshold_price_option):

	"""
	Checks if the offer is affordable for the scenario.

	Arguments
	offer: offer dictionary, read at the column price_column
	scenario: scenario dictionary including its thresholds
	price_option: key of the price threshold to compare against

	Returns True if the price of the offer
	is less than or equal to the selected price threshold of the scenario.
	"""

	offer_price = cast_column_float(offer[price_column])
	price_limit = scenario["thresholds"]["price"][price_option]
	return offer_price <= price_limit


In [16]:
# Every offer adds this amount to the total count of its group.
increase_total_by = 1

def get_result_with_increments(result, fits_size, fits_price):

	"""
	Returns a copy of the offer_result dictionary
	complemented with the increments of a single offer.

	count_total: always increase_total_by
	count_fits_size: 1 if the offer fits the size threshold, otherwise 0
	count_fits_price: 1 only if the offer fits the size AND the price threshold, otherwise 0
	"""

	fits_both = fits_size and fits_price

	increments = {
		"count_total": increase_total_by,
		"count_fits_size": 1 if fits_size else 0,
		"count_fits_price": 1 if fits_both else 0,
	}

	updated = dict(result)
	updated.update(increments)
	return updated

In [17]:
def get_result_with_name(result, offer, group_by):

	"""
	Returns a copy of the offer_result dictionary
	complemented with the entry "name",
	which holds the value of the offer in the group_by column.
	"""

	named = dict(result)
	named.update(name=offer[group_by])
	return named

In [18]:
def get_offer_results(scenario, offer, group_by):

	"""
	Returns the offer_result dictionary of a single offer.

	The offer is checked against the size and price thresholds of the scenario.
	The result holds the increments for the summary statistics
	and the name of the group the offer belongs to (its value in the group_by column).
	"""

	size_ok = get_fits_size(offer, scenario)
	price_ok = get_fits_price(offer, scenario)

	counted = get_result_with_increments({}, size_ok, price_ok)
	return get_result_with_name(counted, offer, group_by)

In [19]:
def update_groups(groups, offer_results):

	"""
	Returns a new _groups list updated by a single offer_result.

	If no group with the name of the offer_result exists yet,
	a copy of the offer_result is appended as a new group.
	Otherwise the counters of the offer_result are added to the existing group.

	Neither the groups list, nor the dictionaries it contains,
	nor the offer_results dictionary are modified.

	Arguments
	groups: list of group dictionaries (name, count_total, count_fits_size, count_fits_price)
	offer_results: dictionary with the same keys describing a single offer
	"""

	counter_keys = ("count_total", "count_fits_size", "count_fits_price")

	_groups = list(groups)

	found_index = find_index(_groups, "name", offer_results["name"])

	if found_index == -1:
		# Store a copy, so later updates of the group never reach back
		# into the dictionary owned by the caller.
		_groups.append(dict(offer_results))
		return _groups

	# list(groups) is only a shallow copy: the group dictionary has to be
	# copied as well before its counters are increased.
	updated_group = dict(_groups[found_index])
	for key in counter_keys:
		updated_group[key] += offer_results[key]
	_groups[found_index] = updated_group

	return _groups


In [20]:
def get_summary_grouped(scenario, offers, group_by):

	"""
	Returns the summary statistics of a scenario as a list of groups.

	Every offer is evaluated against the scenario
	and its increments are added to the group
	named by the value of the offer in the group_by column.
	"""

	summary = []

	for current_offer in offers:
		summary = update_groups(summary, get_offer_results(scenario, current_offer, group_by))

	return summary

In [21]:
def get_scenarios_with_summary_grouped(scenarios, offers, group_by=default_group_by):

	"""
	Returns a new list with one entry per scenario.

	Each entry is the scenario complemented with its thresholds ("thresholds")
	and the grouped summary statistics of the offers ("summary_grouped").

	Arguments
	scenarios: list of scenario dictionaries
	offers: list of offer dictionaries
	group_by: column name of the offers used for grouping
	"""

	def with_summary(scenario):
		enriched = get_scenario_thresholds(scenario)
		enriched["summary_grouped"] = get_summary_grouped(enriched, offers, group_by)
		return enriched

	return [with_summary(scenario) for scenario in scenarios]

In [22]:
# Load the offers from the configured csv file and print the first one as a quick check.
offers = read_csv(csv_data_file_path)
print(offers[0])

{'Country': 'Serbia', 'City': 'Belgrade', 'Municipali': 'Opstina Vracar', 'District': 'Pravni fakultet', 'Street': 'Svetozara Markovica', 'Price': '125', 'Qm2': '42', 'struktura': '15 Broj soba', 'result_num': '0', 'osm_id': '5210158', 'display_na': 'Светозара Марковића, Београд (Врачар), Градска општина Врачар, Београд, Град Београд, Централна Србија, 11000, Србија', 'category': 'highway', 'type': 'residential', 'latlong': '44.8083054,20.4695537', 'label': '', 'label_1': '', 'label_2': ''}


In [23]:
# Compute thresholds and grouped summary statistics for every scenario (default grouping column).
scenarios_with_summary_grouped = get_scenarios_with_summary_grouped(scenarios, offers)
print(scenarios_with_summary_grouped)

[{'household': {'adults': 1, 'children': 2, 'income': 1200}, 'thresholds': {'size': 55, 'price': {'rent': 360.0, 'purchase': 60000}}, 'summary_grouped': [{'count_total': 448, 'count_fits_size': 278, 'count_fits_price': 11, 'name': 'Opstina Vracar'}]}, {'household': {'adults': 2, 'children': 3, 'income': 2000}, 'thresholds': {'size': 80, 'price': {'rent': 600.0, 'purchase': 100000}}, 'summary_grouped': [{'count_total': 448, 'count_fits_size': 157, 'count_fits_price': 27, 'name': 'Opstina Vracar'}]}]


In [24]:
# Write the scenarios including their summaries to the output file as indented JSON.
with open(json_output_file_path, 'w') as fp:
	json.dump(scenarios_with_summary_grouped, fp, indent=2)

In [17]:
def threshold_price_purchase(income_monthly, *, interest_rate=0.0255, duration=30, factor_disposable_income=0.3, down_payment_share=0.2):

	"""
	Estimates the purchase price a household can afford.

	The disposable part of the income is treated as the annual payment of a loan.
	The financed amount is the present value of these payments over the duration
	(annuity formula); the affordable price is that amount scaled up so that
	the financed amount covers the part of the price left after the down payment.

	Arguments
	income_monthly: monthly income of the household
	interest_rate: annual interest rate of the loan (0.0255 = 2.55 %)
	duration: number of annual payments
	factor_disposable_income: share of the income available for the payments
	down_payment_share: share of the price paid as down payment, in the range [0, 1)

	Returns
	index=0: the down payment
	index=1: the affordable price (financed amount plus down payment)

	Raises
	ValueError if down_payment_share is outside of the range [0, 1)
	"""

	if not 0 <= down_payment_share < 1:
		raise ValueError("down_payment_share must be in the range [0, 1)")

	months_in_year = 12
	income_disposable_annual = income_monthly * factor_disposable_income * months_in_year

	if interest_rate == 0:
		# Without interest the present value of the payments is simply their sum.
		annuity_factor = duration
	else:
		growth = pow(1 + interest_rate, duration)
		annuity_factor = (growth - 1) / (growth * interest_rate)

	# The financed amount covers (1 - down_payment_share) of the price,
	# e.g. a down payment of 20 % scales the financed amount by 1.25.
	factor_down_payment = 1 / (1 - down_payment_share)

	affordable_price = income_disposable_annual * factor_down_payment * annuity_factor
	down_payment = affordable_price * down_payment_share

	return [down_payment, affordable_price]

# Example call for a monthly income of 3200; the returned list is shown as the cell output.
threshold_price_purchase(3200)

[59879.32908305337, 299396.64541526686]