In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
from metrics.calculations import pops_resetelment



In [2]:
# Build a synthetic houses dataset to demonstrate population resettlement.
# Column names mirror those used in the information model.

N_HOUSES = 10000  # number of synthetic buildings to generate
SEED = 42         # fixed seed: the generated table is identical on every run

# Dedicated generator so the random columns below are reproducible
# after a kernel restart.
rng = np.random.default_rng(SEED)

houses = pd.DataFrame(
    data={
        # every building belongs to the same city
        'city_id': [1] * N_HOUSES,
        # `high` is exclusive, so ids fall in 0..8
        'administrative_unit_id': rng.integers(low=0, high=9, size=N_HOUSES),
        # `high` is exclusive, so ids fall in 10..19
        'municipality_id': rng.integers(low=10, high=20, size=N_HOUSES),
        # sequential ids; they coincide with the default row index
        'building_id': range(N_HOUSES),
        # living area drawn uniformly from [1000, 60000)
        'living_area': rng.uniform(low=1000, high=60000, size=N_HOUSES),
    }
)

In [3]:
# Show the generated table (pandas abbreviates the display of long frames).
houses

Unnamed: 0,city_id,administrative_unit_id,municipality_id,building_id,living_area
0,1,7,13,0,15924.836593
1,1,4,14,1,27617.321590
2,1,1,10,2,56154.637855
3,1,0,11,3,14412.242486
4,1,2,12,4,35465.924357
...,...,...,...,...,...
9995,1,8,12,9995,30975.819382
9996,1,0,16,9996,33906.016980
9997,1,5,11,9997,1410.444529
9998,1,1,19,9998,26337.354156


In [4]:
# Each house's share of the total living area (a fraction of the overall sum).
# These shares are the "distribution" vector for population resettlement,
# i.e. the probability that a person ends up in that particular house.
houses['living_area_percent_all'] = houses['living_area'].div(houses['living_area'].sum())

In [5]:
# The distribution vector is ready; what is still needed is the total
# number of people to distribute across all houses.
Total_pops = 1000000

# Distribute Total_pops over the houses, passing each house's living-area
# share as its probability and its building_id as its label.
# NOTE(review): the return type of default_distr is not visible in this
# notebook; the code below only relies on pd.Series(pops_distr) being
# indexed like `houses` -- confirm against the helper's implementation.
pops_distr = pops_resetelment.PopsResetelment.default_distr(
    distributed_value=Total_pops,
    probabilities_distribution=houses['living_area_percent_all'].values,
    probabilities_names=houses.building_id.values)

# Attach the result to the houses table as the 'pops' column, joining on the
# row index. A 'pops' column left over from a previous run of this cell is
# dropped first: otherwise the merge would emit 'pops_x'/'pops_y' and the
# cast below would fail with KeyError, making the cell impossible to re-run.
houses = pd.merge(houses.drop(columns='pops', errors='ignore'),
                  pd.Series(pops_distr, name='pops'),
                  left_index=True, right_index=True)

# Head counts are stored as integers.
houses['pops'] = houses['pops'].astype(int)

In [6]:
# Adding a gender breakdown follows the same recipe: it needs its own
# distribution vector, for example the one below.
gender = ('men', 'women')
# The probabilities must sum to 1.
gender_probs = np.array([0.48, 0.52])
# Every house receives the same pair of probabilities, one column per gender.
houses['men_p'], houses['women_p'] = gender_probs

In [8]:
# Each house now carries a two-element distribution vector (men_p, women_p),
# so default_distr can be applied to every row.
def _split_pops_by_gender(house):
    """Distribute one house's 'pops' between the labels in `gender`.

    `house` is a single row of `houses`; its 'men_p' and 'women_p' values
    are passed to default_distr as the probabilities.
    """
    return pops_resetelment.PopsResetelment.default_distr(
        distributed_value=house['pops'],
        probabilities_distribution=(house['men_p'], house['women_p']),
        probabilities_names=gender,
    )


# result_type='expand' spreads each row's result over separate columns,
# which are stored as 'men' and 'women'.
houses[['men', 'women']] = houses.apply(
    _split_pops_by_gender,
    axis=1,
    result_type='expand',
)

In [10]:
# Inspect the table with the added pops, men_p/women_p and men/women columns.
houses

Unnamed: 0,city_id,administrative_unit_id,municipality_id,building_id,living_area,living_area_percent_all,pops,men_p,women_p,men,women
0,1,7,13,0,15924.836593,0.000052,58,0.48,0.52,25,33
1,1,4,14,1,27617.321590,0.000090,95,0.48,0.52,40,55
2,1,1,10,2,56154.637855,0.000184,186,0.48,0.52,80,106
3,1,0,11,3,14412.242486,0.000047,48,0.48,0.52,19,29
4,1,2,12,4,35465.924357,0.000116,118,0.48,0.52,49,69
...,...,...,...,...,...,...,...,...,...,...,...
9995,1,8,12,9995,30975.819382,0.000101,80,0.48,0.52,37,43
9996,1,0,16,9996,33906.016980,0.000111,105,0.48,0.52,44,61
9997,1,5,11,9997,1410.444529,0.000005,5,0.48,0.52,3,2
9998,1,1,19,9998,26337.354156,0.000086,98,0.48,0.52,41,57
