In [26]:
from get_blocks.blocks_getter import BlocksModel

import os
import geopandas as gpd

def get_blocks():
    """Build the city blocks layer once and cache it as a parquet file.

    If ``output_data/blocks.parquet`` already exists, nothing is done.
    Otherwise the blocks are produced by ``BlocksModel`` (configured with
    ``city_name='Санкт-Петербург'``, ``city_crs=32636``, ``city_admin_level=5``)
    and written to that path.

    Returns:
        None. The result is only persisted to disk, never returned.
    """
    path = 'output_data/blocks.parquet'
    if os.path.exists(path):
        # A previous run already produced the file; reuse it.
        return

    blocks = BlocksModel(city_name='Санкт-Петербург', city_crs=32636, city_admin_level=5).get_blocks()
    # Create the output folder on demand so a fresh checkout does not fail on write.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    blocks.to_parquet(path)

In [27]:
import pandas as pd
from tqdm import tqdm
# Register tqdm's pandas integration so DataFrames gain `progress_apply`,
# which prepare_data_balancer() calls on the buildings table.
tqdm.pandas()

def _get_living_area(row):
    if row['living_area']:
        return row['living_area']
    else:
        if row['is_living']:
            if row['storeys_count']:
                if row['building_area']:
                    living_area = row['building_area'] * row['storeys_count'] * 0.7
                    
                    return living_area
                else:
                    return 0
            else:
                return 0
        else:
            return 0
            


def _get_living_area_pyatno(row):
    if row['living_area']:
        return row['building_area']
    else:
        return 0


def _aggregate_by_block(joined, key, aggregations):
    """Aggregate a blocks-to-features join per block, exposing the real block id as ``block_id``."""
    return (
        joined.groupby(key)
        .agg(aggregations)
        .reset_index()
        .rename(columns={key: 'block_id'})
    )


def prepare_data_balancer():
    """Build and cache the two per-block input tables.

    Nothing is done when both output files already exist. Otherwise the
    function reads ``blocks``, ``services``, ``buildings``, ``greens`` and
    ``parkings`` from ``output_data/*.parquet`` and writes:

    * ``output_data/SERVICES_LINKED_TO_BLOCKS.parquet`` - services spatially
      joined to the blocks they intersect, with the matched block's index
      label stored in ``block_id``.
    * ``output_data/BLOCKS_AND_BUILDINGS_INFO.parquet`` - the blocks layer
      enriched with ``current_population``, ``floors`` (median storeys),
      ``current_living_area``, ``current_green_capacity``,
      ``current_green_area``, ``current_parking_capacity``,
      ``current_industrial_area`` and ``area`` (geometry area).

    Requires ``tqdm.pandas()`` to have been called, because the per-building
    helpers are applied through ``progress_apply``.

    Returns:
        None. Results are only written to disk.
    """
    path1 = 'output_data/BLOCKS_AND_BUILDINGS_INFO.parquet'
    path2 = 'output_data/SERVICES_LINKED_TO_BLOCKS.parquet'

    if os.path.exists(path1) and os.path.exists(path2):
        # Both outputs are cached; skip the recomputation.
        return

    blocks = gpd.read_parquet('output_data/blocks.parquet')
    services = gpd.read_parquet('output_data/services.parquet')
    buildings = gpd.read_parquet('output_data/buildings.parquet')
    greens = gpd.read_parquet('output_data/greens.parquet')
    parkings = gpd.read_parquet('output_data/parkings.parquet')

    # NOTE(review): block_id here is the blocks *index* label (index_right),
    # while the aggregates below are keyed by the blocks 'id' column - confirm
    # the two coincide in blocks.parquet.
    services_and_blocks = (
        gpd.sjoin(services, blocks[['geometry']], predicate='intersects')
        .rename(columns={'index_right': 'block_id'})
        .reset_index(drop=True)
    )
    services_and_blocks.to_parquet(path2)

    # Green areas and parkings are reduced to their centroids before joining.
    greens['geometry'] = greens['geometry'].centroid
    greens = greens[['area', 'capacity', 'geometry']].rename(
        columns={'area': 'current_green_area', 'capacity': 'current_green_capacity'}
    )

    parkings['geometry'] = parkings['geometry'].centroid
    parkings = parkings[['capacity', 'geometry']].rename(
        columns={'capacity': 'current_parking_capacity'}
    )

    # Assign the filled columns back: `df[col].fillna(..., inplace=True)` acts on
    # a temporary and is a no-op under pandas Copy-on-Write.
    buildings['living_area'] = buildings['living_area'].fillna(0)
    buildings['storeys_count'] = buildings['storeys_count'].fillna(0)

    # Order matters: the footprint helper reads the living_area computed just before it.
    buildings['living_area'] = buildings.progress_apply(_get_living_area, axis=1)
    buildings['living_area_pyatno'] = buildings.progress_apply(_get_living_area_pyatno, axis=1)
    buildings['total_area'] = buildings['building_area'] * buildings['storeys_count']

    # Left joins keep every block; the real block id is kept as the key instead
    # of a positional counter, so the merges below cannot misalign rows.
    blocks_and_greens = _aggregate_by_block(
        gpd.sjoin(blocks, greens, predicate='intersects', how='left'),
        'id',
        {'current_green_capacity': 'sum', 'current_green_area': 'sum'},
    )

    blocks_and_parkings = _aggregate_by_block(
        gpd.sjoin(blocks, parkings, predicate='intersects', how='left'),
        'id',
        {'current_parking_capacity': 'sum'},
    )

    # Both layers carry an 'id' column, so the join suffixes them; the block id is 'id_left'.
    # A building intersecting several blocks contributes to each of them.
    blocks_and_buildings = _aggregate_by_block(
        gpd.sjoin(blocks, buildings, predicate='intersects', how='left')
        .drop(columns=['id_right', 'index_right']),
        'id_left',
        {
            'population_balanced': 'sum',
            'building_area': 'sum',
            'storeys_count': 'median',
            'total_area': 'sum',
            'living_area': 'sum',
            'living_area_pyatno': 'sum',
        },
    )

    per_block = (
        blocks_and_buildings
        .merge(blocks_and_greens, on='block_id')
        .merge(blocks_and_parkings, on='block_id')
    )

    blocks_info = gpd.GeoDataFrame(
        pd.merge(blocks, per_block, left_on='id', right_on='block_id'),
        geometry='geometry',
    )

    # Summed building_area minus the summed building_area of buildings with living area.
    blocks_info['current_industrial_area'] = (
        blocks_info['building_area'] - blocks_info['living_area_pyatno']
    )
    blocks_info['area'] = blocks_info['geometry'].area

    blocks_info = blocks_info.rename(
        columns={
            'population_balanced': 'current_population',
            'storeys_count': 'floors',
            'living_area_pyatno': 'current_living_area',
        }
    ).drop(columns=['building_area', 'total_area', 'living_area'])

    blocks_info.to_parquet(path1)

In [28]:
# Run the preparation steps. Order matters: get_blocks() writes
# output_data/blocks.parquet, which prepare_data_balancer() reads whenever its
# own outputs are not cached yet.
get_blocks()
prepare_data_balancer()