In [1]:
import fiona 
import geopandas as gpd
import pandas as pd
import re
from bs4 import BeautifulSoup
import os
import shapely

In [2]:
def parse_html(row, dic='None'):
    """Expand the HTML table stored in a row's ``Description`` into a Series.

    The ``<td>`` cells of the description are read in document order; the
    first two are skipped and the remaining ones are paired up as
    (field name, field value).

    Parameters
    ----------
    row : mapping
        Row holding the HTML string under the ``'Description'`` key.
    dic : object, optional
        Ignored; kept only so existing callers keep working.

    Returns
    -------
    pandas.Series
        One entry per field name/value pair found in the table.
    """
    html_str = row['Description']
    # Name the parser explicitly: letting BeautifulSoup guess emits a warning
    # and may build a different tree depending on which parsers are installed.
    parsed_html = BeautifulSoup(html_str, 'html.parser')
    # html.parser does not synthesise a <body>, so fall back to the document root.
    root = parsed_html.body if parsed_html.body is not None else parsed_html
    cells = [clean_html(str(tag)) for tag in root.find_all('td')[2:]]
    # Pair cells as (name, value); zip drops a trailing unpaired cell instead
    # of letting dict() fail on an odd cell count.
    dic = dict(zip(cells[::2], cells[1::2]))
    # Diagnostic: show records whose FID is missing or empty
    # (.get avoids a KeyError when the field is absent).
    if not dic.get('FID'):
        print(dic)
    return pd.Series(dic)
    
def clean_html(raw_html):
    """Return ``raw_html`` with every ``<...>`` span removed.

    The match is non-greedy and does not cross line breaks.
    """
    tag_pattern = re.compile('<.*?>')
    return tag_pattern.sub('', raw_html)

In [3]:
# Enable the KML driver in Fiona's supported-driver table.
# Use the imported `fiona` module directly rather than reaching through
# geopandas' internal `gpd.io.file.fiona` attribute, which is not guaranteed to exist.
fiona.drvsupport.supported_drivers['KML'] = 'rw'

# Read file
comercial_df = gpd.read_file('./data/LOTES/COMERCIAL/KML/Lts_Geral_Comercial.kml', driver='KML')

# Drop the Z dimension that KML geometries often carry. `z=None` lets the
# callback also accept geometries that are already 2D (called with x, y only).
comercial_df.geometry = comercial_df.geometry.map(
    lambda geom: shapely.ops.transform(lambda x, y, z=None: (x, y), geom)
)

In [4]:
path = './data/LOTES/SERVIÇOS/KML/'

# Read every KML file in the folder (sorted for a reproducible row order) and
# collect the frames so they are concatenated once, instead of growing a
# frame with DataFrame.append, which was removed in pandas 2.0.
servicos_frames = []
for filename in sorted(os.listdir(path)):
    # Skip anything that is not a KML file (e.g. hidden OS files).
    if not filename.lower().endswith('.kml'):
        continue
    df = gpd.read_file(os.path.join(path, filename), driver='KML')
    # Drop the Z dimension; `z=None` keeps the callback valid for 2D geometries too.
    df.geometry = df.geometry.map(
        lambda geom: shapely.ops.transform(lambda x, y, z=None: (x, y), geom)
    )
    servicos_frames.append(df)

# pd.concat raises on an empty list, so keep the empty-frame result for an empty folder.
if servicos_frames:
    servicos_df = pd.concat(servicos_frames, ignore_index=True)
else:
    servicos_df = gpd.GeoDataFrame()

In [None]:
# Expand each row's HTML description into one column per parsed field.
comercial_df = comercial_df.join(comercial_df.apply(parse_html, axis=1))
# Use the `columns` keyword: passing the axis positionally to drop() was
# deprecated in pandas 1.3 and removed in pandas 2.0.
comercial_df = comercial_df.drop(columns='Description')

In [None]:
# Expand each row's HTML description into one column per parsed field.
servicos_df = servicos_df.join(servicos_df.apply(parse_html, axis=1))
# Use the `columns` keyword: passing the axis positionally to drop() was
# deprecated in pandas 1.3 and removed in pandas 2.0.
servicos_df = servicos_df.drop(columns='Description')

# `columns` is an attribute (an Index), not a method; calling it raises TypeError.
servicos_df.columns

In [39]:
# Columns kept in both layers; defined once so the two selections cannot drift apart.
lot_columns = ['Grupo', 'Atividade', 'Nume_Ativ', 'Segmento', 'Situacao', 'geometry', 'Area_Terre', 'Area_Total']

# .copy() makes each selection an independent frame, so assigning to its
# columns afterwards does not trigger pandas' SettingWithCopyWarning.
comercial_df = comercial_df[lot_columns].copy()
servicos_df = servicos_df[lot_columns].copy()

In [43]:
def to_float(v):
    """Convert a string that uses a decimal comma into a float.

    Every ``,`` in ``v`` is replaced by ``.`` before the text is handed to
    ``float``; anything ``float`` cannot parse raises ``ValueError``.
    """
    return float(v.replace(',', '.'))

# Run to_float over both area columns of both layers (commercial first, then services).
for frame in (comercial_df, servicos_df):
    for column in ('Area_Terre', 'Area_Total'):
        frame[column] = frame[column].apply(to_float)

In [44]:
comercial_shp = "./data/LOTES/COMERCIAL/SHP/lotes.shp"
servicos_shp = "./data/LOTES/SERVIÇOS/SHP/lotes.shp"

# Create the output folders up front so the export also works on a fresh
# checkout; to_file is not expected to create missing directories (confirm
# for the installed geopandas/fiona version).
for shp_path in (comercial_shp, servicos_shp):
    os.makedirs(os.path.dirname(shp_path), exist_ok=True)

comercial_df.to_file(comercial_shp)
servicos_df.to_file(servicos_shp)