In [12]:
import fiona 
import geopandas as gpd
import pandas as pd
import re
from bs4 import BeautifulSoup
import os
import shapely
import subprocess

In [13]:
def to_kml(df, output):
    """Export a frame to KML through an intermediate GeoJSON file.

    The frame is reprojected with ``to_crs(epsg=4326)``, serialised with
    ``to_json()`` into ``<output>.json``, and that file is converted to
    ``<output>.kml`` by the ``ogr2ogr`` command-line tool.

    Parameters
    ----------
    df : object providing ``to_crs(epsg=...)`` whose result has ``to_json()``
        (a geopandas GeoDataFrame in this notebook).
    output : str
        Destination path without extension; ``.json`` and ``.kml`` are
        appended to it.

    Returns
    -------
    None
        A ``RuntimeWarning`` is issued when ``ogr2ogr`` cannot be started or
        exits with a non-zero status, so a failed conversion is visible
        without aborting the rest of the notebook.
    """
    import warnings

    json_path = '{0}.json'.format(output)
    kml_path = '{0}.kml'.format(output)

    with open(json_path, 'w') as f:
        f.write(df.to_crs(epsg=4326).to_json())

    # Pass the arguments as a list and skip the shell: paths containing
    # spaces or shell metacharacters are handed to ogr2ogr unchanged.
    command = ['ogr2ogr', '-f', 'KML', kml_path, json_path]
    try:
        return_code = subprocess.call(command)
    except OSError as exc:
        warnings.warn(
            'could not run ogr2ogr for {0}: {1}'.format(kml_path, exc),
            RuntimeWarning,
        )
        return

    if return_code != 0:
        warnings.warn(
            'ogr2ogr exited with status {0} while writing {1}'.format(return_code, kml_path),
            RuntimeWarning,
        )
    
def export(df, output):
    """Write `df` three times under ./OUTPUT, each file named after `output`.

    * ./OUTPUT/KML/<output> is handed to `to_kml`.
    * ./OUTPUT/SHP/<output> is written with `df.to_file`.
    * ./OUTPUT/CSV/<output>.csv receives every column except 'geometry'.
    """
    to_kml(df, './OUTPUT/KML/' + output)
    df.to_file('./OUTPUT/SHP/' + output)

    # Boolean mask over the column labels: True for everything but geometry.
    keep = df.columns != 'geometry'
    attributes_only = df.loc[:, keep]
    attributes_only.to_csv('./OUTPUT/CSV/' + output + '.csv')

In [14]:
def parse_html(row, dic='None'):
    """Turn the HTML table stored in ``row['Description']`` into a Series.

    Every ``<td>`` in the document is collected, the first two are skipped
    (presumably title/wrapper cells - confirm against the KML source), and
    the remaining cells are read as alternating name/value pairs after their
    markup has been stripped with ``clean_html``.

    Parameters
    ----------
    row : mapping with a 'Description' entry holding the HTML string
        (a DataFrame row when used with ``DataFrame.apply(..., axis=1)``).
    dic : ignored
        Kept only so existing calls that pass it keep working.

    Returns
    -------
    pandas.Series
        One entry per name/value pair found in the table.
    """
    html_str = row['Description']

    # Name the parser explicitly: without it BeautifulSoup warns and picks
    # whichever parser happens to be installed on the machine.
    parsed_html = BeautifulSoup(html_str, 'html.parser')

    # html.parser does not invent a <body> when the markup has none, so fall
    # back to searching the whole document in that case.
    root = parsed_html.body if parsed_html.body is not None else parsed_html

    cells = [clean_html(str(tag)) for tag in root.find_all('td')[2:]]

    # Pair cells as (name, value); a trailing cell without a partner is
    # ignored instead of making dict() raise.
    fields = dict(zip(cells[0::2], cells[1::2]))

    # Diagnostic: show rows whose FID is missing or empty.
    if not fields.get('FID'):
        print(fields)
    return pd.Series(fields)
    
def clean_html(raw_html):
    """Return `raw_html` with every `<...>` span removed.

    The match is non-greedy and `.` does not cross newlines, so a `<` whose
    closing `>` sits on a later line is left in place.
    """
    return re.sub('<.*?>', '', raw_html)

In [15]:
# Enable the KML driver in fiona's driver registry.
# The `fiona` module imported at the top of the notebook is used directly
# instead of reaching through geopandas' internal `gpd.io.file.fiona`.
fiona.drvsupport.supported_drivers['KML'] = 'rw'

# Read file
comercial_df = gpd.read_file('./data/LOTES/COMERCIAL/KML/Lts_Geral_Comercial.kml', driver='KML')

# Drop Z dimension of polygons that occurs often in kml.
# `z=None` keeps the transform usable for geometries that are already 2D,
# where the function is called with x and y only.
comercial_df.geometry = comercial_df.geometry.map(
    lambda polygon: shapely.ops.transform(lambda x, y, z=None: (x, y), polygon)
)

In [16]:
path = './data/LOTES/SERVIÇOS/KML/'

# Read every file in the folder, flatten its geometries to 2D, and combine
# the pieces once at the end. Growing a frame with `DataFrame.append` inside
# the loop copies all rows on each pass and no longer exists in pandas 2.
servicos_frames = []
for filename in sorted(os.listdir(path)):  # sorted: stable row order between runs
    lotes = gpd.read_file(os.path.join(path, filename))
    # `z=None` lets the same transform accept geometries without a Z value.
    lotes.geometry = lotes.geometry.map(
        lambda polygon: shapely.ops.transform(lambda x, y, z=None: (x, y), polygon)
    )
    servicos_frames.append(lotes)

if servicos_frames:
    servicos_df = pd.concat(servicos_frames, ignore_index=True)
else:
    # Empty folder: keep the original result of an empty GeoDataFrame.
    servicos_df = gpd.GeoDataFrame()

In [17]:
# Expand the HTML table held in `Description` into columns, then discard the
# raw HTML. `columns=` replaces the positional axis argument (`drop(..., 1)`),
# which pandas deprecated and removed in 2.0.
comercial_df = comercial_df.join(comercial_df.apply(parse_html, axis=1))
comercial_df = comercial_df.drop(columns='Description')

In [18]:
# Expand the HTML table held in `Description` into columns, then discard the
# raw HTML. `columns=` replaces the positional axis argument (`drop(..., 1)`),
# which pandas deprecated and removed in 2.0.
servicos_df = servicos_df.join(servicos_df.apply(parse_html, axis=1))
servicos_df = servicos_df.drop(columns='Description')

In [19]:
# Display the column labels currently present on servicos_df.
servicos_df.columns

Index(['Name', 'geometry', 'Abre_Tilo', 'Abre_Tplo', 'Apsa_Imov', 'Area',
       'Area_Predi', 'Area_Terre', 'Area_Total', 'Arvore', 'Atividade',
       'Bloc_Imov', 'Boca_Lobo', 'CODI_CART', 'Calcada', 'Classifica',
       'Cobertura', 'Codi_Car_1', 'Codi_Face', 'Codi_Imov', 'Codi_Lote',
       'Codi_Quad', 'Codi_Seto', 'Codi_Subl', 'Codi_Vila', 'Coleta',
       'Data_Exp_1', 'Data_Exped', 'Data_Proce', 'Debito', 'Desc_Bair',
       'Desc_Logr', 'Dive_Codi', 'Elevacao', 'Elevador', 'Esquadrias',
       'Estado_Con', 'FID', 'Forro', 'Frente', 'Gale_Pluv', 'Garagem', 'Grupo',
       'Hectares', 'Insc_Ante', 'Inst_Eletr', 'Inst_Sanit', 'Jardim',
       'Lig_Agua', 'Lim_Frente', 'Lim_Latera', 'Loca_Cart', 'Macro_Zona',
       'Marc_Posta', 'Nume_Ativ', 'Nume_Cep', 'Nume_Imov', 'Nume_Proc_',
       'Numero_Alv', 'Numero_Hab', 'Ocupacao', 'Padrao', 'Patrimonio',
       'Pedologia', 'Perimeter', 'Piscina', 'Piso', 'Poste', 'Profundida',
       'Proprietar', 'Qtde_PV_Ed', 'Qtde_Pavim', 'Qtde_

In [20]:
# Columns kept for both data sets, listed once so the two selections cannot
# drift apart.
COLUMNS_TO_KEEP = [
    'Grupo', 'Atividade', 'Nume_Ativ', 'Segmento', 'Situacao',
    'geometry', 'Area_Terre', 'Area_Total', 'Area_Predi',
]

# `.copy()` yields independent frames, so assigning to their columns later
# does not act on a slice of the wider frame (SettingWithCopyWarning).
comercial_df = comercial_df[COLUMNS_TO_KEEP].copy()
servicos_df = servicos_df[COLUMNS_TO_KEEP].copy()

In [21]:
def to_float(v):
    """Convert a number written with a decimal comma to ``float``.

    Parameters
    ----------
    v : str or number
        Text such as ``'12,5'`` or ``'1.234,56'``, or a value ``float()``
        already accepts.

    Returns
    -------
    float

    Raises
    ------
    ValueError
        If the text is still not a valid number after the separators have
        been normalised.
    """
    if not isinstance(v, str):
        # Already numeric (for instance when the conversion is run twice):
        # there is no text to clean up.
        return float(v)

    text = v.strip()
    if '.' in text and text.rfind(',') > text.rfind('.'):
        # Both separators present with the comma last: the dots can only be
        # thousands separators, so remove them.
        text = text.replace('.', '')
    return float(text.replace(',', '.'))

# Columns passed through `to_float`, and the frames they are converted on.
to_convert = ['Area_Terre', 'Area_Predi', 'Area_Total']
dfs = [comercial_df, servicos_df]

# Frame by frame, replace each listed column with its converted values.
for frame in dfs:
    for area_column in to_convert:
        frame[area_column] = frame[area_column].apply(to_float)

In [22]:
# Remove, in place, every row whose value is 0 in any of the `to_convert`
# columns, for each frame in `dfs`.
for frame in dfs:
    for area_column in to_convert:
        zero_rows = frame.index[frame[area_column] == 0]
        frame.drop(zero_rows, inplace=True)

In [23]:
# Write each frame to its lotes.shp, comercial first and then serviços.
for lotes, shp_path in (
    (comercial_df, "./data/LOTES/COMERCIAL/SHP/lotes.shp"),
    (servicos_df, "./data/LOTES/SERVIÇOS/SHP/lotes.shp"),
):
    lotes.to_file(shp_path)