In [30]:
import boto3
import pandas as pd

import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
import urllib

In [8]:
def _scan_all(table):
    """Scan a DynamoDB table to the end and return one merged response.

    A single ``table.scan()`` call returns only one page of results. When
    more rows remain, the response carries ``LastEvaluatedKey``, which has to
    be passed back as ``ExclusiveStartKey`` to obtain the next page.

    Args:
        table: a boto3 DynamoDB ``Table`` resource.

    Returns:
        A dict shaped like a scan response: ``'Items'`` holds the rows of
        every page and ``'Count'`` their total number; the remaining keys are
        those of the final page.
    """
    response = table.scan()
    items = list(response['Items'])
    while 'LastEvaluatedKey' in response:
        response = table.scan(ExclusiveStartKey=response['LastEvaluatedKey'])
        items.extend(response['Items'])
    merged = dict(response)
    merged['Items'] = items
    merged['Count'] = len(items)
    return merged


dynamodb = boto3.resource('dynamodb')
idealista_scrap_table = dynamodb.Table('scrapped_ads')
imovirtual_scrap_table = dynamodb.Table('imovirtual_scrap')
# Read every page, not just the first one, so large tables are not truncated.
idealista_scrap_rows = _scan_all(idealista_scrap_table)
imovirtual_scrap_rows = _scan_all(imovirtual_scrap_table)

In [12]:
# Build one DataFrame per portal from the 'Items' list of its scan result.
df_idealista, df_imovirtual = (
    pd.DataFrame(scan_result['Items'])
    for scan_result in (idealista_scrap_rows, imovirtual_scrap_rows)
)

In [127]:
# Compare one aggregate of one column per address across both portals and
# print the result as a fixed-width table.
col_name = 'price'   # column the aggregate is computed on
func_name = 'count'  # name of the pandas Series method used as the aggregate


def _simple_address(geo):
    """Return 'neighbourhood, region' from the last entry of `geo`, or None.

    None is returned when `geo` is missing (None or NaN), empty, or when its
    last entry is empty. Assumes a present `geo` is a sequence whose last
    entry is a mapping with 'neighbourhood' and 'region' keys.
    """
    # NaN is the only value that differs from itself; None has to be checked
    # separately because len(None) would raise.
    if geo is None or geo != geo:
        return None
    if len(geo) == 0 or len(geo[-1]) == 0:
        return None
    last = geo[-1]
    return '{}, {}'.format(last['neighbourhood'], last['region'])


def _table_cell(value):
    """Return '-' for a missing aggregate and the value itself otherwise.

    Only None counts as missing, so a genuine aggregate of 0 is still shown.
    """
    return '-' if value is None else value


address_simple = [_simple_address(geo) for geo in df_idealista['geo']]
# The plotting cell relies on this column being present on df_idealista.
df_idealista['address_simple'] = address_simple

address_dict = {}
for name, group in df_imovirtual.groupby('address'):
    address_dict[name] = {'imovirtual': getattr(group[col_name], func_name)(), 'idealista': None}

for name, group in df_idealista.groupby('address_simple'):
    # Reuse the entry created for imovirtual when the address exists on both portals.
    entry = address_dict.setdefault(name, {'idealista': None, 'imovirtual': None})
    entry['idealista'] = getattr(group[col_name], func_name)()

print('{:<40}{:<15}{}'.format('Address','Idealista','Imovirtual'))
for k, v in address_dict.items():
    print('{:<40}{:<15}{}'.format(k, _table_cell(v['idealista']), _table_cell(v['imovirtual'])))

Address                                 Idealista      Imovirtual
Ajuda, Lisboa                           8              4
Alcântara, Lisboa                       11             5
Alvalade, Lisboa                        12             6
Areeiro, Lisboa                         -              6
Arroios, Lisboa                         -              10
Avenidas Novas, Lisboa                  -              48
Beato, Lisboa                           1              1
Belém, Lisboa                           -              5
Benfica, Lisboa                         3              1
Campo de Ourique, Lisboa                21             8
Campolide, Lisboa                       -              2
Carnide, Lisboa                         32             2
Estrela, Lisboa                         -              17
Lumiar, Lisboa                          40             11
Marvila, Lisboa                         1              2
Misericórdia, Lisboa                    -              3
Parque das Nações,

In [135]:
# ---------------------------
# Plot the idealista ads on a map, one colored trace per area
# ---------------------------

# Rows without an address_simple are left out of the plot.
df_group_locality = df_idealista[df_idealista.address_simple.notnull()].groupby('address_simple')

data = []
all_lats = []
all_lons = []
for name, group in df_group_locality:
    # `group` already holds exactly the rows of this area, so the full frame
    # does not need to be filtered again for every trace.
    lats = [g[-1]['latitude'] for g in group.geo]
    lons = [g[-1]['longitude'] for g in group.geo]
    all_lats.extend(lats)
    all_lons.extend(lons)
    data.append(
        go.Scattermapbox(
            name=name,
            showlegend=True,
            lat=lats,
            lon=lons,
            mode="markers",
            marker=dict(size=14),
            text=name,
            subplot="mapbox",
        )
    )

layout = go.Layout(
    autosize=True,
    height=700,
    title="Color ad by area",
)

# Center the map on the mean of the plotted points instead of on one arbitrary
# row, which fails when that row is absent or has no geo data.
# float() is applied because the stored coordinates may not be plain floats
# (presumably Decimal when read through boto3 -- TODO confirm).
map_center = None
mapbox_config = dict(style="carto-positron", zoom=11)
if all_lats:
    map_center = go.layout.mapbox.Center(
        lat=sum(float(value) for value in all_lats) / len(all_lats),
        lon=sum(float(value) for value in all_lons) / len(all_lons),
    )
    mapbox_config["center"] = map_center
layout["mapbox"] = mapbox_config

fig = go.Figure(data=data, layout=layout)
fig