In [1]:
"""
Read predicted dataset, match links to given offers and send an email notification
with summary of the most interesting offers.
"""
import os
import logging as log

import pandas as pd

import columns
from common import (
    CONCATED_DATA_PATH,
    PREDICTED_DATA_PATH,
    logs_conf,
)
from s3_client import s3_client


# Configure the root logger from the project-wide settings imported from `common`.
log.basicConfig(**logs_conf)

# NOTE(review): this rebinds the imported name `s3_client` to the object returned by
# calling it, so the imported callable is no longer reachable under this name until
# the import above is executed again. notify_task() below relies on this global.
s3_client = s3_client()

def _load_offers_with_links(dtype):
    """Load the newest predictions for one offer type and attach URL and title.

    Args:
        dtype: Offer type forwarded to ``s3_client.read_newest_df_from_s3``
            (``'sale'`` or ``'rent'`` in this notebook).

    Returns:
        The predicted DataFrame with ``columns.URL`` and ``columns.TITLE``
        joined in from the concatenated dataset on ``columns.OFFER_ID``.
    """
    predicted_df = s3_client.read_newest_df_from_s3(PREDICTED_DATA_PATH, dtype=dtype)
    concated_df = s3_client.read_newest_df_from_s3(CONCATED_DATA_PATH, dtype=dtype)
    link_columns = [columns.OFFER_ID, columns.URL, columns.TITLE]
    # Left join: every predicted offer is kept, even when the concatenated data
    # has no matching offer id (URL and title are then missing for that row).
    return predicted_df.merge(concated_df[link_columns],
                              on=columns.OFFER_ID,
                              how='left')


def notify_task():
    """Load sale and rent predictions enriched with each offer's URL and title.

    The notification steps that used to follow are commented out in this
    notebook, so the function currently only loads and returns the data.

    Returns:
        A ``(df_s, df_r)`` tuple: the enriched sale frame and the enriched
        rent frame, in that order.
    """
    log.info("Starting notify task...")

    df_s = _load_offers_with_links('sale')
    df_r = _load_offers_with_links('rent')
    return df_s, df_r

# Disabled notification steps, kept for reference (not executed):
#    notification_html = prepare_notification(predicted_df_s, concated_df_s, predicted_df_r, concated_df_r)
#    response = send_notification(notification_html)
#    if response:
#        log.info("Successfully finished notification task.")
#

credentials.py 10:22:09 INFO: Found credentials in shared credentials file: ~/.aws/credentials


In [6]:
# Run the load once and keep the results under trailing-underscore names;
# a later cell assigns df_s / df_r from these.
df_s_, df_r_ = notify_task() 

<ipython-input-1-94cdfc4b0488> 10:24:16 INFO: Starting notify task...
s3_client.py 10:24:16 INFO: Downloading sale/predicted/sale_predicted_2020_01_03T00_35_15.parquet from flats-data bucket ...
s3_client.py 10:24:19 INFO: Successfully downloaded sale/predicted/sale_predicted_2020_01_03T00_35_15.parquet from flats-data bucket.
s3_client.py 10:24:20 INFO: Downloading sale/concated/sale_concated_2020_01_02T00_20_10.csv from flats-data bucket ...
s3_client.py 10:24:42 INFO: Successfully downloaded sale/concated/sale_concated_2020_01_02T00_20_10.csv from flats-data bucket.
s3_client.py 10:24:49 INFO: Downloading rent/predicted/rent_predicted_2020_01_03T00_22_00.parquet from flats-data bucket ...
s3_client.py 10:24:50 INFO: Successfully downloaded rent/predicted/rent_predicted_2020_01_03T00_22_00.parquet from flats-data bucket.
s3_client.py 10:24:50 INFO: Downloading rent/concated/rent_concated_2020_01_02T00_17_47.csv from flats-data bucket ...
s3_client.py 10:25:00 INFO: Successfully downl

In [42]:
# Work on copies rather than aliases: the analysis cell below adds a column to
# df_s, which would otherwise also modify the frame held in df_s_. Re-running
# this cell resets df_s / df_r to the data as loaded, without another download.
df_s = df_s_.copy()
df_r = df_r_.copy()

In [43]:
# Widen the column display limit so long offer URLs are shown in full.
pd.options.display.max_colwidth = 1000

# Thresholds for the shortlist, named so they are easy to tune.
ADDED_AFTER = '2019-12-31'   # keep offers whose DATE_ADDED compares greater than this
MAX_PRICE_DIFF = -1000       # keep offers whose PRICE_M2 is more than 1000 below SALE_PRED
ROWS_TO_SHOW = 30

# assign() returns a new frame instead of writing a column into the existing one,
# so a frame shared with another name is not modified and re-running this cell on
# an already-filtered df_s does not raise SettingWithCopyWarning.
df_s = df_s.assign(price_diff=df_s[columns.PRICE_M2] - df_s[columns.SALE_PRED])
df_s = df_s[df_s[columns.DATE_ADDED] > ADDED_AFTER]
#df_s = df_s[df_s[columns.PRICE_M2]<20000]
df_s = df_s[df_s['price_diff'] < MAX_PRICE_DIFF]

# Most negative price_diff first (asking price furthest below the prediction).
# NOTE: despite its name, top_10 holds every offer that passed the filters;
# only the display below is capped, at ROWS_TO_SHOW rows.
top_10 = df_s.sort_values('price_diff')
top_10 = top_10[[
    columns.DATE_ADDED,
    columns.TITLE,
    columns.SIZE,
    columns.URL,
    columns.PRICE_M2,
    columns.SALE_PRED,
    'price_diff',
]]
top_10.head(ROWS_TO_SHOW)

Unnamed: 0,date_added__offer,title__offer,size__offer,url__offer,price_m2__offer,sale__prediction,price_diff
101,2020-01-01,"Warszawa, Mokotów, ul. Jana III Sobieskiego",178.5,https://www.morizon.pl/oferta/sprzedaz-mieszkanie-warszawa-mokotow-jana-iii-sobieskiego-178m2-mzn2035427766,11204.48,15721.53884,-4517.05884
95,2020-01-01,"Kraków, Stadionowa",135.0,https://www.morizon.pl/oferta/sprzedaz-mieszkanie-krakow-stadionowa-135m2-mzn2035427493,5148.15,7350.424975,-2202.274975
4,2020-01-01,"Łódź, Stefana Żeromskiego",45.0,https://www.morizon.pl/oferta/sprzedaz-mieszkanie-lodz-stefana-zeromskiego-45m2-mzn2035427746,3311.11,5497.53234,-2186.42234
6,2020-01-01,"Szczecin, Niebuszewo, Niemierzyńska",87.2,https://www.morizon.pl/oferta/sprzedaz-mieszkanie-szczecin-niebuszewo-niemierzynska-87m2-mzn2035427136,2282.11,4417.650031,-2135.540031
0,2020-01-01,"Szczecin, Centrum, Bohaterów Getta Warszawskiego",35.97,https://www.morizon.pl/oferta/sprzedaz-mieszkanie-szczecin-centrum-bohaterow-getta-warszawskiego-35m2-mzn2035427180,2752.29,4637.628737,-1885.338737
87,2020-01-01,"Warszawa, Praga-Południe, Meksykańska",63.0,https://www.morizon.pl/oferta/sprzedaz-mieszkanie-warszawa-praga-poludnie-meksykanska-63m2-mzn2035427342,8253.97,9801.650175,-1547.680175
2,2020-01-01,"Szczecin, Drzetowo, Radogoska",43.97,https://www.morizon.pl/oferta/sprzedaz-mieszkanie-szczecin-drzetowo-grabowo-radogoska-43m2-mzn2035427178,3229.47,4573.259524,-1343.789524
100,2020-01-01,"Warszawa, Śródmieście, Dobra",118.0,https://www.morizon.pl/oferta/sprzedaz-mieszkanie-warszawa-srodmiescie-dobra-118m2-mzn2035427341,11000.0,12326.471419,-1326.471419
33,2020-01-01,"Lublin, Czuby, Bociania",61.0,https://www.morizon.pl/oferta/sprzedaz-mieszkanie-lublin-czuby-bociania-61m2-mzn2035427231,4737.7,5795.260044,-1057.560044


In [45]:
# Render the shortlist as an HTML table string, with numeric columns rounded
# to zero decimal places.
top_10.round(0).to_html()

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>date_added__offer</th>\n      <th>title__offer</th>\n      <th>size__offer</th>\n      <th>url__offer</th>\n      <th>price_m2__offer</th>\n      <th>sale__prediction</th>\n      <th>price_diff</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>101</th>\n      <td>2020-01-01</td>\n      <td>Warszawa, Mokotów,  ul. Jana III Sobieskiego</td>\n      <td>178.0</td>\n      <td>https://www.morizon.pl/oferta/sprzedaz-mieszkanie-warszawa-mokotow-jana-iii-sobieskiego-178m2-mzn2035427766</td>\n      <td>11204.0</td>\n      <td>15722.0</td>\n      <td>-4517.0</td>\n    </tr>\n    <tr>\n      <th>95</th>\n      <td>2020-01-01</td>\n      <td>Kraków,  Stadionowa</td>\n      <td>135.0</td>\n      <td>https://www.morizon.pl/oferta/sprzedaz-mieszkanie-krakow-stadionowa-135m2-mzn2035427493</td>\n      <td>5148.0</td>\n      <td>7350.0</td>\n      <td>-2202.0</td>\n    </tr>\n    <t