In [1]:
#!pip install tabula-py
#!pip install googlemaps
#!pip install beautifulsoup4
#!pip install wayback-machine-scraper
#!pip install xlwt

In [47]:
import requests
from urllib.parse import urljoin
from urllib.request import urlretrieve
from pathlib import Path
from bs4 import BeautifulSoup
import glob
import pandas as pd
import re
import googlemaps
import tabula
import os

In [29]:
def extract_links_from_webarchive_snapshots(folder: str, tag_name, class_value):
    hrefs = []
    for file in glob.glob(f"{folder}*.snapshot"):
        links = BeautifulSoup(open(file)).find_all(tag_name, {'class': class_value})
        if links:
            if len(links)> 1:
                ignored_links = [l.a['href'] for l in links[1:]]
                print(f"Ignored links: {ignored_links}")
            l = links[0].a['href']
            hrefs.append(l)
    return hrefs

In [43]:
def extract_auctions_from_aler_website(url, output_dir, class_name, BASE_URL = "https://alermipianovendite.it"):
   
    
    r = requests.get(url)
    html = r.text
    elements = BeautifulSoup(html).find_all("td", {"class": class_name})
    for el in elements:
        href = el.a.get('href')
        complete_href = urlretrieve(urljoin(BASE_URL, href), filename = f'{output_dir}/{href.split("/")[-1]}')
        print(f"Downloaded {complete_href}")

In [94]:
def extract_dataframe_from_pdf(pdf_folder):
    
    columns_10 = ['LOTTO', 'CODICE', 'BOX/P.A.', 'LOCALITA', 'INDIRIZZO', 'CIVICO', 'ID', 'PREZZO BASE', 'PREZZO_AG', 'AGGIUDICATARIO']
    columns_9 =  ['LOTTO', 'CODICE', 'LOCALITA', 'INDIRIZZO', 'CIVICO', 'ID', 'PREZZO BASE', 'PREZZO_AG', 'AGGIUDICATARIO']
    columns_9_2020 =['LOTTO', 'CODICE', 'BOX/P.A.', 'LOCALITA', 'INDIRIZZO', 'CIVICO', 'PREZZO BASE', 'PREZZO_AG', 'AGGIUDICATARIO']
    columns_8 =  ['LOTTO', 'CODICE', 'LOCALITA', 'INDIRIZZO', 'CIVICO', 'PREZZO BASE', 'PREZZO_AG', 'AGGIUDICATARIO']
    dfs = []
    
    
    for file in Path(pdf_folder).glob("*.pdf"):
        print(f"Analyzing {file}")

        match = re.search(r'(\d+)([a-z]+)(\d+)', str(file))
        giorno, mese, anno = match.groups()


        tables = tabula.read_pdf(file, pages = "all", multiple_tables = True, pandas_options={'header': None})

        for table in tables:
            if table.shape[1] == 8:
                table.columns = columns_8
                #tabula is unable to split the last two columns, due few space between columns
                if file.stem == 'esito-18febbraio15':
                    table[['PREZZO_AG', 'AGGIUDICATARIO']] = table['AGGIUDICATARIO'].str.split(",00", expand=True)
            elif table.shape[1] == 9: 
                #remove header from the table
                if file.stem == 'esito-24settembre2020':
                    table = table.iloc[1:]
                    table.reset_index(inplace=True, drop=True)

                if (table.loc[0,4].startswith('VIA')) | (table.loc[0,4].startswith('PIAZZA')):
                    table.columns = columns_9_2020
                elif (table.loc[0,3].startswith('VIA')) | (table.loc[0,3].startswith('PIAZZA')):
                    table.columns = columns_9
                else:
                    raise Exception("ERRORE")

            elif table.shape[1] == 10:
                table.columns = columns_10
            else:
                print(table.shape)
            table['GIORNO'] = giorno
            table['MESE'] = mese
            table['ANNO'] = anno
            dfs.append(table)

    data = pd.concat(dfs, axis=0, ignore_index=True) 
    return data
    


In [113]:
def geocode(address_series: pd.Series, api_key:str):
    gmaps = googlemaps.Client(key=api_key)
    addresses = []

    series_unique = address_series.unique()
    print(f"Analyzing {len(series_unique)} addresses")
    for address in series_unique:
        print(address)
        geocode_result = gmaps.geocode(address)
        location = geocode_result[0]['geometry']['location']
        addresses.append({
            'address': address,
            'lat': location['lat'],
            'lng':  location['lng']
        })
    return pd.DataFrame(addresses)

In [181]:
def extract_dataframe_from_webpage(urls):
    
    set_url = set()
    dfs = []
    
    for url in urls:
        auction = list(filter(None, url.split("/")))[-1]
        
        if auction not in set_url:
            print(f"Extracting df about auction {auction}")
            try:
                htables = pd.read_html(url, header=0)
                df = None
                for t in htables:
                    if "LOTTO" in t.columns:
                        df = t
                        break
                if df is None:    
                    print(f"WARNING url {url} doesn't contains data")
                else:    
                    dfs.append(df)
                
                set_url.add(auction)
                
            except Exception as e:
                print(f"Error analyzing url {url}. Error {e}")
    
    return dfs

In [208]:
def normalize_dfs(list_df, columns):
    dfs_filtred = []
    n_max = len(columns)
    
    for df in list_df:
        df = df[df.columns[0:n_max]]
        df.columns = columns
        dfs_filtred.append(df)


    return pd.concat(dfs_filtred)

## Estrazione esiti aste

In [44]:
!mkdir -p ./auction_data

In [46]:
BASE_URL = "https://alermipianovendite.it"
auction_dir = "./auction_data"

#2020-2022
extract_auctions_from_aler_website(url=f"{BASE_URL}/esiti-piano-vendite-2020-2022/",
                                   output_dir=auction_dir,
                                   class_name="column-2")

#to 2020
extract_auctions_from_aler_website(url=f"{BASE_URL}/esiti-piano-vendite-2014-2019/",
                                   output_dir=auction_dir,
                                   class_name="column-1")


Downloaded ('./auction_data/esito-18febbraio2021.pdf', <http.client.HTTPMessage object at 0x12313c3d0>)
Downloaded ('./auction_data/esito-21gennaio2021.pdf', <http.client.HTTPMessage object at 0x12313c390>)
Downloaded ('./auction_data/esito-17dicembre2020.pdf', <http.client.HTTPMessage object at 0x12313c810>)
Downloaded ('./auction_data/esito-19novembre2020.pdf', <http.client.HTTPMessage object at 0x12313cbd0>)
Downloaded ('./auction_data/esito-22ottobre2020.pdf', <http.client.HTTPMessage object at 0x12313c450>)
Downloaded ('./auction_data/esito-24settembre2020.pdf', <http.client.HTTPMessage object at 0x12313c7d0>)
Downloaded ('./auction_data/esito-21luglio2020.pdf', <http.client.HTTPMessage object at 0x12313cb90>)
Downloaded ('./auction_data/esito-05dicembre19.pdf', <http.client.HTTPMessage object at 0x123151b50>)
Downloaded ('./auction_data/esito-24ottobre19.pdf', <http.client.HTTPMessage object at 0x123151650>)
Downloaded ('./auction_data/esito-26settembre19.pdf', <http.client.HTTPM

## Extract tabular data from pdf

In [96]:
data = extract_dataframe_from_pdf(auction_dir)
data.head(3)

Analyzing auction_data/esito-01ottobre15.pdf


Got stderr: Mar 16, 2021 11:35:00 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: Your current java version is: 1.8.0_181
Mar 16, 2021 11:35:00 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: To get higher rendering speed on old java 1.8 or 9 versions,
Mar 16, 2021 11:35:00 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),
Mar 16, 2021 11:35:00 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or
Mar 16, 2021 11:35:00 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
Mar 16, 2021 11:35:00 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or call System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")



Analyzing auction_data/esito-25luglio19.pdf


Got stderr: Mar 16, 2021 11:35:02 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: Your current java version is: 1.8.0_181
Mar 16, 2021 11:35:02 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: To get higher rendering speed on old java 1.8 or 9 versions,
Mar 16, 2021 11:35:02 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),
Mar 16, 2021 11:35:02 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or
Mar 16, 2021 11:35:02 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
Mar 16, 2021 11:35:02 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or call System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")



Analyzing auction_data/esito-21febbraio19.pdf


Got stderr: Mar 16, 2021 11:35:03 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: Your current java version is: 1.8.0_181
Mar 16, 2021 11:35:03 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: To get higher rendering speed on old java 1.8 or 9 versions,
Mar 16, 2021 11:35:03 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),
Mar 16, 2021 11:35:03 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or
Mar 16, 2021 11:35:03 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
Mar 16, 2021 11:35:03 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or call System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")



Analyzing auction_data/esito-28giugno18.pdf


Got stderr: Mar 16, 2021 11:35:05 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: Your current java version is: 1.8.0_181
Mar 16, 2021 11:35:05 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: To get higher rendering speed on old java 1.8 or 9 versions,
Mar 16, 2021 11:35:05 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),
Mar 16, 2021 11:35:05 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or
Mar 16, 2021 11:35:05 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
Mar 16, 2021 11:35:05 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or call System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")



Analyzing auction_data/esito-18febbraio2021.pdf


Got stderr: Mar 16, 2021 11:35:07 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: Your current java version is: 1.8.0_181
Mar 16, 2021 11:35:07 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: To get higher rendering speed on old java 1.8 or 9 versions,
Mar 16, 2021 11:35:07 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),
Mar 16, 2021 11:35:07 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or
Mar 16, 2021 11:35:07 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
Mar 16, 2021 11:35:07 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or call System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")



Analyzing auction_data/esito-06ottobre16.pdf


Got stderr: Mar 16, 2021 11:35:08 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: Your current java version is: 1.8.0_181
Mar 16, 2021 11:35:08 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: To get higher rendering speed on old java 1.8 or 9 versions,
Mar 16, 2021 11:35:08 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),
Mar 16, 2021 11:35:08 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or
Mar 16, 2021 11:35:08 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
Mar 16, 2021 11:35:08 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or call System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")



Analyzing auction_data/esito-24maggio18.pdf


Got stderr: Mar 16, 2021 11:35:10 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: Your current java version is: 1.8.0_181
Mar 16, 2021 11:35:10 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: To get higher rendering speed on old java 1.8 or 9 versions,
Mar 16, 2021 11:35:10 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),
Mar 16, 2021 11:35:10 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or
Mar 16, 2021 11:35:10 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
Mar 16, 2021 11:35:10 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or call System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")



Analyzing auction_data/esito-12aprile18.pdf


Got stderr: Mar 16, 2021 11:35:12 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: Your current java version is: 1.8.0_181
Mar 16, 2021 11:35:12 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: To get higher rendering speed on old java 1.8 or 9 versions,
Mar 16, 2021 11:35:12 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),
Mar 16, 2021 11:35:12 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or
Mar 16, 2021 11:35:12 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
Mar 16, 2021 11:35:12 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or call System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")



Analyzing auction_data/esito-15aprile15.pdf


Got stderr: Mar 16, 2021 11:35:13 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: Your current java version is: 1.8.0_181
Mar 16, 2021 11:35:13 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: To get higher rendering speed on old java 1.8 or 9 versions,
Mar 16, 2021 11:35:13 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),
Mar 16, 2021 11:35:13 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or
Mar 16, 2021 11:35:13 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
Mar 16, 2021 11:35:13 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or call System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")



Analyzing auction_data/esito-27giugno19.pdf


Got stderr: Mar 16, 2021 11:35:15 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: Your current java version is: 1.8.0_181
Mar 16, 2021 11:35:15 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: To get higher rendering speed on old java 1.8 or 9 versions,
Mar 16, 2021 11:35:15 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),
Mar 16, 2021 11:35:15 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or
Mar 16, 2021 11:35:15 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
Mar 16, 2021 11:35:15 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or call System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")



Analyzing auction_data/esito-08marzo17.pdf


Got stderr: Mar 16, 2021 11:35:17 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: Your current java version is: 1.8.0_181
Mar 16, 2021 11:35:17 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: To get higher rendering speed on old java 1.8 or 9 versions,
Mar 16, 2021 11:35:17 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),
Mar 16, 2021 11:35:17 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or
Mar 16, 2021 11:35:17 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
Mar 16, 2021 11:35:17 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or call System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")



Analyzing auction_data/esito-14dicembre17.pdf


Got stderr: Mar 16, 2021 11:35:18 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: Your current java version is: 1.8.0_181
Mar 16, 2021 11:35:18 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: To get higher rendering speed on old java 1.8 or 9 versions,
Mar 16, 2021 11:35:18 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),
Mar 16, 2021 11:35:18 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or
Mar 16, 2021 11:35:18 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
Mar 16, 2021 11:35:18 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or call System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")



Analyzing auction_data/esito-18aprile19.pdf


Got stderr: Mar 16, 2021 11:35:20 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: Your current java version is: 1.8.0_181
Mar 16, 2021 11:35:20 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: To get higher rendering speed on old java 1.8 or 9 versions,
Mar 16, 2021 11:35:20 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),
Mar 16, 2021 11:35:20 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or
Mar 16, 2021 11:35:20 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
Mar 16, 2021 11:35:20 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or call System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")



Analyzing auction_data/esito-29novembre18.pdf


Got stderr: Mar 16, 2021 11:35:22 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: Your current java version is: 1.8.0_181
Mar 16, 2021 11:35:22 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: To get higher rendering speed on old java 1.8 or 9 versions,
Mar 16, 2021 11:35:22 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),
Mar 16, 2021 11:35:22 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or
Mar 16, 2021 11:35:22 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
Mar 16, 2021 11:35:22 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or call System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")



Analyzing auction_data/esito-31gennaio17.pdf


Got stderr: Mar 16, 2021 11:35:24 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: Your current java version is: 1.8.0_181
Mar 16, 2021 11:35:24 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: To get higher rendering speed on old java 1.8 or 9 versions,
Mar 16, 2021 11:35:24 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),
Mar 16, 2021 11:35:24 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or
Mar 16, 2021 11:35:24 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
Mar 16, 2021 11:35:24 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or call System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")



Analyzing auction_data/esito-30maggio16.pdf


Got stderr: Mar 16, 2021 11:35:26 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: Your current java version is: 1.8.0_181
Mar 16, 2021 11:35:26 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: To get higher rendering speed on old java 1.8 or 9 versions,
Mar 16, 2021 11:35:26 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),
Mar 16, 2021 11:35:26 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or
Mar 16, 2021 11:35:26 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
Mar 16, 2021 11:35:26 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or call System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")



Analyzing auction_data/esito-01febbraio18.pdf


Got stderr: Mar 16, 2021 11:35:28 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: Your current java version is: 1.8.0_181
Mar 16, 2021 11:35:28 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: To get higher rendering speed on old java 1.8 or 9 versions,
Mar 16, 2021 11:35:28 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),
Mar 16, 2021 11:35:28 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or
Mar 16, 2021 11:35:28 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
Mar 16, 2021 11:35:28 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or call System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")



Analyzing auction_data/esito-21aprile16.pdf


Got stderr: Mar 16, 2021 11:35:29 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: Your current java version is: 1.8.0_181
Mar 16, 2021 11:35:29 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: To get higher rendering speed on old java 1.8 or 9 versions,
Mar 16, 2021 11:35:29 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),
Mar 16, 2021 11:35:29 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or
Mar 16, 2021 11:35:29 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
Mar 16, 2021 11:35:29 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or call System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")



Analyzing auction_data/esito-21luglio2020.pdf


Got stderr: Mar 16, 2021 11:35:31 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: Your current java version is: 1.8.0_181
Mar 16, 2021 11:35:31 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: To get higher rendering speed on old java 1.8 or 9 versions,
Mar 16, 2021 11:35:31 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),
Mar 16, 2021 11:35:31 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or
Mar 16, 2021 11:35:31 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
Mar 16, 2021 11:35:31 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or call System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")



Analyzing auction_data/esito-22ottobre2020.pdf


Got stderr: Mar 16, 2021 11:35:33 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: Your current java version is: 1.8.0_181
Mar 16, 2021 11:35:33 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: To get higher rendering speed on old java 1.8 or 9 versions,
Mar 16, 2021 11:35:33 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),
Mar 16, 2021 11:35:33 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or
Mar 16, 2021 11:35:33 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
Mar 16, 2021 11:35:33 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or call System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")



Analyzing auction_data/esito-05dicembre19.pdf


Got stderr: Mar 16, 2021 11:35:35 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: Your current java version is: 1.8.0_181
Mar 16, 2021 11:35:35 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: To get higher rendering speed on old java 1.8 or 9 versions,
Mar 16, 2021 11:35:35 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),
Mar 16, 2021 11:35:35 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or
Mar 16, 2021 11:35:35 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
Mar 16, 2021 11:35:35 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or call System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")



Analyzing auction_data/esito-06luglio17.pdf


Got stderr: Mar 16, 2021 11:35:38 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: Your current java version is: 1.8.0_181
Mar 16, 2021 11:35:38 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: To get higher rendering speed on old java 1.8 or 9 versions,
Mar 16, 2021 11:35:38 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),
Mar 16, 2021 11:35:38 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or
Mar 16, 2021 11:35:38 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
Mar 16, 2021 11:35:38 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or call System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")



Analyzing auction_data/esito-17gennaio19.pdf


Got stderr: Mar 16, 2021 11:35:40 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: Your current java version is: 1.8.0_181
Mar 16, 2021 11:35:40 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: To get higher rendering speed on old java 1.8 or 9 versions,
Mar 16, 2021 11:35:40 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),
Mar 16, 2021 11:35:40 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or
Mar 16, 2021 11:35:40 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
Mar 16, 2021 11:35:40 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or call System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")



Analyzing auction_data/esito-25maggio17.pdf


Got stderr: Mar 16, 2021 11:35:42 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: Your current java version is: 1.8.0_181
Mar 16, 2021 11:35:42 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: To get higher rendering speed on old java 1.8 or 9 versions,
Mar 16, 2021 11:35:42 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),
Mar 16, 2021 11:35:42 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or
Mar 16, 2021 11:35:42 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
Mar 16, 2021 11:35:42 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or call System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")



Analyzing auction_data/esito-30giugno16.pdf


Got stderr: Mar 16, 2021 11:35:43 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: Your current java version is: 1.8.0_181
Mar 16, 2021 11:35:43 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: To get higher rendering speed on old java 1.8 or 9 versions,
Mar 16, 2021 11:35:43 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),
Mar 16, 2021 11:35:43 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or
Mar 16, 2021 11:35:43 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
Mar 16, 2021 11:35:43 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or call System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")



Analyzing auction_data/esito-24settembre2020.pdf


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Analyzing auction_data/esito-19novembre2020.pdf


Got stderr: Mar 16, 2021 11:35:47 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: Your current java version is: 1.8.0_181
Mar 16, 2021 11:35:47 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: To get higher rendering speed on old java 1.8 or 9 versions,
Mar 16, 2021 11:35:47 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),
Mar 16, 2021 11:35:47 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or
Mar 16, 2021 11:35:47 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
Mar 16, 2021 11:35:47 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or call System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")



Analyzing auction_data/esito-26luglio18.pdf


Got stderr: Mar 16, 2021 11:35:48 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: Your current java version is: 1.8.0_181
Mar 16, 2021 11:35:48 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: To get higher rendering speed on old java 1.8 or 9 versions,
Mar 16, 2021 11:35:48 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),
Mar 16, 2021 11:35:48 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or
Mar 16, 2021 11:35:48 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
Mar 16, 2021 11:35:48 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or call System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")



Analyzing auction_data/esito-26settembre19.pdf


Got stderr: Mar 16, 2021 11:35:50 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: Your current java version is: 1.8.0_181
Mar 16, 2021 11:35:50 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: To get higher rendering speed on old java 1.8 or 9 versions,
Mar 16, 2021 11:35:50 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),
Mar 16, 2021 11:35:50 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or
Mar 16, 2021 11:35:50 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
Mar 16, 2021 11:35:50 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or call System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")



Analyzing auction_data/esito-04novembre15.pdf


Got stderr: Mar 16, 2021 11:35:52 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: Your current java version is: 1.8.0_181
Mar 16, 2021 11:35:52 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: To get higher rendering speed on old java 1.8 or 9 versions,
Mar 16, 2021 11:35:52 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),
Mar 16, 2021 11:35:52 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or
Mar 16, 2021 11:35:52 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
Mar 16, 2021 11:35:52 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or call System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")



Analyzing auction_data/esito-27maggio15.pdf


Got stderr: Mar 16, 2021 11:35:54 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: Your current java version is: 1.8.0_181
Mar 16, 2021 11:35:54 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: To get higher rendering speed on old java 1.8 or 9 versions,
Mar 16, 2021 11:35:54 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),
Mar 16, 2021 11:35:54 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or
Mar 16, 2021 11:35:54 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
Mar 16, 2021 11:35:54 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or call System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")



Analyzing auction_data/esito-27settembre18.pdf


Got stderr: Mar 16, 2021 11:35:56 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: Your current java version is: 1.8.0_181
Mar 16, 2021 11:35:56 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: To get higher rendering speed on old java 1.8 or 9 versions,
Mar 16, 2021 11:35:56 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),
Mar 16, 2021 11:35:56 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or
Mar 16, 2021 11:35:56 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
Mar 16, 2021 11:35:56 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or call System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")



Analyzing auction_data/esito-08marzo18.pdf


Got stderr: Mar 16, 2021 11:35:57 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: Your current java version is: 1.8.0_181
Mar 16, 2021 11:35:57 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: To get higher rendering speed on old java 1.8 or 9 versions,
Mar 16, 2021 11:35:57 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),
Mar 16, 2021 11:35:57 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or
Mar 16, 2021 11:35:57 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
Mar 16, 2021 11:35:57 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or call System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")



Analyzing auction_data/esito-17marzo16.pdf


Got stderr: Mar 16, 2021 11:35:59 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: Your current java version is: 1.8.0_181
Mar 16, 2021 11:35:59 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: To get higher rendering speed on old java 1.8 or 9 versions,
Mar 16, 2021 11:35:59 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),
Mar 16, 2021 11:35:59 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or
Mar 16, 2021 11:35:59 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
Mar 16, 2021 11:35:59 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or call System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")



Analyzing auction_data/esito-10novembre17.pdf


Got stderr: Mar 16, 2021 11:36:02 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: Your current java version is: 1.8.0_181
Mar 16, 2021 11:36:02 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: To get higher rendering speed on old java 1.8 or 9 versions,
Mar 16, 2021 11:36:02 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),
Mar 16, 2021 11:36:02 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or
Mar 16, 2021 11:36:02 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
Mar 16, 2021 11:36:02 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or call System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")



Analyzing auction_data/esito-15dicembre15.pdf


Got stderr: Mar 16, 2021 11:36:04 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: Your current java version is: 1.8.0_181
Mar 16, 2021 11:36:04 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: To get higher rendering speed on old java 1.8 or 9 versions,
Mar 16, 2021 11:36:04 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),
Mar 16, 2021 11:36:04 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or
Mar 16, 2021 11:36:04 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
Mar 16, 2021 11:36:04 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or call System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")



Analyzing auction_data/esito-11febbraio16.pdf


Got stderr: Mar 16, 2021 11:36:06 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: Your current java version is: 1.8.0_181
Mar 16, 2021 11:36:06 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: To get higher rendering speed on old java 1.8 or 9 versions,
Mar 16, 2021 11:36:06 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),
Mar 16, 2021 11:36:06 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or
Mar 16, 2021 11:36:06 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
Mar 16, 2021 11:36:06 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or call System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")



Analyzing auction_data/esito-17dicembre2020.pdf


Got stderr: Mar 16, 2021 11:36:08 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: Your current java version is: 1.8.0_181
Mar 16, 2021 11:36:08 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: To get higher rendering speed on old java 1.8 or 9 versions,
Mar 16, 2021 11:36:08 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),
Mar 16, 2021 11:36:08 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or
Mar 16, 2021 11:36:08 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
Mar 16, 2021 11:36:08 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or call System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")



Analyzing auction_data/esito-24ottobre19.pdf


Got stderr: Mar 16, 2021 11:36:09 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: Your current java version is: 1.8.0_181
Mar 16, 2021 11:36:09 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: To get higher rendering speed on old java 1.8 or 9 versions,
Mar 16, 2021 11:36:09 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),
Mar 16, 2021 11:36:09 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or
Mar 16, 2021 11:36:09 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
Mar 16, 2021 11:36:09 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or call System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")



Analyzing auction_data/esito-31ottobre18.pdf


Got stderr: Mar 16, 2021 11:36:11 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: Your current java version is: 1.8.0_181
Mar 16, 2021 11:36:11 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: To get higher rendering speed on old java 1.8 or 9 versions,
Mar 16, 2021 11:36:11 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),
Mar 16, 2021 11:36:11 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or
Mar 16, 2021 11:36:11 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
Mar 16, 2021 11:36:11 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or call System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")



Analyzing auction_data/esito-12aprile17.pdf


Got stderr: Mar 16, 2021 11:36:13 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: Your current java version is: 1.8.0_181
Mar 16, 2021 11:36:13 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: To get higher rendering speed on old java 1.8 or 9 versions,
Mar 16, 2021 11:36:13 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),
Mar 16, 2021 11:36:13 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or
Mar 16, 2021 11:36:13 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
Mar 16, 2021 11:36:13 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or call System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")



Analyzing auction_data/esito-21gennaio2021.pdf


Got stderr: Mar 16, 2021 11:36:15 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: Your current java version is: 1.8.0_181
Mar 16, 2021 11:36:15 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: To get higher rendering speed on old java 1.8 or 9 versions,
Mar 16, 2021 11:36:15 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),
Mar 16, 2021 11:36:15 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or
Mar 16, 2021 11:36:15 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
Mar 16, 2021 11:36:15 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or call System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")



Analyzing auction_data/esito-05ottobre17.pdf


Got stderr: Mar 16, 2021 11:36:16 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: Your current java version is: 1.8.0_181
Mar 16, 2021 11:36:16 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: To get higher rendering speed on old java 1.8 or 9 versions,
Mar 16, 2021 11:36:16 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),
Mar 16, 2021 11:36:16 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or
Mar 16, 2021 11:36:16 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
Mar 16, 2021 11:36:16 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or call System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")



Analyzing auction_data/esito-23maggio19.pdf


Got stderr: Mar 16, 2021 11:36:18 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: Your current java version is: 1.8.0_181
Mar 16, 2021 11:36:18 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: To get higher rendering speed on old java 1.8 or 9 versions,
Mar 16, 2021 11:36:18 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),
Mar 16, 2021 11:36:18 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or
Mar 16, 2021 11:36:18 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
Mar 16, 2021 11:36:18 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or call System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")



Analyzing auction_data/esito-18febbraio15.pdf


Got stderr: Mar 16, 2021 11:36:19 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: Your current java version is: 1.8.0_181
Mar 16, 2021 11:36:19 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: To get higher rendering speed on old java 1.8 or 9 versions,
Mar 16, 2021 11:36:19 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),
Mar 16, 2021 11:36:19 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or
Mar 16, 2021 11:36:19 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
Mar 16, 2021 11:36:19 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or call System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")



Analyzing auction_data/esito-21marzo19.pdf


Got stderr: Mar 16, 2021 11:36:22 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: Your current java version is: 1.8.0_181
Mar 16, 2021 11:36:22 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: To get higher rendering speed on old java 1.8 or 9 versions,
Mar 16, 2021 11:36:22 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),
Mar 16, 2021 11:36:22 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or
Mar 16, 2021 11:36:22 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
Mar 16, 2021 11:36:22 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or call System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")



Analyzing auction_data/esito-02luglio15.pdf


Got stderr: Mar 16, 2021 11:36:23 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: Your current java version is: 1.8.0_181
Mar 16, 2021 11:36:23 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: To get higher rendering speed on old java 1.8 or 9 versions,
Mar 16, 2021 11:36:23 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),
Mar 16, 2021 11:36:23 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or
Mar 16, 2021 11:36:23 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
Mar 16, 2021 11:36:23 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or call System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")



Analyzing auction_data/esito-24novembre16.pdf


Got stderr: Mar 16, 2021 11:36:25 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: Your current java version is: 1.8.0_181
Mar 16, 2021 11:36:25 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO: To get higher rendering speed on old java 1.8 or 9 versions,
Mar 16, 2021 11:36:25 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   update to the latest 1.8 or 9 version (>= 1.8.0_191 or >= 9.0.4),
Mar 16, 2021 11:36:25 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or
Mar 16, 2021 11:36:25 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
Mar 16, 2021 11:36:25 AM org.apache.pdfbox.rendering.PDFRenderer suggestKCMS
INFO:   or call System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider")



Unnamed: 0,LOTTO,CODICE,LOCALITA,INDIRIZZO,CIVICO,ID,PREZZO BASE,PREZZO_AG,AGGIUDICATARIO,GIORNO,MESE,ANNO,BOX/P.A.
0,252/15,20148,MILANO,VIA MAC MAHON,'89,,"€ 93.139,00","€ 116.000,00",ROLLERI FRANCESCO,1,ottobre,15,
1,253/15,230196,MILANO,VIA BRAMANTINO,'3,,"€ 80.784,00","€ 0,00",ASTA DESERTA,1,ottobre,15,
2,254/15,230657,MILANO,VIA JACOPINO DA TRADATE,'14,,"€ 67.478,00","€ 68.500,00",BUGARIU LILIANA,1,ottobre,15,


In [97]:
data.shape

(2124, 13)

## Geocoding Apartments

In [106]:
data['address'] = data['INDIRIZZO'] + " " + data['CIVICO'] + ", " + data['LOCALITA']
data.head(3)

Unnamed: 0,LOTTO,CODICE,LOCALITA,INDIRIZZO,CIVICO,ID,PREZZO BASE,PREZZO_AG,AGGIUDICATARIO,GIORNO,MESE,ANNO,BOX/P.A.,address
0,252/15,20148,MILANO,VIA MAC MAHON,'89,,"€ 93.139,00","€ 116.000,00",ROLLERI FRANCESCO,1,ottobre,15,,"VIA MAC MAHON '89, MILANO"
1,253/15,230196,MILANO,VIA BRAMANTINO,'3,,"€ 80.784,00","€ 0,00",ASTA DESERTA,1,ottobre,15,,"VIA BRAMANTINO '3, MILANO"
2,254/15,230657,MILANO,VIA JACOPINO DA TRADATE,'14,,"€ 67.478,00","€ 68.500,00",BUGARIU LILIANA,1,ottobre,15,,"VIA JACOPINO DA TRADATE '14, MILANO"


In [114]:
api_key = "AIzaSyBdCWCLzV83HuTHJYFOzXyd8RlJI6pWNig"

df_addresses = geocode(data['address'], api_key=api_key)


Analyzing 730 addresses
VIA MAC MAHON '89, MILANO
VIA BRAMANTINO '3, MILANO
VIA JACOPINO DA TRADATE '14, MILANO
VIA GASPARE ASELLI '18, MILANO
VIA FILIPPO ABBIATI '5, MILANO
VIA DALMAZIO BIRAGO '2, MILANO
VIA DALMAZIO BIRAGO '4, MILANO
VIA ANGELO INGANNI '52, MILANO
VIA COMASINA '57, MILANO
VIA TEANO '36, MILANO
VIA SANTUARIO DEL SACRO CUORE '3, MILANO
VIALE UNGHERIA '2, MILANO
VIA LUIGI SORDELLO '10, MILANO
VIA CONCILIO VATICANO II '10, MILANO
VIA DEGLI ONTANI '44, MILANO
VIA DEI SALICI '59, MILANO
VIA COSTANTINO BARONI '75, MILANO
VIA ALESSANDRO LITTA MODIGNANI '113, MILANO
VIA ALESSANDRO LITTA MODIGNANI '103, MILANO
VIA ALESSANDRO LITTA MODIGNANI '115, MILANO
VIA FILIPPO DE PISIS '27, MILANO
VIA GIACOMO QUARENGHI '43, MILANO
VIALE ITALIA '34, CORSICO
VIA MARZABOTTO '20, CORSICO
VIA BENVENUTO CELLINI '12, CORSICO
VIA AGOSTINO DE PRETIS '51, MILANO
VIA ENRICO DE NICOLA '8, MILANO
VIALE LAZIO '21, ROZZANO
VIALE LAZIO '76, ROZZANO
VIA DEGLI ANEMONI '3, MILANO
VIA GIUSEPPE ROVANI '311, S

VIA APPENNINI '33, MILANO
VIALE ITALIA '36, CORSICO
VIA BENVENUTO CELLINI '24, CORSICO
VIA AGOSTINO DE PRETIS '3, MILANO
VIA AGOSTINO DE PRETIS '121, MILANO
VIA AGOSTINO DE PRETIS '79, MILANO
VIA AGOSTINO DE PRETIS '21, MILANO
VIA AGOSTINO DE PRETIS '35, MILANO
VIA AGOSTINO DE PRETIS '45, MILANO
VIA ARMANDO SPADINI '15, MILANO
VIA ANGELO MORO '27.E, SAN DONATO MILANE
PIAZZALE SELINUNTE '4, MILANO
VIA ANDREA VERROCCHIO '40, MILANO
VIALE UNGHERIA '19, MILANO
VIA ROMUALDO BONFADINI '94, MILANO
VIA DELLA CAPINERA '6, MILANO
VIA COSTANTINO BARONI '81, MILANO
VIA MICHELE SAPONARO '26, MILANO
VIA FILIPPO DE PISIS '15, MILANO
VIA FRANCESCO CILEA '48, MILANO
VIA FRANCESCO CILEA '46, MILANO
VIA ALEX VISCONTI '23, MILANO
VIA ANTONIO LABRIOLA '9, LAINATE
VIA ANTONIO LISSONI '5, MILANO
VIA ANTONIO LISSONI '15, MILANO
VIA RAFFAELLO GIOLLI '21, MILANO
VIA POSTUMIA '28, MILANO
VIA BENJAMIN CONSTANT '2, MILANO
VIA FELICITE' ROBERT LAMENNAIS '9, MILANO
VIA PATERNO' '18, MILANO
VIA ALBERTO PEPERE '3, MIL

VIA FRANCESCO CILEA '80, MILANO
VIA AGOSTINO DE PRETIS '123, MILANO
VIA ENRICO DE NICOLA '12, MILANO
VIA ANTONIO LISSONI '11, MILANO
VIA BOLOGNA '19, CANEGRATE
VIA EMILIO GOLA '31, MILANO
VIALE OMERO '19, MILANO
VIA DEI SALICI '9, MILANO
VIA DEI SALICI '11, MILANO
VIA EUGENIO CURIEL '21, CORSICO
VIA EUGENIO CURIEL '15, CORSICO
VIA BENVENUTO CELLINI '10, CORSICO
VIA ENRICO DE NICOLA '22, MILANO
VIA GIOVANNI SEBASTIANO BACH '3, PIEVE EMANUELE
VIA CARLO AMORETTI '17, MILANO
VIA BELTRAME CRISTIANI '27, MILANO
VIA VITERBO '33, MILANO
VIA MONTE BALDO '21, MILANO
VIA LORENTEGGIO '205, MILANO
VIA DEGLI ONTANI '50, MILANO
VIA UGO BETTI '46, MILANO
VIA FRANCESCO CILEA '88, MILANO
VIA FRANCESCO CILEA '62, MILANO
VIA FRANCESCO CILEA '138, MILANO
VIA EUGENIO CURIEL '29, CORSICO
VIA AGOSTINO DE PRETIS '91, MILANO
VIA AGOSTINO DE PRETIS '103, MILANO
VIA LIPARI '8, MILANO
VIA DEGLI ONTANI '12, MILANO
VIA DEGLI IPPOCASTANI '16, MILANO
VIA COSTANTINO BARONI '65, MILANO
VIA FRANCESCO CILEA '10, MILANO
VI

In [260]:
data2 = pd.merge(data, df_addresses, how="inner", on="address")
data2.to_excel("./auction_data.xls", index=False)
data2.head(2)

  


Unnamed: 0,LOTTO,CODICE,LOCALITA,INDIRIZZO,CIVICO,ID,PREZZO BASE,PREZZO_AG,AGGIUDICATARIO,GIORNO,MESE,ANNO,BOX/P.A.,address,lat,lng
0,252/15,20148,MILANO,VIA MAC MAHON,'89,,"€ 93.139,00","€ 116.000,00",ROLLERI FRANCESCO,1,ottobre,15,,"VIA MAC MAHON '89, MILANO",45.494911,9.156308
1,112/16,20134,MILANO,VIA MAC MAHON,'89,,"€ 75.840,00","€ 79.150,00",NICOSIA PAOLO,21,aprile,16,,"VIA MAC MAHON '89, MILANO",45.494911,9.156308


In [10]:
#data2[(~data2['AGGIUDICATARIO'].isna()) & (data2['AGGIUDICATARIO'].str.startswith("€"))]

In [11]:
#data2[(data2['GIORNO']=='18') & (data2['MESE']=='febbraio')][:30]

## Scraping Web Archive

Now that we have data about house auction, we need data about the apartment (e.g. size, floor, etc.)

In [118]:
import os
starting_url = "https://alermipianovendite.it/asta-alloggi/"
starting_url_2015 = "http://www.alermipianovendite.it/vendite/venditeasta/"
#dal 2017
os.system("wayback-machine-scraper -a 'alermipianovendite.it/asta-alloggi$' alermipianovendite.it/asta-alloggi")
#dal 2015 al 2016
os.system(" wayback-machine-scraper -a 'www.alermipianovendite.it/vendite/venditeasta' www.alermipianovendite.it/vendite/venditeasta.html")


0

Now we have to analyze web pages extracted in the previous cell. In particular, we must:

    - identifying the href link in the web page related to the storical auction
    - open the extracted url and extract the dataframe containing dara


In [120]:
links_1 = extract_links_from_webarchive_snapshots(folder = "./website/alermipianovendite.it/asta-alloggi/",
                            tag_name = "article",
                            class_value = "category-asteceal"
                           )
len(links_1)

Ignored links: ['https://web.archive.org/web/20200924044539/https://alermipianovendite.it/asta-alloggi-24-settembre-2020/']


22

In [121]:
links_2 = extract_links_from_webarchive_snapshots(folder = "./website/www.alermipianovendite.it/vendite/venditeasta.html/",
                            tag_name = "div",
                            class_value = "globalnews"
                           )
complete_links_2 = ["https://web.archive.org" + l for l in links_2]
len(complete_links_2)

Ignored links: ['/web/20160503020251/http://www.alermipianovendite.it/vendite/venditeasta/21-aste-cela-chiuse/150-esito-ceal-alloggi-apr-2016.html']
Ignored links: ['/web/20160416233327/http://www.alermipianovendite.it/vendite/venditeasta/21-aste-cela-chiuse/145-esito-ceal-alloggi-mar-2016.html']
Ignored links: ['/web/20150512061812/http://www.alermipianovendite.it/vendite/venditeasta/21-aste-cela-chiuse/106-esito-ceal-alloggi-apr-2015.html']
Ignored links: ['/web/20150310234605/http://www.alermipianovendite.it/vendite/venditeasta/21-aste-cela-chiuse/98-esito-ceal-alloggi-feb-2015.html']
Ignored links: ['/web/20150612074732/http://www.alermipianovendite.it/vendite/venditeasta/21-aste-cela-chiuse/113-esito-ceal-alloggi-mag-2015.html']
Ignored links: ['/web/20160218111358/http://www.alermipianovendite.it/vendite/venditeasta/21-aste-cela-chiuse/141-esito-ceal-alloggi-feb-2016.html']
Ignored links: ['/web/20160118112617/http://www.alermipianovendite.it/vendite/venditeasta/21-aste-cela-chiu

22

In [141]:
dfs_1 = extract_dataframe_from_webpage(links_1)

Extracting df about auction asta-alloggi-24-maggio-2018
Extracting df about auction asta-alloggi-22-ottobre-2020
Extracting df about auction asta-alloggi-21-gennaio-2021
Extracting df about auction asta-alloggi-14-dicembre-2017
Extracting df about auction asta-alloggi-24-novembre-2016
Extracting df about auction asta-alloggi-12-aprile-2018
Extracting df about auction asta-alloggi-31-gennaio-2017
Extracting df about auction asta-alloggi-06-luglio-2017
Extracting df about auction asta-alloggi-10-novembre-2017
Extracting df about auction asta-alloggi-01-febbraio-2018-2
Extracting df about auction asta-alloggi-17-dicembre-2020
Extracting df about auction asta-alloggi-25-maggio-2017
Extracting df about auction 4209-2
Extracting df about auction asta-alloggi-05-ottobre-2017


In [186]:
dfs_2 = extract_dataframe_from_webpage(complete_links_2)

Extracting df about auction 148-asta-ceal-mag-2016.html
Extracting df about auction 154-esito-ceal-alloggi-giu-2016.html
Extracting df about auction 118-esito-ceal-alloggi-lug-2015.html
Extracting df about auction 87-asta-gennaio-2015.html
Extracting df about auction 144-asta-ceal-apr-2016.html
Error analyzing url https://web.archive.org/web/20160416233327/http://www.alermipianovendite.it/vendite/venditeasta/18-aste-alloggi/144-asta-ceal-apr-2016.html. Error HTTP Error 404: Article not found
Extracting df about auction 108-asta-maggio-2015.html
Extracting df about auction 100-asta-aprile-2015.html
Extracting df about auction 112-asta-luglio-2015.html
Extracting df about auction 134-esito-ceal-alloggi-dic-2015.html
Extracting df about auction 140-asta-ceal-mar-2016.html
Extracting df about auction 136-asta-ceal-feb-2016.html
Extracting df about auction 120-asta-ottobre-2015.html
Error analyzing url https://web.archive.org/web/20150915042032/http://www.alermipianovendite.it/vendite/vendi

In [230]:
columns_1 = ['LOTTO', 'UOG', 'LOCALITA', 'VIA', 'CIVICO', 'ID', 'LOCALI', 'MQ', 'ASCENSORE', 'CLASSE ENERGETICA', 'TIPOLOGIA', 'STATO GIURIDICO', 'PREZZO BASE ASTA']
columns_2 = ['LOTTO', 'UOG', 'LOCALITA', 'VIA', 'CIVICO', 'ID', 'LOCALI', 'ASCENSORE', 'CLASSE ENERGETICA', 'TIPOLOGIA', 'STATO GIURIDICO', 'PREZZO BASE ASTA', 'MQ']

df_1 = normalize_dfs(dfs_1,  columns_1)
df_2 = normalize_dfs(dfs_2,  columns_2)
df_2.rename({"DESTINAZIONE D'USO": 'TIPOLOGIA'}, inplace = True)

In [231]:
df_1.head(2)

Unnamed: 0,LOTTO,UOG,LOCALITA,VIA,CIVICO,ID,LOCALI,MQ,ASCENSORE,CLASSE ENERGETICA,TIPOLOGIA,STATO GIURIDICO,PREZZO BASE ASTA
0,077/18,UOG4,MILANO,VIA GIOVANNI PASCOLI,'4,,2.0,46,NO,G,ALLOGGIO,PIENA PROP.,"€ 62.928,00"
1,078/18,UOG2,MILANO,VIA ERCOLE FERRARIO,'7,,3.0,99,SI,F,ALLOGGIO,PIENA PROP.,"€203.148,00"


In [232]:
df_2.head(2)

Unnamed: 0,LOTTO,UOG,LOCALITA,VIA,CIVICO,ID,LOCALI,ASCENSORE,CLASSE ENERGETICA,TIPOLOGIA,STATO GIURIDICO,PREZZO BASE ASTA,MQ
0,167/16,NE,MILANO,VIA GIOVANNI PASCOLI,4,,1,NO,G,ALLOGGIO,PIENA PROP.,"€ 48.160,00",
1,168/16,SO,MILANO,VIA JACOPINO DA TRADATE,8,A,2,NO,G,ALLOGGIO,PIENA PROP.,"€ 37.440,00",


In [251]:
cols = ['LOTTO', 'LOCALI', 'MQ', 'CLASSE ENERGETICA', 'TIPOLOGIA', 'STATO GIURIDICO']
data_joined1 = pd.merge(data2, df_1[cols], how="inner", on="LOTTO")
data_joined2 = pd.merge(data2, df_2[cols], how="inner", on="LOTTO")
result = pd.concat([data_joined1,data_joined2])

not_included_ids = set(data2['LOTTO'].unique()) - set(result['LOTTO'].unique())
not_included_df = data2[data2['LOTTO'].isin(not_included_ids)]

## Final Data

In [258]:
print(not_included_df.shape)
not_included_df.head(3)

(1033, 16)


Unnamed: 0,LOTTO,CODICE,LOCALITA,INDIRIZZO,CIVICO,ID,PREZZO BASE,PREZZO_AG,AGGIUDICATARIO,GIORNO,MESE,ANNO,BOX/P.A.,address,lat,lng
0,252/15,20148,MILANO,VIA MAC MAHON,'89,,"€ 93.139,00","€ 116.000,00",ROLLERI FRANCESCO,1,ottobre,15,,"VIA MAC MAHON '89, MILANO",45.494911,9.156308
1,112/16,20134,MILANO,VIA MAC MAHON,'89,,"€ 75.840,00","€ 79.150,00",NICOSIA PAOLO,21,aprile,16,,"VIA MAC MAHON '89, MILANO",45.494911,9.156308
6,253/15,230196,MILANO,VIA BRAMANTINO,'3,,"€ 80.784,00","€ 0,00",ASTA DESERTA,1,ottobre,15,,"VIA BRAMANTINO '3, MILANO",45.497144,9.152361


In [259]:
print(result.shape)
result.head(3)

(1203, 21)


Unnamed: 0,LOTTO,CODICE,LOCALITA,INDIRIZZO,CIVICO,ID,PREZZO BASE,PREZZO_AG,AGGIUDICATARIO,GIORNO,...,ANNO,BOX/P.A.,address,lat,lng,LOCALI,MQ,CLASSE ENERGETICA,TIPOLOGIA,STATO GIURIDICO
0,038/18,230617,MILANO,VIA JACOPINO DA TRADATE,'14,,"€ 68.160,00","€ 102.000,00",BISSOLOTTI FRANCESCO,12,...,18,,"VIA JACOPINO DA TRADATE '14, MILANO",45.495774,9.151054,3.0,71,G,ALLOGGIO,PIENA PROP.
1,146/17,230636,MILANO,VIA JACOPINO DA TRADATE,'14,,"€ 36.480,00","€ 47.500,00",GIAMPAOLO ROSARIA,6,...,17,,"VIA JACOPINO DA TRADATE '14, MILANO",45.495774,9.151054,2.0,38,G,ALLOGGIO,PIENA PROP.
2,241/17,230588,MILANO,VIA JACOPINO DA TRADATE,'14,,"€ 62.220,00","€ 91.110,00",PAVIN ANDREA,10,...,17,,"VIA JACOPINO DA TRADATE '14, MILANO",45.495774,9.151054,3.0,61,G,ALLOGGIO,PIENA PROP.


In [257]:
result.to_csv("./final_data.csv")
not_included_df.to_csv("./final_data.csv")

array(['15', '16', '19', '18', '17', '2020', '2021'], dtype=object)