In [1]:
from bs4 import BeautifulSoup
import pandas as pd
import requests

In [2]:
# Listing pages to crawl. The first page has a bare URL; every later page
# (2 through 80) is addressed through the `?pag=<n>` query parameter.
page_1 = 'https://www.immobiliare.it/en/vendita-case/milano/'
pages = [page_1] + [
    'https://www.immobiliare.it/en/vendita-case/milano/?pag=' + str(number)
    for number in range(2, 81)
]

In [3]:
# Get all announcements: download every listing page and collect the links of
# the announcements shown on it. The result keeps one inner list per page.
announcements_urls = []

for url in pages:
    # Without a timeout, requests waits indefinitely on a stalled connection,
    # which would freeze the whole crawl.
    response = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx responses. Otherwise an error page would be
    # parsed like a normal one and (presumably) recorded as a page with no
    # announcements, silently shrinking the dataset.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, features='html.parser')
    announcements = soup.select('.in-reListCard__title')
    # The inner variable gets its own name so it does not shadow the page `url`.
    page_urls = [announcement.get("href") for announcement in announcements]
    announcements_urls.append(page_urls)

In [4]:
# Flatten the per-page lists in `announcements_urls` into one flat list of
# announcement URLs, keeping the original page order.
all_announcements_urls = []
for page_urls in announcements_urls:
    all_announcements_urls.extend(page_urls)

# Preview the first 20 URLs.
all_announcements_urls[:20]

['https://www.immobiliare.it/en/annunci/106903681/',
 'https://www.immobiliare.it/en/annunci/106886279/',
 'https://www.immobiliare.it/en/annunci/106888689/',
 'https://www.immobiliare.it/en/annunci/106892137/',
 'https://www.immobiliare.it/en/annunci/106889543/',
 'https://www.immobiliare.it/en/annunci/106894335/',
 'https://www.immobiliare.it/en/annunci/106893519/',
 'https://www.immobiliare.it/en/annunci/106892721/',
 'https://www.immobiliare.it/en/annunci/106897805/',
 'https://www.immobiliare.it/en/annunci/106879571/',
 'https://www.immobiliare.it/en/annunci/106895311/',
 'https://www.immobiliare.it/en/annunci/106897733/',
 'https://www.immobiliare.it/en/annunci/106884301/',
 'https://www.immobiliare.it/en/annunci/106882809/',
 'https://www.immobiliare.it/en/annunci/106884725/',
 'https://www.immobiliare.it/en/annunci/106887925/',
 'https://www.immobiliare.it/en/annunci/106890161/',
 'https://www.immobiliare.it/en/annunci/106892053/',
 'https://www.immobiliare.it/en/annunci/106898

In [5]:
# Total number of announcement URLs collected across all listing pages.
len(all_announcements_urls)

2000

In [6]:
# Go to each announcement and get all the information.
#
# Every announcement page contributes one record (a dict) to each of four
# groups: headline features, description, the "other features" table and the
# location. Records are collected in plain lists and turned into DataFrames
# once at the end; growing a DataFrame with pd.concat inside the loop copies
# all earlier rows again on every iteration.

# Fixed labels, defined once because they do not change between pages.
# NOTE(review): zip() pairs these labels with the scraped values purely by
# position. When a page exposes fewer items (or a different order), values end
# up under the wrong label -- e.g. an area such as "234m²" stored under
# number_rooms. Keying on each item's own label would be safer; TODO confirm
# what the page markup offers.
main_data_labels = ['price_euro', 'number_rooms', 'area_m2', 'bathrooms', 'floor_number']
description_labels = ['description']
location_labels = ['city', 'neighborhood', 'street']


def _select_texts(soup, selector):
    """Return the text of every element in ``soup`` matching the CSS ``selector``."""
    return [item.get_text() for item in soup.select(selector)]


def _parse_announcement(soup):
    """Split one parsed announcement page into its four groups of fields.

    Args:
        soup: BeautifulSoup document of a single announcement page.

    Returns:
        Tuple ``(main_data, description, other_features, location)`` of dicts.
        A group whose selector matches nothing is an empty dict.
    """
    main_data_dict = dict(zip(
        main_data_labels,
        _select_texts(soup, '.nd-list__item.in-feat__item'),
    ))
    description_dict = dict(zip(
        description_labels,
        _select_texts(soup, '.in-readAll.in-readAll--lessContent'),
    ))
    # Feature names and feature values come from two parallel element lists.
    other_features_dict = dict(zip(
        _select_texts(soup, '.in-realEstateFeatures__title'),
        _select_texts(soup, '.in-realEstateFeatures__value'),
    ))
    location_dict = dict(zip(
        location_labels,
        _select_texts(soup, '.in-location'),
    ))
    return main_data_dict, description_dict, other_features_dict, location_dict


main_data_records = []
description_records = []
other_features_records = []
location_records = []

for url in all_announcements_urls:
    try:
        # Without a timeout a single stalled connection would hang the crawl.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
    except requests.RequestException as error:
        # One unreachable or removed announcement should not abort the whole
        # crawl. Keep a blank record so the rows stay aligned with
        # `all_announcements_urls`, and report the failure.
        print('Could not fetch', url, '-', error)
        parsed = ({}, {}, {}, {})
    else:
        parsed = _parse_announcement(BeautifulSoup(response.text, features='html.parser'))

    main_data_dict, description_dict, other_features_dict, location_dict = parsed
    main_data_records.append(main_data_dict)
    description_records.append(description_dict)
    other_features_records.append(other_features_dict)
    location_records.append(location_dict)

# One DataFrame per group, one row per announcement. The default 0..n-1 index
# matches each announcement's position in `all_announcements_urls`; keys
# missing from a record become NaN.
df_1 = pd.DataFrame(main_data_records)
df_2 = pd.DataFrame(description_records)
df_3 = pd.DataFrame(other_features_records)
df_4 = pd.DataFrame(location_records)

# Put all the data in a dataframe
data = pd.concat([df_1, df_2, df_3, df_4], axis=1)

data['URL'] = all_announcements_urls

In [7]:
# Save the raw scraped table. With to_csv's defaults the DataFrame index is
# written as well, as a first column with an empty header.
data.to_csv('milano_house_prices_raw_data_.csv')

In [8]:
# Display the assembled table: one row per announcement, with the scraped
# fields followed by the announcement URL.
data

Unnamed: 0,price_euro,number_rooms,area_m2,bathrooms,floor_number,description,Reference and listing Date,contract,type,surface,...,Reference,Delegato alla vendita,Curatore,Geometra,unit,Construction start and expected delivery date,city,neighborhood,street,URL
0,"€ 289,000",3,100m²,1,4,Si propone in vendita in esclusiva un ampio ap...,CNB6 - 10/24/2023,Sale,Apartment | Full ownership | Stately property ...,100 m² - See detail,...,,,,,,,Milan,Baggio,Via Val Cannobina,https://www.immobiliare.it/en/annunci/106903681/
1,"€ 950,000",234m²,3,5,,A few steps from numerous neighborhood service...,LU.P5.R6_533 - 10/24/2023,Sale,Apartment | Full ownership | Stately property ...,234 m² - See detail,...,,,,,,,Milan,Ponte Nuovo,Via del Ricordo,https://www.immobiliare.it/en/annunci/106886279/
2,"€ 398,000",2,78m²,1,2,"RIF: Divina Servizi Immobiliari di Seregno, Pi...",EK-106888689 - 10/24/2023,Sale,Apartment | Full ownership | Stately property ...,78 m² - See detail,...,,,,,,,Milan,Argonne - Corsica,Via Druso,https://www.immobiliare.it/en/annunci/106888689/
3,"€ 420,000",3,85m²,1,2,Zona Crescenzago e più precisamente in Via Pri...,CS- Vittorelli - 10/24/2023,Sale,Apartment | Full ownership | Stately property ...,85 m² - See detail,...,,,,,,,Milan,Crescenzago,Via Jacopo Vittorelli,https://www.immobiliare.it/en/annunci/106892137/
4,"€ 780,000",3,80m²,1,5,Ref: ST6 - Tempocasa in Via Bergamo 11 offers ...,ST6 - 10/24/2023,Sale,Apartment | Full ownership | Stately property ...,80 m² - See detail,...,,,,,,,Milan,Porta Romana - Medaglie d'Oro,Via Gerolamo Tiraboschi 6,https://www.immobiliare.it/en/annunci/106889543/
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,"€ 160,000",2,54m²,1,G,"via carolina invernizio - in a 90s building, w...",160 INVERNIZIO - 10/14/2023,Sale - Download brochure,Apartment | Full ownership | Medium property c...,54 m² - See detail,...,,,,,,,Milan,Vialba,Via Carolina Invernizio,https://www.immobiliare.it/en/annunci/106660639/
1996,"€ 214,000",2,70m²,1,,"via carolina invernizio - in stabile anni 90, ...",214 INVERNIZIO - 10/14/2023,Sale - Download brochure,Apartment | Full ownership | Medium property c...,70 m² - See detail,...,,,,,,,Milan,Vialba,Via Carolina Invernizio,https://www.immobiliare.it/en/annunci/106660615/
1997,"€ 299,000",2,50m²,1,5,ARE YOU LOOKING FOR A PERFECT TWO-ROOM APARTME...,sabotino7 - 10/14/2023,Sale,Apartment | Full ownership | Stately property ...,50 m² - See detail,...,,,,,,,Milan,Porta Romana - Medaglie d'Oro,Viale Sabotino,https://www.immobiliare.it/en/annunci/106656557/
1998,"€ 450,000",3,94m²,1,2,Rif: GG42 - INTERNO: la soluzione proposta è u...,GG42 - 10/14/2023,Sale,Apartment | Full ownership | Medium property c...,94 m² - See detail,...,,,,,,,Milan,Centrale,Via Ponte Seveso,https://www.immobiliare.it/en/annunci/106654875/
