# Scraping the page of each listing on immobiliare.it

In [10]:
import requests
from bs4 import BeautifulSoup
import csv
import time

In [11]:
# Browser-like HTTP headers passed to requests.get() when fetching pages
headers = dict([
    (
        "User-Agent",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/74.0.3729.157 Safari/537.36",
    ),
    ("Accept-Language", "en-US,en;q=0.5"),
    ("Referer", "https://google.com"),
    ("DNT", "1"),
])

In [12]:
# Scrape every results page for Torino, then visit each listing's own page and
# collect its details as one dictionary per listing in `master_listings`.
# `status_list` records the HTTP status code of every results-page request.
master_listings = []
status_list = []
number = 0

# Loop through every page of listings in Torino
for page in range(1, 81):

    # Delay code by one second to make sure that web server doesn't reject non-human requests
    time.sleep(1)

    url = f'https://www.immobiliare.it/en/vendita-case/torino/?criterio=rilevanza&pag={page}&noAste=1'

    # Send the same browser-like headers as the per-listing requests below
    response = requests.get(url, headers=headers)
    # Log the status of each request to check later if request was successful
    status_list.append(response.status_code)

    # Get content of page with Python html parser
    soup = BeautifulSoup(response.content, 'html.parser')

    # Select the div elements with the relevant info for each house listing
    house_listings = soup.select('div.nd-mediaObject__content.in-card__content.in-realEstateListCard__content')

    # Alternative to getting the listings
    # listings = soup.find_all('div', class_='nd-mediaObject__content in-card__content in-realEstateListCard__content')

    # Iterate through every listing on the page to get its link and request info from its listing page
    for house in house_listings:
        link = house.select('a.in-card__title')[0]['href']

        # Delay code by one second to make sure that web server doesn't reject non-human requests
        time.sleep(1)

        # Request the content of the listing page
        listing_url_response = requests.get(link, headers=headers)
        listing_soup = BeautifulSoup(listing_url_response.content, 'html.parser')

        # Select the description list's content
        dl_elements = listing_soup.select('dl.in-realEstateFeatures__list')

        # Reset every field for this listing. Each name must match the one assigned
        # in the loop below, otherwise a listing that lacks a term would either raise
        # NameError or silently inherit the value scraped from the previous listing.
        house_type = rooms_det = total_bldg_floors = year = condition = ''
        ee = price = surface = floor = ''

        # Loop through all of the description list tables
        for dl in dl_elements:

            # Loop through all the description terms and values
            for dt_element, dd_element in zip(dl.find_all('dt'), dl.find_all('dd')):
                term = dt_element.text.strip()  # Get the text content of the description term
                value = dd_element.text.strip()  # Get the text content of the description value

                if term == 'type':
                    house_type = value

                elif term == 'rooms':
                    rooms_det = value

                elif term == 'total building floors':
                    total_bldg_floors = value

                elif term == 'year of construction':
                    year = value

                elif term == 'condition':
                    condition = value

                elif term == 'Energy Efficiency':
                    # The energy class is read from the first span's data-class attribute;
                    # fall back to '' when the span or the attribute is absent
                    try:
                        ee = dd_element.select('span')[0]['data-class']
                    except (IndexError, KeyError):
                        ee = ''

                elif term == 'price':
                    price = value

                elif term == 'surface':
                    surface = value

                elif term == 'floor':
                    floor = value

        # find() returns None when the element is missing, so the chained call
        # raises AttributeError; treat that as "not available"
        try:
            bathrooms = listing_soup.find('li', {'aria-label': 'bathrooms'}).find('div', class_='in-feat__data').get_text(strip=True)
        except AttributeError:
            bathrooms = ''

        try:
            rooms = listing_soup.find('li', {'aria-label': 'rooms'}).find('div', class_='in-feat__data').get_text(strip=True)
        except AttributeError:
            rooms = ''

        # The listing title is used as the address description
        try:
            address = listing_soup.select('h1.in-titleBlock__title')[0].text.strip()
        except IndexError:
            address = ''

        infos = {
            'Address description': address,
            'Price': price,
            'Rooms': rooms,
            'Rooms detailed': rooms_det,
            'Surface area': surface,
            'Bathrooms': bathrooms,
            'Floor': floor,
            'House type': house_type,
            'Total building floors': total_bldg_floors,
            'Year of construction': year,
            'Condition': condition,
            'Energy Efficiency': ee,
            'URL': link,
        }

        master_listings.append(infos)


In [13]:
# The CSV columns follow the key order of the first scraped listing
keys = master_listings[0].keys()

# Write one row per listing dictionary.
# The encoding is set explicitly because the scraped text contains non-ASCII
# characters (e.g. '€', 'm²', '°') and open() would otherwise use the
# platform-dependent default encoding.
with open('houses-torino-detailed.csv', 'w', newline='', encoding='utf-8') as output_file:
    dict_writer = csv.DictWriter(output_file, fieldnames=keys)
    dict_writer.writeheader()
    dict_writer.writerows(master_listings)

In [15]:
import pandas as pd

# Build a DataFrame from the scraped listing dictionaries and save it without the index.
# The path is relative to the working directory (instead of a machine-specific absolute
# path) so that it is the same file read back with pd.read_csv in the following cell.
torino_houses = pd.DataFrame(master_listings)
torino_houses.to_csv('houses-torino-detailed-df.csv', index=False)

In [16]:
# Load the listings from the CSV file on disk and preview the first five rows
torino_houses = pd.read_csv(filepath_or_buffer='houses-torino-detailed-df.csv')
torino_houses.head(n=5)

Unnamed: 0,Address description,Price,Rooms,Rooms detailed,Surface area,Bathrooms,Floor,House type,Total building floors,Year of construction,Condition,Energy Efficiency,URL
0,"Apartment via Albenga 2, Lingotto, Turin","€ 165,000",5,"5 (3 bedrooms, 2 others), 1 bathroom, kitchen ...",100 m²,,"3°, with lift",Apartment | Full ownership | Medium property c...,9 floors,1960.0,To be refurbished,F,https://www.immobiliare.it/en/annunci/99929544/
1,"2-room flat via Duino 184BIS, Mirafiori Sud - ...","€ 71,000",2,"2 (1 bedroom, 1 other), 1 bathroom, kitchenette",60 m²,,"6°, with lift",Apartment | Full ownership | Medium property c...,6 floors,1960.0,Good condition / Liveable,F,https://www.immobiliare.it/en/annunci/97514484/
2,"Apartment corso siracusa, 79, Santa Rita, Turin","€ 235,000",5,"5 (4 bedrooms, 1 other), 1 bathroom, kitchen d...",128 m² | commercial 134.5 m² - See detail,,"2°, with lift",Apartment | Full ownership | Stately property ...,7 floors,1969.0,Good condition / Liveable,E,https://www.immobiliare.it/en/annunci/101585695/
3,"Apartment via XX SETTEMBRE, 9, Via Roma, Turin","€ 735,000",5+,"5+ (2 bedrooms, 5 others), 3 bathrooms, kitche...",235 m²,3.0,"4°, with lift",Apartment | Full ownership | Stately property ...,4 floors,1910.0,Excellent / Refurbished,D,https://www.immobiliare.it/en/annunci/99936782/
4,"4-room flat via STELVIO 67, Pozzo Strada, Turin","€ 248,000",4,"4 (2 bedrooms, 2 others), 2 bathrooms, kitchen...",145 m²,2.0,"3°, with lift",Apartment | Full ownership | Stately property ...,7 floors,1972.0,Good condition / Liveable,G,https://www.immobiliare.it/en/annunci/92280220/


In [17]:
# Lift the column-width limit so long cell values are displayed untruncated
pd.options.display.max_colwidth = None

In [19]:
# Preview the first five rows again, now with the full column contents visible
torino_houses.head(n=5)

Unnamed: 0,Address description,Price,Rooms,Rooms detailed,Surface area,Bathrooms,Floor,House type,Total building floors,Year of construction,Condition,Energy Efficiency,URL
0,"Apartment via Albenga 2, Lingotto, Turin","€ 165,000",5,"5 (3 bedrooms, 2 others), 1 bathroom, kitchen diner",100 m²,,"3°, with lift",Apartment | Full ownership | Medium property class,9 floors,1960.0,To be refurbished,F,https://www.immobiliare.it/en/annunci/99929544/
1,"2-room flat via Duino 184BIS, Mirafiori Sud - Onorato Vigliani, Turin","€ 71,000",2,"2 (1 bedroom, 1 other), 1 bathroom, kitchenette",60 m²,,"6°, with lift",Apartment | Full ownership | Medium property class,6 floors,1960.0,Good condition / Liveable,F,https://www.immobiliare.it/en/annunci/97514484/
2,"Apartment corso siracusa, 79, Santa Rita, Turin","€ 235,000",5,"5 (4 bedrooms, 1 other), 1 bathroom, kitchen diner",128 m² | commercial 134.5 m² - See detail,,"2°, with lift",Apartment | Full ownership | Stately property class,7 floors,1969.0,Good condition / Liveable,E,https://www.immobiliare.it/en/annunci/101585695/
3,"Apartment via XX SETTEMBRE, 9, Via Roma, Turin","€ 735,000",5+,"5+ (2 bedrooms, 5 others), 3 bathrooms, kitchen diner",235 m²,3.0,"4°, with lift",Apartment | Full ownership | Stately property class,4 floors,1910.0,Excellent / Refurbished,D,https://www.immobiliare.it/en/annunci/99936782/
4,"4-room flat via STELVIO 67, Pozzo Strada, Turin","€ 248,000",4,"4 (2 bedrooms, 2 others), 2 bathrooms, kitchen diner",145 m²,2.0,"3°, with lift",Apartment | Full ownership | Stately property class,7 floors,1972.0,Good condition / Liveable,G,https://www.immobiliare.it/en/annunci/92280220/
