In [122]:
""" Search flat results on wg-gesucht.de """
from __future__ import unicode_literals, absolute_import, generators, \
    print_function

from lxml import html
from urllib import urlencode
import requests
from datetime import datetime, timedelta
import locale
import pandas as pd

# Switch LC_TIME to German so date/time formatting and parsing use German
# conventions. The 'de_DE.UTF-8' locale has to be available on the machine;
# `locale -a` lists the installed locales.
locale.setlocale(locale.LC_TIME, str('de_DE.UTF-8'))

# Search URL for flats in Berlin on wg-gesucht.de (query string is appended).
# NOTE(review): nothing in the visible code reads `bse_url`; the __main__ cell
# defines its own `base_url` with the same value. Probably a typo for
# `base_url` -- confirm there are no other users before renaming.
bse_url = 'http://www.wg-gesucht.de/wohnungen-in-Berlin.8.2.0.0.html?'

def search(base_url, category, rent_type, minSize, maxPrice, minRooms, maxRooms, exc=2, balcony=0, pets=0, furnished=0, timeout=30):
    """ Search using a get request including flat details.
    :param base_url: search url ending in '?'; the query string is appended
    :param category: type of flat
    :param rent_type: type of contract
    :param minSize: minimum size in square meters
    :param maxPrice: max rent price
    :param minRooms: minimum number of rooms
    :param maxRooms: max number of rooms
    :param exc: exchange flat y/n (optional)
    :param balcony: balcony y/n (optional)
    :param pets: pets allowed (optional)
    :param furnished: furnished y/n (optional)
    :param timeout: seconds to wait for the server before giving up (optional)
    returns whatever parse_response builds from the result page
    """
    # 'offer_filter' and 'city_id' are fixed for every search.
    # NOTE(review): city_id 8 presumably means Berlin, matching the default
    # search url -- confirm before pointing base_url at another city.
    params = {
        'offer_filter': 1, 'city_id': 8,
        'category': category, 'rent_type': rent_type,
        'sMin': minSize, 'rMax': maxPrice,
        'exc': exc, 'rmMin': minRooms,
        'rmMax': maxRooms, 'bal': balcony,
        'pet': pets, 'fur': furnished,
    }

    url = '{}{}'.format(base_url, urlencode(params))
    # Without a timeout requests waits indefinitely on a stalled connection,
    # which would freeze the notebook cell.
    response = requests.get(url, timeout=timeout)
    return parse_response(response)

def grab_xpath_text(element, xpath):
    """ Given an element and xpath pattern, return text content.
    :param element: lxml element (anything with an ``xpath`` method works)
    :param xpath: string
    returns '' when nothing matches, a string when exactly one node matches,
    and a list of strings when several nodes match
    """
    # A matched node without direct text has ``.text`` set to None; map that
    # to '' so callers can always treat the result as text.
    texts = [node.text or '' for node in element.xpath(xpath)]
    if not texts:
        return ''
    if len(texts) == 1:
        return texts[0]
    return texts
    
def _cell_text(row, xpath):
    """ Return the text matched by xpath inside row, without newlines and
    surrounding whitespace. """
    text = grab_xpath_text(row, xpath)
    if isinstance(text, list):
        # Several nodes matched: merge them instead of failing on .replace.
        text = ' '.join(part or '' for part in text)
    return (text or '').replace('\n', '').strip()


def parse_response(response):
    """ Given a requests response object, return a list of dictionaries
    containing the pertinent flat info.
    :params response: response obj
    returns list of dictionaries with the keys 'rooms', 'Free from',
    'Rent price', 'Size', 'District' and 'Link'
    """
    # Output key -> xpath of the table cell holding the value.
    columns = [
        ('rooms', 'td[contains(@class, "zimmer")]/a/span'),
        ('Free from', 'td[contains(@class, "freiab")]/a/span'),
        ('Rent price', 'td[contains(@class, "miete")]/a/span/b'),
        ('Size', 'td[contains(@class, "groesse")]/a/span'),
        ('District', 'td[contains(@class, "stadt")]/a/span'),
    ]
    page = html.fromstring(response.content)
    final_results = []
    for row in page.xpath('//table/tbody/tr'):
        status = _cell_text(row, 'td[contains(@class, "datum")]/a/span')
        # Skip rows without any status text and rows marked inactive. This is
        # an explicit comparison: the former substring test
        # (status not in 'inaktiv') also dropped any fragment of that word.
        if not status or status == 'inaktiv':
            continue
        item_dict = dict((key, _cell_text(row, xpath)) for key, xpath in columns)
        # NOTE(review): assumes every remaining row carries an 'adid'
        # attribute; a row without one raises TypeError here.
        item_dict['Link'] = 'http://www.wg-gesucht.de/' + row.get('adid')
        final_results.append(item_dict)
    return final_results
    
def prepare_wg_data(results, skip_rows=2):
    """ Prepare wg-gesucht results in a dataframe so they can be easily compared.
    :param results: list of dictionaries, one per flat
    :param skip_rows: number of leading rows to drop (optional)
    returns a new dataframe with an added 'search_engine' column; the
    original row index is kept, so it starts at skip_rows
    """
    # NOTE(review): the first two rows are dropped by default -- presumably
    # they are not regular listings; confirm against the result page.
    listings = pd.DataFrame(results).iloc[skip_rows:]
    # assign() returns a new frame, so the column is not written onto a slice
    # of another frame (which triggers pandas' SettingWithCopyWarning).
    return listings.assign(search_engine='wg-gesucht.de')

def main(base_url, category, rent_type, minSize, maxPrice, minRooms, maxRooms, exc, balcony):
    """ Run the flat search and hand back the prepared dataframe.
    All arguments are forwarded unchanged to search().
    """
    return prepare_wg_data(
        search(base_url, category, rent_type, minSize, maxPrice,
               minRooms, maxRooms, exc=exc, balcony=balcony))

In [126]:
if __name__ == '__main__':
    # Ask for the search criteria interactively. The answers stay strings and
    # are handed to main() unchanged.
    base_url = 'http://www.wg-gesucht.de/wohnungen-in-Berlin.8.2.0.0.html?'
    category = raw_input('what type of flat are you searching for? (enter 0 for WG-Zimmer, 1 for 1-Zimmer-Wohnung, 2 for Wohnung, 3 for Haus) ')
    rent_type = raw_input('what type of contract? (0 for Egal, 1 for limited, 2 for unlimited, 3 for daily rent) ')
    minSize = raw_input('min square meters? ')
    maxPrice = raw_input('max rent in Euro? ')
    minRooms = raw_input('min number of rooms? ')
    maxRooms = raw_input('max number of rooms? ')
    exc = raw_input('exchange flat? (0 for Egal, 1 for Yes, 2 for No)')
    balcony = raw_input('with balcony? (0 for no, 1 for yes)')
    # The result is kept in `final`. A bare `final` inside this `if` body
    # would not be displayed by the notebook, so it is shown from its own cell.
    final = main(base_url, category, rent_type, minSize, maxPrice, minRooms, maxRooms, exc, balcony)

what type of flat are you searching for? (enter 0 for WG-Zimmer, 1 for 1-Zimmer-Wohnung, 2 for Wohnung, 3 for Haus) 2
where type of contract? (0 for Egal, 1 for limited, 2 for unlimited, 3 for daily rent) 2
min square meters? 50
max rent in Euro? 500
min number of rooms? 2
max number of rooms? 3
exchange flat? (0 for Egal, 1 for Yes, 2 for No)2
with balcony? (0 for no, 1 for yes)0


In [127]:
final

Unnamed: 0,District,Free from,Link,Rent price,Size,rooms,search_engine
2,Rudow,01.05.17,http://www.wg-gesucht.de/wohnungen-in-Berlin-R...,11€,60m²,2,wg-gesucht.de
3,Prenzlauer Berg,01.05.17,http://www.wg-gesucht.de/wohnungen-in-Berlin-P...,400€,52m²,2,wg-gesucht.de
4,Neukölln,16.02.17,http://www.wg-gesucht.de/wohnungen-in-Berlin-N...,185€,68m²,3,wg-gesucht.de
5,Lichtenberg,01.03.17,http://www.wg-gesucht.de/wohnungen-in-Berlin-L...,442€,52m²,2,wg-gesucht.de
6,Baumschulenweg,01.04.17,http://www.wg-gesucht.de/wohnungen-in-Berlin-B...,473€,56m²,2,wg-gesucht.de
