In [1]:
from selenium import webdriver
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.select import Select
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import TimeoutException
from selenium.common.exceptions import ElementClickInterceptedException
from selenium.common.exceptions import NoSuchElementException
import time
import pickle
import os
import requests
import spacy



def load_pickled_dict(file_path):
    """Return the object unpickled from ``file_path``.

    Returns ``None`` when nothing exists at ``file_path``.

    NOTE(review): unpickling can execute arbitrary code, so only point this
    at files written by this notebook itself.
    """
    if not os.path.exists(file_path):
        return None

    with open(file_path, 'rb') as handle:
        raw_bytes = handle.read()
    return pickle.loads(raw_bytes)

# File in which the {apartment link: application already sent?} dictionary is pickled between runs
file_path = "apartment_links.pk1"

# Load the pickled dictionary if available, otherwise start with an empty one
dict_links = load_pickled_dict(file_path=file_path)

if dict_links is None:
    dict_links = dict()

website = "https://www.deutsche-wohnen.com/"

# Open Chrome
driver = webdriver.Chrome()

try:
    # Open the URL of Deutsche Wohnen
    driver.get(website)
    wait = WebDriverWait(driver, 5) # Wait up to 5 seconds per condition (maybe slow internet?)

    # The search bar is part of the landing page.
    # Fill in the location and accept the first auto-complete suggestion to start a search.
    ort_field = wait.until(EC.presence_of_element_located((By.ID, "search-object-location")))
    ort_field.send_keys("Berlin")
    ort_suggestion_list = wait.until(EC.visibility_of_element_located((By.CLASS_NAME, "search-objects__location-auto-item-label")))
    ort_field.send_keys(Keys.ARROW_DOWN)
    ort_field.send_keys(Keys.RETURN)

    search_button = driver.find_element(By.ID, "search-objects-result")
    search_button.click()

    # Now we are on the page with the links for the apartments/objects.
    # As long as there is an arrow for the next page: collect links and store them in the dictionary.
    # Since we do not want to spam the agents with emails every time the script is run, only links
    # that are not yet in the pickled dictionary are added (with the value False = not yet applied).

    index_page = 1

    while True:
        try:
            # Really block until the document reports that it finished loading
            # (a bare `wait` expression or an unused execute_script result does not wait at all).
            wait.until(lambda d: d.execute_script("return document.readyState") == "complete")

            # print(f"Current page: {index_page}")
            index_page += 1

            links = driver.find_elements(By.XPATH, '//a[contains(@class, "object-list__content-container")]')
            for link in links:
                href = link.get_attribute("href")
                # Anchors without an href yield None; those cannot be opened later, so skip them
                if href and href not in dict_links:
                    dict_links[href] = False
                    print(href)

            try:
                next_button = driver.find_element(By.ID, 'object-list-3')
            except NoSuchElementException:
                # No next-page arrow: the last page has been reached
                break

            next_button.click()

            # Give the next result page a chance to replace the current one before reading again,
            # otherwise the old list may be read (and the old arrow clicked) a second time.
            replaced_marker = links[0] if links else next_button
            try:
                wait.until(EC.staleness_of(replaced_marker))
            except TimeoutException:
                # Nothing was replaced within the timeout (e.g. the list is updated in place);
                # carry on with whatever is displayed now.
                pass

        except TimeoutException:
            # The page did not finish loading in time: stop collecting
            break
        except ElementClickInterceptedException:
            # Something covers the next-page arrow: stop collecting
            break
finally:
    # Always close the browser, otherwise every run leaves a Chrome process behind
    driver.quit()

with open(file_path, 'wb') as file:
    pickle.dump(dict_links, file)

https://www.deutsche-wohnen.com/expose/object/89-1528160009
https://www.deutsche-wohnen.com/expose/object/89-1528090010
https://www.deutsche-wohnen.com/expose/object/89-1520030003
https://www.deutsche-wohnen.com/expose/object/89-1666800008
https://www.deutsche-wohnen.com/expose/object/89-1649070080
https://www.deutsche-wohnen.com/expose/object/89-1613350005
https://www.deutsche-wohnen.com/expose/object/89-1460880002
https://www.deutsche-wohnen.com/expose/object/89-1518950027
https://www.deutsche-wohnen.com/expose/object/89-1443120007
https://www.deutsche-wohnen.com/expose/object/89-1534100005
https://www.deutsche-wohnen.com/expose/object/89-1696650008
https://www.deutsche-wohnen.com/expose/object/89-1697390003
https://www.deutsche-wohnen.com/expose/object/89-1460480005
https://www.deutsche-wohnen.com/expose/object/89-1642150008
https://www.deutsche-wohnen.com/expose/object/89-1526540049
https://www.deutsche-wohnen.com/expose/object/89-1469160008
https://www.deutsche-wohnen.com/expose/o

In [2]:
def get_gender_from_api(name, timeout=10):
    """Look up the gender reported by the genderize.io web API for a given name.

    Parameters
    ----------
    name : str
        Given name to look up.
    timeout : float, optional
        Seconds to wait for the HTTP response before giving up (default 10).

    Returns
    -------
    str or None
        The value of the ``gender`` field of the JSON answer. ``None`` is
        returned when ``name`` is empty/blank, when the request fails, when
        the status code is not 200, when the body is not valid JSON, or when
        the ``gender`` field is missing or empty.

    NOTE(review): an earlier comment stated that names used for both males and
    females are treated as male by default. Nothing in this function enforces
    that; it returns whatever the API reports.
    """
    # Nothing to look up: avoid a pointless network round trip
    if not name or not str(name).strip():
        return None

    try:
        # `params` lets requests URL-encode the name (umlauts, spaces, '&', ...)
        response = requests.get(
            "https://api.genderize.io/",
            params={"name": name, "language": "de"},
            timeout=timeout,
        )
        if response.status_code != 200:
            return None
        data = response.json()
    except (requests.RequestException, ValueError):
        # Network problem, timeout or malformed JSON: treated like "gender unknown"
        return None

    return data.get('gender') or None

In [3]:
# Cache for the German spaCy pipeline so that it is loaded only once per kernel session
_GERMAN_NLP = None


def _get_german_nlp():
    """Return the ``de_core_news_sm`` spaCy pipeline, loading it on first use."""
    global _GERMAN_NLP
    if _GERMAN_NLP is None:
        _GERMAN_NLP = spacy.load("de_core_news_sm")
    return _GERMAN_NLP


def extract_names(text:str)->tuple:
    """Extract a person's name from ``text``.

    The text is processed with the German spaCy model and the first named
    entity labelled ``PER`` is used. The family name is assumed to be the last
    token of that entity, the given name is assumed to be the rest (joined by
    single spaces).

    Parameters
    ----------
    text : str
        Text expected to contain a person's name.

    Returns
    -------
    tuple
        ``(family_name, given_name)``. Both are empty strings when no ``PER``
        entity is found; ``given_name`` is empty when the entity has only one
        token.
    """
    doc = _get_german_nlp()(text)

    for ent in doc.ents:
        if ent.label_ == "PER":  # Only person entities are of interest
            family_name = ent[-1].text
            given_name = " ".join(token.text for token in ent[:-1])
            return family_name, given_name

    return "", ""

def generate_salutation(gender:str, family_name:str)->str:
    """Return the German letter salutation matching ``gender``.

    ``"male"`` and ``"female"`` produce a personal salutation ending with
    ``family_name``; any other value produces the neutral form.
    """
    if gender not in ("male", "female"):
        return "Sehr geehrte Damen und Herren"
    if gender == "female":
        return f"Sehr geehrte Frau {family_name}"
    return f"Sehr geehrter Herr {family_name}"

def generate_custom_message(user_name:str, agent:str, address:str)->str:
    """Generate the custom message to be sent to the agent over the platform.

    The agent's name is split with ``extract_names``; the gender of the first
    given name is looked up with ``get_gender_from_api`` and turned into a
    salutation by ``generate_salutation``. The text of ``template_message.txt``
    (read from the current working directory) is returned with the
    placeholders ``{user_name}``, ``[Salutation]`` and ``[Stadtteil/Adresse]``
    replaced.
    """
    family_name, given_name = extract_names(agent)

    # Only the first given name is looked up. Without any given name the
    # placeholder "_" is kept, which results in the neutral salutation.
    given_names = given_name.split()
    gender = get_gender_from_api(name=given_names[0]) if given_names else "_"

    salutation = generate_salutation(gender=gender, family_name=family_name)

    # Read the message template stored in the same folder
    with open("template_message.txt", "r", encoding="utf-8") as template_file:
        message = template_file.read()

    # Substitute the placeholders one after the other, in this fixed order
    substitutions = (
        ("{user_name}", user_name),
        ("[Salutation]", salutation),
        ("[Stadtteil/Adresse]", address),
    )
    for placeholder, replacement in substitutions:
        message = message.replace(placeholder, replacement)

    return message
    



In [4]:
def _fill_text_field(wait, element_id:str, value:str, click_first:bool=False)->None:
    """Wait until the input with id ``element_id`` is visible, clear it and type ``value``."""
    field = wait.until(EC.visibility_of_element_located((By.ID, element_id)))
    if click_first:
        field.click()
    field.clear()
    field.send_keys(value)


def _choose_dropdown_option(wait, element_id:str, steps_down:int)->None:
    """Pick a dropdown entry by pressing ARROW_DOWN ``steps_down`` times followed by RETURN."""
    dropdown = wait.until(EC.visibility_of_element_located((By.ID, element_id)))
    for _ in range(steps_down):
        dropdown.send_keys(Keys.ARROW_DOWN)
    dropdown.send_keys(Keys.RETURN)


def fill_application(
                     link:str,
                     user_first_name:str,
                     user_last_name:str,
                     user_email:str,
                     user_phone_number:str)->bool:
    """Fill the application form found at the given URL (``link``).

    Since this is a demo, the application is not sent to the agent. After the
    fields have been filled with the user data, the browser is closed.

    Parameters
    ----------
    link : str
        URL of the apartment page that contains the contact form.
    user_first_name, user_last_name, user_email, user_phone_number : str
        Applicant data typed into the corresponding form fields.

    Returns
    -------
    bool
        ``True`` when every field was filled. ``False`` when an expected
        element did not show up in time or the agent name could not be read,
        so that the caller can keep the link marked as "not yet applied".
    """
    user_name = f"{user_first_name} {user_last_name}"

    # Open Chrome
    driver = webdriver.Chrome()
    try:
        driver.get(link)

        wait = WebDriverWait(driver, 10)

        # Block until the document reports that it finished loading
        wait.until(lambda d: d.execute_script("return document.readyState") == "complete")

        # Extract address
        address_element = wait.until(
            EC.visibility_of_element_located((By.CSS_SELECTOR, "div.object-detail__address"))
        )
        address = address_element.text
        print(address)

        # Extract agent name: the second element with this class is used
        # NOTE(review): presumably the first match is not the agent's name; confirm against the page
        agent_elements = wait.until(
            EC.presence_of_all_elements_located((By.CLASS_NAME, "contactbox__name"))
        )
        agent = agent_elements[1].text
        print(agent)

        custom_message = generate_custom_message(user_name=user_name, agent=agent, address=address)

        # Free-text message and personal data
        _fill_text_field(wait, "message", custom_message, click_first=True)
        _fill_text_field(wait, "first-name", user_first_name)
        _fill_text_field(wait, "last-name", user_last_name)
        _fill_text_field(wait, "email", user_email)
        _fill_text_field(wait, "phone", user_phone_number)

        # The working status dropdown ("currentEmployment") is left at its default,
        # which in most cases does not need to be changed.

        # Income type: one step down selects "Eigenes Einkommen"
        _choose_dropdown_option(wait, "incomeType", steps_down=1)

        # Income level: two steps down selects "von 2001€ bis 3000€"
        _choose_dropdown_option(wait, "incomeLevel", steps_down=2)

        print(f"DEMO: Application for object at {address} has been sent!\nThe following message has been used\n{custom_message}")

        return True

    except (TimeoutException, NoSuchElementException, IndexError) as error:
        # The page does not look as expected (e.g. listing removed, layout changed):
        # report it and let the caller retry this link on a later run.
        print(f"Application for link-object {link} could not be filled: {error!r}")
        return False
    finally:
        # Close the browser in every case; otherwise each processed link leaves a Chrome window open
        driver.quit()
    
    

In [5]:
user_first_name = "Max"
user_last_name = "Mustermann"
# NOTE(review): placeholder value without "@"; replace with a real address before real use
user_email = "maxmustermann"
user_phone_number = "+49123456789"

# Iterate over the dictionary of links. If the value of a link is False, fill the application;
# otherwise skip it. The dictionary is written back in a `finally` block so that the links
# already handled are remembered even if a later application raises. Without this, a crash
# would discard the progress and the same agents would be contacted again on the next run.
try:
    for link, value in dict_links.items():
        if not value:
            # Only the value of an existing key is replaced, which is safe while iterating
            dict_links[link] = fill_application(link=link,
                                                user_first_name=user_first_name,
                                                user_last_name=user_last_name,
                                                user_email=user_email,
                                                user_phone_number=user_phone_number)
        else:
            print(f"Application for link-object {link} has been already sent!")
finally:
    with open(file_path, 'wb') as file:
        pickle.dump(dict_links, file)

Rüdickenstr. 20, 13053 Berlin, Neu-Hohenschönhausen
Malush Ada Witte
3
DEMO: Application for object at Rüdickenstr. 20, 13053 Berlin, Neu-Hohenschönhausen has been sent!
The following message has been used
Sehr geehrter Herr Witte,

ich hoffe, dass meine E-Mail Sie in bester Gesundheit erreicht. Mein Name ist Max Mustermann und ich schreibe Ihnen, um mein Interesse an Ihrem aktuellen Wohnungsinserat auszudrücken.

Ich habe Ihr Angebot für die Wohnung in Rüdickenstr. 20, 13053 Berlin, Neu-Hohenschönhausen sorgfältig geprüft und es entspricht genau meinen Anforderungen. Die Wohnung klingt äußerst vielversprechend, und ich bin überzeugt, dass sie meinen Bedürfnissen und Vorlieben entspricht.

Ich möchte Sie darüber informieren, dass alle meine erforderlichen Dokumente und Unterlagen bereits vollständig vorbereitet sind. Ich bin bereit, sie Ihnen zur Verfügung zu stellen, um den Mietprozess zu erleichtern.

Falls möglich, würde ich gerne einen Besichtigungstermin vereinbaren, um die Wohnun

In [None]:
# Manual check of the gender lookup; this sends a live HTTP request to api.genderize.io
get_gender_from_api("Katharina")

'female'