Skip to content

Commit

Permalink
Merge pull request #14 from PJUllrich/main
Browse files Browse the repository at this point in the history
Füge Suche nach Bekanntmachungen hinzu.
  • Loading branch information
LilithWittmann committed Aug 13, 2021
2 parents b79be06 + 8f9070f commit fed57b1
Show file tree
Hide file tree
Showing 7 changed files with 1,195 additions and 303 deletions.
327 changes: 147 additions & 180 deletions deutschland/handelsregister/handelsregister.py
Original file line number Diff line number Diff line change
@@ -1,51 +1,11 @@
import requests

from bs4 import BeautifulSoup
from typing import Dict
from datetime import date

from deutschland.handelsregister.registrations import Registrations
from deutschland.handelsregister.publications import Publications

class Handelsregister:
    """Holds the endpoint, headers and default form fields of the search form.

    The three class attributes below are the static inputs for a form POST
    against the Handelsregister search mask: the target URL, the request
    headers, and the baseline form payload that caller-supplied parameters
    are merged over.
    """

    # URL the search form is submitted to.
    SEARCH_URL = "https://www.handelsregister.de/rp_web/mask.do?Typ=e"

    # Headers sent with every search request. They mirror what a desktop
    # Chrome browser sends when submitting the form from the search page.
    REQUEST_HEADERS = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
        "Cache-Control": "max-age=0",
        "Connection": "keep-alive",
        "Content-Type": "application/x-www-form-urlencoded",
        "DNT": "1",
        "Host": "www.handelsregister.de",
        "Origin": "https://www.handelsregister.de",
        "Referer": "https://www.handelsregister.de/rp_web/mask.do?Typ=e",
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua": '"Chromium";v="92", " Not A;Brand";v="99", "Google Chrome";v="92"',
        "Sec-Fetch-Dest": "document",
        "Sec-Fetch-Mode": "navigate",
        "Sec-Fetch-Site": "same-origin",
        "Sec-Fetch-User": "?1",
        "sec-gpc": "1",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36",
    }

    # Baseline form payload. Fields left as None/False are the "not set"
    # state of the form; "schlagwortOptionen": 2 selects "match at least one
    # keyword" and "ergebnisseProSeite" caps the result count at 100.
    DEFAULT_FORM_DATA = {
        "schlagwoerter": None,
        "schlagwortOptionen": 2,
        "suchOptionenAehnlich": False,
        "niederlassung": None,
        "suchOptionenGeloescht": False,
        "suchOptionenNurZNneuenRechts": False,
        "suchTyp": "e",
        "registerArt": None,
        "registerNummer": None,
        "registergericht": None,
        "rechtsform": None,
        "postleitzahl": None,
        "ort": None,
        "strasse": None,
        "ergebnisseProSeite": "100",
        "btnSuche": "Suchen",
    }

class Handelsregister:
def search(
self,
keywords: str = None,
Expand Down Expand Up @@ -135,155 +95,162 @@ def search(
"strasse": street,
"ergebnisseProSeite": limit,
}
return self.search_with_raw_params(params)
r = Registrations()
return r.search_with_raw_params(params)

def search_publications(
self,
county_code: str = None,
court_code: str = None,
court_name: str = None,
from_date: date = None,
until_date: date = None,
company_name: str = None,
head_office_location: str = None,
registration_type: str = None,
registration_number: str = None,
publication_type: int = 0,
order_by: int = 4,
detailed_search: bool = False,
):
"""
Search all publications by the Handelsregister.
def search_with_raw_params(self, params: Dict[str, str] = {}):
"""Searches the Handelsregister with a given dict of parameters.
Broad searches (e.g. find publications for all counties and all courts)
are only available for the past 4 weeks. If you want to search for older
publications, you must set 'detailed_search' to 'True' and provide the
'county_code', 'court_code', 'court_name' parameters, and at least one
of the following: 'company_name', 'head_office_location',
or 'registration_type' and 'registration_number'.
Parameters
----------
params : dict
The parameters for the search. Detailed description below.
Search Parameters
-----------------
schlagwoerter : string
One or more space-separated keywords, e.g. the company name.
county_code: str
The county code in which to search.
Valid options are:
by: Bayern
be: Berlin
br: Brandenburg
hb: Bremen
hh: Hamburg
he: Hessen
mv: Mecklenburg-Vorpommern
ni: Niedersachsen
nw: Nordrhein-Westfalen
rp: Rheinland-Pfalz
sl: Saarland
sn: Sachsen
st: Sachsen-Anhalt
sh: Schleswig-Holstein
th: Thüringen
court_code: str
The code for the court as specified in 'params.md'.
Both the court code and the court name must be provided.
court_name: str
The name of the court as specified in 'params.md'.
Both the court code and the court name must be provided.
from_date: date
Search for entries published after this date.
Publications older than 4 weeks can only be searched as
a 'detailed_search' as described above.
until_date: date
Search for entries published before this date.
company_name: str
The name of the company. Must be an exact match.
schlagwortOptionen : int
Options for the 'schlagwoerter' parameter.
1 : Match must contain all keywords.
2 : Match must contain at least one keyword.
3 : Match's company name must equal the keyword(s).
suchOptionenAehnlich : bool
Match can contain similar keywords as specified in 'schlagwoerter'.
bundeslandXX : string
Search only in specified counties (bundeslaender).
If no county is specified, all counties are searched.
Each county must be specified individually in the following format:
{"bundeslandXX": "on"}, where 'XX' is of the following codes:
BW, BY, BE, BR, HB, HH, HE, MV, NI, NW, RP, SL, SN, ST, SH, TH
niederlassung : string
Location of the company.
suchOptionenGeloescht : bool
Search also for deleted companies.
suchOptionenNurZNneuenRechts : bool
Include only 'Zweigniederlassungen' registered after the 01.01.2007.
More info here: https://www.handelsregister.de/rp_web/help.do?Thema=zweigniederlassungen
registerArt : string
Type of company registration.
Possible values: HRA, HRB, GnR, PR, VR
registerNummer : string
The registration number of the company.
registergericht : string
The district court where the company is registered.
rechtsform : int
The legal form of the company.
Possible values can be found in 'params.md'.
postleitzahl : string
The postal code of the company.
ort : string
The city of the company address.
head_office_location: str
The city where the head office of the company is located.
strasse : string
The street of the company address.
registration_type: str
The type of the company registration.
Valid types are:
"HRA", "HRB", "GnR", "VR", "PR", "AR"
ergebnisseProSeite : int
How many matches to return. Defaults to 100.
registration_number: str
The number of the company registration.
publication_type: int
The type of publication to search for.
Valid options are:
0 : All types of publications
1 : New registrations
2 : Registration changes
3 : Registrations deleted by the court
4 : Deletion announcements
5 : Deletions
6 : Granted Permissions
7 : Other procedures
order_by: int
How to order the publication results.
Valid options are:
1 : Registration Number
2 : Company name
3 : Order by creation date of publication
4 : Order by publication date
"""
search_params = {**self.DEFAULT_FORM_DATA, **params}

response = requests.post(
self.SEARCH_URL, data=search_params, headers=self.REQUEST_HEADERS
)
if response.status_code != 200:
return None

soup = BeautifulSoup(response.text, "html.parser")
return self.__find_entries(soup)

def __find_entries(self, soup):
    """Build one dict per search hit from the parsed result page.

    Looks up the ``table.RegPortErg`` element in ``soup`` and walks its rows
    after the header. A row containing a ``RegPortErg_AZ`` cell starts a new
    entry; ``RegPortErg_FirmaKopf`` rows add fields to the current entry and
    ``RegPortErg_HistorieZn`` rows are appended to its ``"history"`` list.

    Returns a list of entry dicts, or an empty list when the result table
    is not present.
    """
    result_table = soup.find("table", class_="RegPortErg")
    if result_table is None:
        return []

    entries = []
    current = {}

    # Index 0 is the header row, so iteration starts at the second row.
    for row in result_table.find_all("tr")[1:]:
        if row.find("td", class_="RegPortErg_AZ"):
            parsed = self.__extract_county_court_and_registration(row)

            # A .RegPortErg_AZ cell marks the start of the next entry, so
            # the entry collected so far is complete and can be stored.
            if current:
                entries.append(current.copy())

            current = parsed
            continue

        if row.find("td", class_="RegPortErg_FirmaKopf"):
            parsed = self.__extract_name_location_and_state(row)
            current.update(parsed)
            continue

        if row.find("td", class_="RegPortErg_HistorieZn"):
            parsed = self.__extract_history(row)
            current.setdefault("history", []).append(parsed)

    # The last entry has no following .RegPortErg_AZ row to flush it.
    if current:
        entries.append(current)

    return entries

def __extract_county_court_and_registration(self, row):
td = row.find("td", class_="RegPortErg_AZ")
county = td.contents[2].strip()
court_and_registration = (
td.contents[3].text.strip().replace("\n", " ").replace("\t", "").split()
# A detailed search needs BOTH a fully identified court (county code, court
# code and court name) AND at least one company criterion. The previous
# check only rejected calls where both groups were missing, so a detailed
# search with just one of the two groups slipped through.
has_court = bool(county_code and court_code and court_name)
has_company_criterion = bool(
    company_name
    or head_office_location
    or (registration_type and registration_number)
)
if detailed_search and not (has_court and has_company_criterion):
    # ValueError is a subclass of Exception, so existing
    # `except Exception` handlers keep working.
    raise ValueError(
        """
In the detailed search, you must provide 'county_code',
'court_code', and 'court_name' as well
as either 'company_name', 'head_office_location' or
'registration_type' and 'registration_number'.
"""
    )

# Translate the registration type (e.g. "HRB") into the short code used by
# the publication search form. No type given means no code; a type that is
# not in the table raises KeyError.
reg_code = None
if registration_type:
    reg_code = {
        "HRA": "A",
        "HRB": "B",
        "GnR": "G",
        "VR": "V",
        "PR": "P",
        "AR": "AR",
    }[registration_type]
court = " ".join(court_and_registration[:-2])
[reg_type, reg_num] = court_and_registration[-2:]

return {
"county": county,
"court": court,
"registration_type": reg_type,
"registration_number": reg_num,
}

def __extract_name_location_and_state(self, row):
    """Read company name, location and state from a result row.

    The values are the whitespace-stripped texts of the second, third and
    fourth ``<td>`` cells of ``row`` (the first cell is not used).

    Returns a dict with the keys ``company_name``, ``location`` and
    ``state``.
    """
    cells = row.find_all("td")
    return {
        "company_name": cells[1].text.strip(),
        "location": cells[2].text.strip(),
        "state": cells[3].text.strip(),
    }

def __extract_history(self, row):
tds = row.find_all("td")
[position, historical_name] = tds[1].text.strip().split(".) ", 1)
historical_location = tds[2].text.strip().split(".) ", 1)[1]

return {
"position": position,
"historical_name": historical_name,
"historical_location": historical_location,
# Form fields for the publication search. Each date is split into separate
# day / month / year fields: "vt"/"vm"/"vj" carry the from-date and
# "bt"/"bm"/"bj" the until-date. The until-date month and year previously
# both sent `until_date.day`; they now use `.month` and `.year`.
params = {
    "suchart": "detail" if detailed_search else "uneingeschr",
    "land": county_code,
    "gericht": court_code,
    "gericht_name": court_name,
    "vt": from_date.day if from_date else None,
    "vm": from_date.month if from_date else None,
    "vj": from_date.year if from_date else None,
    "bt": until_date.day if until_date else None,
    "bm": until_date.month if until_date else None,
    "bj": until_date.year if until_date else None,
    "fname": company_name,
    "fsitz": head_office_location,
    "rubrik": reg_code,
    "az": registration_number,
    "gegenstand": publication_type,
    "order": order_by,
}

p = Publications()
return p.search_with_raw_params(params)


if __name__ == "__main__":
    # Manual smoke test: run an unrestricted publication search and print
    # the result. The earlier registration search was dropped here because
    # its result was overwritten before being printed; to try it instead,
    # call hr.search() with `keywords` and `keyword_match_option`.
    hr = Handelsregister()
    res = hr.search_publications()
    print(res)
Loading

0 comments on commit fed57b1

Please sign in to comment.