Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Füge Suche nach Bekanntmachungen hinzu. #14

Merged
merged 6 commits into from
Aug 13, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
327 changes: 147 additions & 180 deletions deutschland/handelsregister/handelsregister.py
Original file line number Diff line number Diff line change
@@ -1,51 +1,11 @@
import requests

from bs4 import BeautifulSoup
from typing import Dict
from datetime import date

from deutschland.handelsregister.registrations import Registrations
from deutschland.handelsregister.publications import Publications

class Handelsregister:
SEARCH_URL = "https://www.handelsregister.de/rp_web/mask.do?Typ=e"

REQUEST_HEADERS = {
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
"Cache-Control": "max-age=0",
"Connection": "keep-alive",
"Content-Type": "application/x-www-form-urlencoded",
"DNT": "1",
"Host": "www.handelsregister.de",
"Origin": "https://www.handelsregister.de",
"Referer": "https://www.handelsregister.de/rp_web/mask.do?Typ=e",
"sec-ch-ua-mobile": "?0",
"sec-ch-ua": '"Chromium";v="92", " Not A;Brand";v="99", "Google Chrome";v="92"',
"Sec-Fetch-Dest": "document",
"Sec-Fetch-Mode": "navigate",
"Sec-Fetch-Site": "same-origin",
"Sec-Fetch-User": "?1",
"sec-gpc": "1",
"Upgrade-Insecure-Requests": "1",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36",
}

DEFAULT_FORM_DATA = {
"schlagwoerter": None,
"schlagwortOptionen": 2,
"suchOptionenAehnlich": False,
"niederlassung": None,
"suchOptionenGeloescht": False,
"suchOptionenNurZNneuenRechts": False,
"suchTyp": "e",
"registerArt": None,
"registerNummer": None,
"registergericht": None,
"rechtsform": None,
"postleitzahl": None,
"ort": None,
"strasse": None,
"ergebnisseProSeite": "100",
"btnSuche": "Suchen",
}

class Handelsregister:
def search(
self,
keywords: str = None,
Expand Down Expand Up @@ -135,155 +95,162 @@ def search(
"strasse": street,
"ergebnisseProSeite": limit,
}
return self.search_with_raw_params(params)
r = Registrations()
return r.search_with_raw_params(params)

def search_publications(
self,
county_code: str = None,
court_code: str = None,
court_name: str = None,
from_date: date = None,
until_date: date = None,
company_name: str = None,
head_office_location: str = None,
registration_type: str = None,
registration_number: str = None,
publication_type: int = 0,
order_by: int = 4,
detailed_search: bool = False,
):
"""
Search all publications by the Handelsregister.

def search_with_raw_params(self, params: Dict[str, str] = {}):
"""Searches the Handelsregister with a given dict of parameters.
Broad searches (e.g. find publications for all counties and all courts)
are only available for the past 4 weeks. If you want to search for older
publications, you must set 'detailed_search' to 'True' and provide the
'county_code', 'court_code', 'court_name' parameters, and at least one
of the following: 'company_name', 'head_office_location',
or 'registration_type' and 'registration_number'.

Parameters
----------
params : dict
The parameters for the search. Detailed description below.

Search Parameters
-----------------
schlagwoerter : string
One or more space-separated keywords, e.g. the company name.
county_code: str
The county code in which to search.

Valid options are:
by: Bayern
be: Berlin
br: Brandenburg
hb: Bremen
hh: Hamburg
he: Hessen
mv: Mecklenburg-Vorpommern
ni: Niedersachsen
nw: Nordrhein-Westfalen
rp: Rheinland-Pfalz
sl: Saarland
sn: Sachsen
st: Sachsen-Anhalt
sh: Schleswig-Holstein
th: Thüringen

court_code: str
The code for the court as specified in 'params.md'.
Both the court code and the court name must be provided.

court_name: str
The name of the court as specified in 'params.md'.
Both the court code and the court name must be provided.

from_date: date
Search for entries published after this date.
Publications older than 4 weeks can only be searched as
a 'detailed_search' as described above.

until_date: date
Search for entries published before this date.

company_name: str
The name of the company. Must be an exact match.

schlagwortOptionen : int
Options for the 'schlagwoerter' parameter.
1 : Match must contain all keywords.
2 : Match must contain at least one keyword.
3 : Match's company name must equal the keyword(s).

suchOptionenAehnlich : bool
Match can contain similar keywords as specified in 'schlagwoerter'.

bundeslandXX : string
Search only in specified counties (bundeslaender).
If no county is specified, all counties are searched.

Each county must be specified individually in the following format:
{"bundeslandXX": "on"}, where 'XX' is of the following codes:
BW, BY, BE, BR, HB, HH, HE, MV, NI, NW, RP, SL, SN, ST, SH, TH

niederlassung : string
Location of the company.

suchOptionenGeloescht : bool
Search also for deleted companies.

suchOptionenNurZNneuenRechts : bool
Include only 'Zweigniederlassungen' registered after the 01.01.2007.
More info here: https://www.handelsregister.de/rp_web/help.do?Thema=zweigniederlassungen

registerArt : string
Type of company registration.
Possible values: HRA, HRB, GnR, PR, VR

registerNummer : string
The registration number of the company.

registergericht : string
The district court where the company is registered.

rechtsform : int
The legal form of the company.
Possible values can be found in 'params.md'.

postleitzahl : string
The postal code of the company.

ort : string
The city of the company address.
head_office_location: str
The city where the head office of the company is located.

strasse : string
The street of the company address.
registration_type: str
The type of the company registration.
Valid types are:
"HRA", "HRB", "GnR", "VR", "PR", "AR"

ergebnisseProSeite : int
How many matches to return. Defaults to 100.
registration_number: str
The number of the company registration.

publication_type: int
The type of publication to search for.
Valid options are:
0 : All types of publications
1 : New registrations
2 : Registration changes
3 : Registrations deleted by the court
4 : Deletion announcements
5 : Deletions
6 : Granted Permissions
7 : Other procedures

order_by: int
How to order the publication results.
Valid options are:
1 : Registration Number
2 : Company name
3 : Order by creation date of publication
4 : Order by publication date
"""
search_params = {**self.DEFAULT_FORM_DATA, **params}

response = requests.post(
self.SEARCH_URL, data=search_params, headers=self.REQUEST_HEADERS
)
if response.status_code != 200:
return None

soup = BeautifulSoup(response.text, "html.parser")
return self.__find_entries(soup)

def __find_entries(self, soup):
table = soup.find("table", class_="RegPortErg")
if table is None:
return []

trs = table.find_all("tr")

results = []
next_entry = {}

# Skip the first row, which is the header row
for tr in trs[1:]:
if tr.find("td", class_="RegPortErg_AZ"):
data = self.__extract_county_court_and_registration(tr)

# Save the current entry since we reached the next entry
# demarcated by the .RegPortErg_AZ table cell.
if next_entry:
results.append(next_entry.copy())

next_entry = data
elif tr.find("td", class_="RegPortErg_FirmaKopf"):
data = self.__extract_name_location_and_state(tr)
next_entry.update(data)
elif tr.find("td", class_="RegPortErg_HistorieZn"):
data = self.__extract_history(tr)
next_entry.setdefault("history", []).append(data)

if next_entry:
results.append(next_entry)

return results

def __extract_county_court_and_registration(self, row):
td = row.find("td", class_="RegPortErg_AZ")
county = td.contents[2].strip()
court_and_registration = (
td.contents[3].text.strip().replace("\n", " ").replace("\t", "").split()
# A detailed search needs BOTH groups of parameters: the full court
# identification (county code, court code, court name) AND at least one
# company filter (name, head office location, or registration type + number).
# The previous condition only raised when both groups were missing at once,
# so a request lacking just one of them slipped through unvalidated.
has_court_identification = bool(county_code and court_code and court_name)
has_company_filter = bool(
    company_name
    or head_office_location
    or (registration_type and registration_number)
)
if detailed_search and not (has_court_identification and has_company_filter):
    raise Exception(
        """
In the detailed search, you must provide 'county_code',
'court_code', and 'court_name' as well
as either 'company_name', 'head_office_location' or
'registration_type' and 'registration_number'.
"""
    )

reg_code = (
{
"HRA": "A",
"HRB": "B",
"GnR": "G",
"VR": "V",
"PR": "P",
"AR": "AR",
}[registration_type]
if registration_type
else None
)
court = " ".join(court_and_registration[:-2])
[reg_type, reg_num] = court_and_registration[-2:]

return {
"county": county,
"court": court,
"registration_type": reg_type,
"registration_number": reg_num,
}

def __extract_name_location_and_state(self, row):
tds = row.find_all("td")
name = tds[1].text.strip()
location = tds[2].text.strip()
state = tds[3].text.strip()

return {"company_name": name, "location": location, "state": state}

def __extract_history(self, row):
tds = row.find_all("td")
[position, historical_name] = tds[1].text.strip().split(".) ", 1)
historical_location = tds[2].text.strip().split(".) ", 1)[1]

return {
"position": position,
"historical_name": historical_name,
"historical_location": historical_location,
# Raw form fields for the publications search. The "from" date is split into
# vt / vm / vj (day / month / year); bt / bm / bj carry the "until" date in
# the same way, so each field must read its own date component. Previously
# "bm" and "bj" both sent the day of the month.
params = {
    "suchart": "detail" if detailed_search else "uneingeschr",
    "land": county_code,
    "gericht": court_code,
    "gericht_name": court_name,
    "vt": from_date.day if from_date else None,
    "vm": from_date.month if from_date else None,
    "vj": from_date.year if from_date else None,
    "bt": until_date.day if until_date else None,
    "bm": until_date.month if until_date else None,
    "bj": until_date.year if until_date else None,
    "fname": company_name,
    "fsitz": head_office_location,
    "rubrik": reg_code,
    "az": registration_number,
    "gegenstand": publication_type,
    "order": order_by,
}

p = Publications()
return p.search_with_raw_params(params)


if __name__ == "__main__":
hr = Handelsregister()
res = hr.search(keywords="Deutsche Bahn Aktiengesellschaft", keyword_match_option=3)
# res = hr.search(keywords="Deutsche Bahn Aktiengesellschaft", keyword_match_option=3)
res = hr.search_publications()
print(res)
Loading