Skip to content

Commit

Permalink
Prep v4.2.2 release (#86)
Browse files Browse the repository at this point in the history
* Prep v4.2.2

* Prep v4.2.2
  • Loading branch information
jadchaar committed Jul 15, 2021
1 parent dc26c38 commit edea397
Show file tree
Hide file tree
Showing 9 changed files with 123 additions and 25 deletions.
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Expand Up @@ -21,11 +21,11 @@ repos:
- id: trailing-whitespace
exclude: ^tests/sample-filings/
- repo: https://github.com/timothycrosley/isort
rev: 5.9.1
rev: 5.9.2
hooks:
- id: isort
- repo: https://github.com/asottile/pyupgrade
rev: v2.19.4
rev: v2.21.0
hooks:
- id: pyupgrade
args: [--py36-plus]
Expand Down
20 changes: 20 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,25 @@
# Changelog

## 4.2.2 - 7/14/2021

### New

- CIKs are now automatically zero-padded to 10 digits to ensure that filings are accurately retrieved by the SEC Edgar system. For example, passing either `"0000789019"` or `"789019"` (the CIK for MSFT) to `get()` will yield equivalent results:

```python
>>> dl.get("10-K", "0000789019", amount=1)
1
>>> dl.get("10-K", "789019", amount=1)
1
```

### Fixed

- Updated the `User-Agent` header to comply with new [SEC Edgar Fair Access requirements](https://www.sec.gov/os/accessing-edgar-data). This should resolve the 403 network errors some users are encountering when downloading a significant number of filings.

### Changed
- A `ValueError` is now raised when a CIK longer than 10 digits or a blank ticker/CIK is passed to `get()`.

## 4.2.1 - 6/22/2021

### Fixed
Expand Down
2 changes: 1 addition & 1 deletion Makefile
@@ -1,6 +1,6 @@
.PHONY: auto test docs clean

auto: build38
auto: build39

build36: PYTHON_VER = python3.6
build37: PYTHON_VER = python3.7
Expand Down
14 changes: 14 additions & 0 deletions sec_edgar_downloader/Downloader.py
Expand Up @@ -10,6 +10,7 @@
download_filings,
get_filing_urls_to_download,
get_number_of_unique_filings,
is_cik,
validate_date_format,
)

Expand Down Expand Up @@ -111,6 +112,19 @@ def get(
"""
ticker_or_cik = str(ticker_or_cik).strip().upper()

# Check for blank tickers or CIKs
if not ticker_or_cik:
raise ValueError("Invalid ticker or CIK. Please enter a non-blank value.")

# Detect CIKs and ensure that they are properly zero-padded
if is_cik(ticker_or_cik):
if len(ticker_or_cik) > 10:
raise ValueError("Invalid CIK. CIKs must be at most 10 digits long.")
# Pad CIK with 0s to ensure that it is exactly 10 digits long
# The SEC Edgar Search API requires zero-padded CIKs to ensure
# that search results are accurate. Relates to issue #84.
ticker_or_cik = ticker_or_cik.zfill(10)

if amount is None:
# If amount is not specified, obtain all available filings.
# We simply need a large number to denote this and the loop
Expand Down
19 changes: 14 additions & 5 deletions sec_edgar_downloader/_utils.py
@@ -1,5 +1,4 @@
"""Utility functions for the downloader class."""

import time
from collections import namedtuple
from datetime import datetime
Expand Down Expand Up @@ -156,7 +155,7 @@ def get_filing_urls_to_download(
resp = client.post(
SEC_EDGAR_SEARCH_API_ENDPOINT,
json=payload,
headers=generate_random_user_agent(),
headers={"User-Agent": generate_random_user_agent()},
)
resp.raise_for_status()
search_query_results = resp.json()
Expand Down Expand Up @@ -245,7 +244,9 @@ def download_and_save_filing(
*,
resolve_urls: bool = False,
) -> None:
resp = client.get(download_url, headers=generate_random_user_agent())
resp = client.get(
download_url, headers={"User-Agent": generate_random_user_agent()}
)
resp.raise_for_status()
filing_text = resp.content

Expand Down Expand Up @@ -322,5 +323,13 @@ def get_number_of_unique_filings(filings: List[FilingMetadata]) -> int:
return len({metadata.accession_number for metadata in filings})


def generate_random_user_agent() -> dict:
return {"User-Agent": f"{fake.first_name()} {fake.last_name()} {fake.email()}"}
def generate_random_user_agent() -> str:
return f"{fake.first_name()} {fake.last_name()} {fake.email()}"


def is_cik(ticker_or_cik: str) -> bool:
    """Return whether ``ticker_or_cik`` looks like a CIK rather than a ticker.

    A value is treated as a CIK when its string form, ignoring surrounding
    whitespace, consists solely of the ASCII digits 0-9. Values that
    ``int()`` would accept but that are not plain digit strings (signed
    numbers such as ``"-123"``, underscore-separated literals such as
    ``"1_000"``, or non-ASCII digits) are rejected, so callers never
    zero-pad them into a malformed CIK.

    :param ticker_or_cik: ticker symbol or CIK; non-string values (e.g. an
        ``int`` CIK) are converted with ``str()`` before being checked.
    :return: ``True`` if the value is a non-empty run of ASCII digits,
        ``False`` otherwise.
    """
    candidate = str(ticker_or_cik).strip()
    # An explicit digit set is used instead of str.isdigit(), which also
    # accepts non-ASCII digit characters.
    return bool(candidate) and all(char in "0123456789" for char in candidate)
2 changes: 1 addition & 1 deletion sec_edgar_downloader/_version.py
@@ -1 +1 @@
__version__ = "4.2.1"
__version__ = "4.2.2"
2 changes: 1 addition & 1 deletion tests/test_detail_downloads.py
@@ -1,4 +1,4 @@
# Regression test for issue 60
# Regression test for issue #60
def test_recursion_error_older_filings(downloader):
dl, _ = downloader

Expand Down
72 changes: 58 additions & 14 deletions tests/test_user_input.py
Expand Up @@ -20,9 +20,9 @@ def test_invalid_filing_type(downloader):
ticker = "AAPL"
expected_msg = f"'{fake_filing_type}' filings are not supported."

with pytest.raises(ValueError) as excinfo:
with pytest.raises(ValueError) as exc_info:
dl.get(fake_filing_type, ticker)
assert expected_msg in str(excinfo.value)
assert expected_msg in str(exc_info.value)


def test_invalid_ticker(downloader):
Expand All @@ -47,20 +47,64 @@ def test_invalid_ticker(downloader):
assert not filing_type_save_path.exists()


def test_invalid_cik(downloader):
    """A CIK longer than 10 digits is rejected with a ValueError."""
    dl, _ = downloader
    too_long_cik = "12345678910"  # 11 digits, one more than allowed

    with pytest.raises(ValueError) as exc_info:
        dl.get("10-K", too_long_cik, amount=1)

    error_text = str(exc_info.value)
    assert "Invalid CIK. CIKs must be at most 10 digits long." in error_text


def test_blank_ticker(downloader):
    """An empty ticker/CIK string is rejected with a ValueError."""
    dl, _ = downloader
    blank_ticker = ""

    with pytest.raises(ValueError) as exc_info:
        dl.get("10-K", blank_ticker, amount=1)

    error_text = str(exc_info.value)
    assert "Invalid ticker or CIK. Please enter a non-blank value." in error_text


def test_cik_zero_padding(downloader):
    """Regression test for issue #84: padded and trimmed CIKs are equivalent."""
    dl, dl_path = downloader
    filing_type = "10-K"

    # Request the same CIK twice: first fully zero-padded, then with the
    # leading zeros trimmed.
    downloaded_counts = [
        dl.get(filing_type, cik, amount=1) for cik in ("0000789019", "789019")
    ]
    assert downloaded_counts == [1, 1]

    # When padding works, both downloads land under the single parent
    # directory 0000789019. Without it there would be two parent
    # directories: 789019 and 0000789019.
    saved_parent_dirs = list(dl_path.glob("*"))
    assert len(saved_parent_dirs) == 1


def test_invalid_num_filings_to_download(downloader):
dl, _ = downloader
expected_msg = "Invalid amount. Please enter a number greater than 1."

filing_type = "10-K"
ticker = "AAPL"

with pytest.raises(ValueError) as excinfo:
with pytest.raises(ValueError) as exc_info:
dl.get(filing_type, ticker, amount=-1)
assert expected_msg in str(excinfo.value)
assert expected_msg in str(exc_info.value)

with pytest.raises(ValueError) as excinfo:
with pytest.raises(ValueError) as exc_info:
dl.get(filing_type, ticker, amount=0)
assert expected_msg in str(excinfo.value)
assert expected_msg in str(exc_info.value)


def test_invalid_before_and_after_dates(downloader):
Expand All @@ -77,13 +121,13 @@ def test_invalid_before_and_after_dates(downloader):
before_date = datetime(2019, 11, 15)
incorrect_date_format = "%Y%m%d"

with pytest.raises(ValueError) as excinfo:
with pytest.raises(ValueError) as exc_info:
dl.get(filing_type, ticker, after=after_date.strftime(incorrect_date_format))
assert expected_msg in str(excinfo.value)
assert expected_msg in str(exc_info.value)

with pytest.raises(ValueError) as excinfo:
with pytest.raises(ValueError) as exc_info:
dl.get(filing_type, ticker, before=before_date.strftime(incorrect_date_format))
assert expected_msg in str(excinfo.value)
assert expected_msg in str(exc_info.value)


def test_valid_before_and_after_date_combos(downloader):
Expand Down Expand Up @@ -130,14 +174,14 @@ def test_invalid_before_and_after_date_combos(downloader):
# after_date > before_date
after_date += timedelta(days=3)
expected_msg = "Invalid after and before date combination."
with pytest.raises(ValueError) as excinfo:
with pytest.raises(ValueError) as exc_info:
dl.get(
filing_type,
ticker,
after=after_date.strftime(DATE_FORMAT_TOKENS),
before=before_date.strftime(DATE_FORMAT_TOKENS),
)
assert expected_msg in str(excinfo.value)
assert expected_msg in str(exc_info.value)


def test_pre_default_after_date(downloader):
Expand All @@ -148,9 +192,9 @@ def test_pre_default_after_date(downloader):

invalid_date = DEFAULT_AFTER_DATE - timedelta(days=1)
expected_msg = f"Filings cannot be downloaded prior to {DEFAULT_AFTER_DATE.year}."
with pytest.raises(ValueError) as excinfo:
with pytest.raises(ValueError) as exc_info:
dl.get(filing_type, ticker, after=invalid_date.strftime(DATE_FORMAT_TOKENS))
assert expected_msg in str(excinfo.value)
assert expected_msg in str(exc_info.value)


def test_non_string_date(downloader):
Expand Down
13 changes: 12 additions & 1 deletion tests/test_utils.py
@@ -1,6 +1,6 @@
"""Test miscellaneous utility functions."""

from sec_edgar_downloader._utils import resolve_relative_urls_in_filing
from sec_edgar_downloader._utils import is_cik, resolve_relative_urls_in_filing


def test_resolve_relative_urls():
Expand Down Expand Up @@ -43,3 +43,14 @@ def test_resolve_relative_urls():
assert resolved_filing_html.count(f'"{sample_anchor_fragment}"') == 2
assert resolved_filing_html.count(f'"{base_url}{sample_anchor_html}"') == 1
assert sample_filing_html.count(f'"{sample_anchor_full_url}"') == 1


def test_is_cik():
    """is_cik accepts numeric identifiers and rejects tickers and blanks."""
    # A CIK may be supplied fully zero-padded to 10 digits, with the
    # leading zeros trimmed, or as an int.
    for cik in ("0000789019", "789019", 789019):
        assert is_cik(cik)

    for not_a_cik in ("AAPL", ""):
        assert not is_cik(not_a_cik)

0 comments on commit edea397

Please sign in to comment.