Skip to content

Commit

Permalink
Fix tests
Browse files Browse the repository at this point in the history
  • Loading branch information
charmoniumQ committed Mar 17, 2022
1 parent 491299a commit 316552f
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 7 deletions.
9 changes: 6 additions & 3 deletions ascl_net_scraper/lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ def scrape_index_lazy(

# Matches the leading GitHub URL of a string: scheme, the literal host
# "github.com" (dots escaped so they cannot match arbitrary characters), and
# one or more path characters. The "+" makes the match cover the whole
# owner/repo path instead of stopping after its first character; "_" is
# included because it is legal in repository names.
github_regex = re.compile(r"https?://github\.com/[a-zA-Z0-9_.\-/]+")


@dataclass
class DetailedCodeRecord:
"""Detailed information about a code, for example <https://ascl.net/0000.000>."""
Expand All @@ -106,6 +107,7 @@ class DetailedCodeRecord:
def github(self) -> Optional[str]:
return cast(Optional[str], get_github_for(self))


@memoize(group=group)
def get_github_for(record: DetailedCodeRecord) -> Optional[str]:
# First, see if any code_site is a github site.
Expand All @@ -121,13 +123,14 @@ def get_github_for(record: DetailedCodeRecord) -> Optional[str]:
except requests.exceptions.RequestException:
# A lot of old sites are dead.
continue
for tag in bs4.BeautifulSoup(text).find_all("a"):
if re.match(tag.attrs["href"], text):
return tag.attrs["href"]
for tag in bs4.BeautifulSoup(text, DEFAULT_PARSER).find_all("a"):
if "href" in tag.attrs and re.match(tag.attrs["href"], text):
return cast(str, tag.attrs["href"])

# Third, give up.
return None


def dl_to_dict(dl: bs4.Tag) -> Mapping[str, bs4.Tag]:
return {
key.text: cast(bs4.Tag, val)
Expand Down
13 changes: 9 additions & 4 deletions tests/test_main.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,21 @@
from tqdm import tqdm
import os.path
import shutil
shutil.rmtree(".cache")

from tqdm import tqdm

if os.path.exists(".cache"):
shutil.rmtree(".cache")

from ascl_net_scraper import __version__, scrape_details, scrape_index


def test_main() -> None:
    """Check that index records agree with their scraped detail pages.

    Calls ``scrape_index(20)``, fetches the detail page for every returned
    record via ``scrape_details``, and asserts that the fields available on
    both objects (ASCL id, title, credit, abstract, and URL) are equal. It
    also asserts that ``detailed_record.github`` evaluates to either a
    ``str`` or ``None``.
    """
    # Scrape the index exactly once; the result is reused for the progress
    # bar total and for the loop itself.
    records = scrape_index(20)
    for record in tqdm(records, total=len(records)):
        detailed_record = scrape_details(record.details_url)
        assert record.ascl_id == detailed_record.ascl_id
        assert record.title == detailed_record.title
        assert record.credit == detailed_record.credit
        assert record.abstract == detailed_record.abstract
        assert record.details_url == detailed_record.url
        # Accessing .github inside the assertion both exercises the lookup
        # and verifies the type of its result.
        assert isinstance(detailed_record.github, (str, type(None)))

0 comments on commit 316552f

Please sign in to comment.