/
utils.py
100 lines (88 loc) · 3.46 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import calendar
import logging
import time
import requests
from datetime import datetime
from github import Github
from github.GithubException import RateLimitExceededException, UnknownObjectException
from gitlab import Gitlab
from gitlab.exceptions import GitlabGetError
from bs4 import BeautifulSoup
logger = logging.getLogger(__name__)
def get_github_repository_data(g: Github, repository_name: str) -> dict[str, int]:
    """Fetch the star count and last-update time of a GitHub repository.

    If the GitHub rate limit is hit, the function sleeps until the core
    rate limit resets (plus a 10 second margin) and retries once.

    Args:
        g: PyGithub client used to perform the lookup.
        repository_name: Repository identifier passed to ``g.get_repo``.

    Returns:
        A dict with ``repository_stars_count`` and ``repository_last_update``
        (Unix timestamp in seconds), or an empty dict when the repository
        is not found.

    Raises:
        RateLimitExceededException: If the rate limit is hit again on the
            retry that follows the wait.
    """
    # Local import: the module itself only imports ``datetime.datetime``.
    from datetime import timezone

    repo = None
    for attempt in range(2):
        try:
            repo = g.get_repo(repository_name)
            break
        except UnknownObjectException:
            # Also covers the retry, which used to let this exception escape.
            logger.warning(f"Repository {repository_name} was not found on Github.")
            return {}
        except RateLimitExceededException:
            if attempt:
                # Already waited once; let the caller decide what to do.
                raise
            core_rate_limit = g.get_rate_limit().core
            # utctimetuple() is correct for both naive-UTC and tz-aware values.
            reset_timestamp = calendar.timegm(core_rate_limit.reset.utctimetuple())
            # Clamp at zero: time.sleep() raises ValueError on negative input,
            # which can happen when the reset time is already in the past.
            sleep_time = max(
                reset_timestamp - calendar.timegm(time.gmtime()) + 10, 0
            )
            logger.warning(f"Rate-limit detected, sleeping for {sleep_time} seconds.")
            time.sleep(sleep_time)

    updated_at = repo.updated_at
    if updated_at.tzinfo is None:
        # Older PyGithub releases return naive datetimes expressed in UTC;
        # without this, timestamp() would interpret them as local time.
        updated_at = updated_at.replace(tzinfo=timezone.utc)

    return {
        "repository_stars_count": repo.stargazers_count,
        "repository_last_update": int(updated_at.timestamp()),
    }
def get_gitlab_repository_data(gl: Gitlab, repository_name: str) -> dict[str, int]:
    """Fetch the star count and last-commit date of a GitLab project.

    Args:
        gl: python-gitlab client used to perform the lookup.
        repository_name: Project identifier passed to ``gl.projects.get``.

    Returns:
        A dict with ``repository_stars_count`` and ``repository_last_update``
        (Unix timestamp in seconds), or an empty dict when the project is
        not found or has no commits.
    """
    try:
        repo = gl.projects.get(repository_name)
        # Only the most recent commit is needed, so request a single item.
        latest_commits = repo.commits.list(per_page=1, get_all=False)
    except GitlabGetError:
        logger.warning(f"Repository {repository_name} was not found on Gitlab.")
        return {}

    if not latest_commits:
        # A project without commits previously crashed with IndexError.
        logger.warning(f"Repository {repository_name} has no commits on Gitlab.")
        return {}

    last_commit_date = latest_commits[0].committed_date
    return {
        "repository_stars_count": repo.star_count,
        # Only the leading YYYY-MM-DD part is parsed; the resulting naive
        # datetime is converted by timestamp() using the local timezone.
        "repository_last_update": int(
            datetime.strptime(last_commit_date[0:10], "%Y-%m-%d").timestamp()
        ),
    }
def get_gitlab_repository_data_with_webscraping(
    repository_url: str, timeout: float = 30.0
) -> dict[str, int]:
    """Scrape the star count of a GitLab project from its web page.

    Args:
        repository_url: URL of the project page to download and parse.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A dict containing only ``repository_stars_count``, or an empty dict
        when the page cannot be downloaded or parsed.
    """
    try:
        # Without a timeout, requests may block indefinitely on a stalled server.
        response = requests.get(repository_url, timeout=timeout)
        soup = BeautifulSoup(response.content, "lxml")
        star_elements = soup.select("a.gl-button.star-count")
        repository_stars_count = int(star_elements[0].text.strip())
    except Exception as e:
        # Deliberately broad: scraping is best-effort, and any network or
        # markup problem should yield "no data" rather than a crash.
        logger.warning(f"Couldn't scrape {repository_url}: {e}")
        return {}
    return {
        "repository_stars_count": repository_stars_count,
    }
def get_codeberg_repository_data(
    repository_name: str, timeout: float = 30.0
) -> dict[str, int]:
    """Fetch the star count and last-update date of a Codeberg repository.

    Args:
        repository_name: Repository path appended to the Codeberg
            ``/api/v1/repos/`` endpoint.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A dict with ``repository_stars_count`` and ``repository_last_update``
        (Unix timestamp in seconds), or an empty dict when the request fails
        or the API does not answer with HTTP 200.
    """
    url = f"https://codeberg.org/api/v1/repos/{repository_name}"
    try:
        # Without a timeout, requests may block indefinitely on a stalled server.
        result = requests.get(url, timeout=timeout)
    except requests.RequestException as e:
        # Match the scraping helpers in this module: log and report "no data".
        logger.warning(f"Couldn't fetch {url}: {e}")
        return {}

    if result.status_code != 200:
        logger.warning(
            f"Codeberg API returned status {result.status_code} "
            f"for repository {repository_name}."
        )
        return {}

    json_result = result.json()
    return {
        "repository_stars_count": json_result.get("stars_count"),
        # Only the leading YYYY-MM-DD part is parsed; the resulting naive
        # datetime is converted by timestamp() using the local timezone.
        "repository_last_update": int(
            datetime.strptime(
                json_result.get("updated_at")[0:10], "%Y-%m-%d"
            ).timestamp()
        ),
    }
def get_sourcehunt_repository_data_with_webscraping(
    repository_url: str,
    timeout: float = 30.0,
) -> dict[str, int]:
    """Scrape the last-commit date of a repository from its web page.

    Args:
        repository_url: URL of the repository page to download and parse.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A dict with ``repository_stars_count`` (always 0) and
        ``repository_last_update`` (Unix timestamp in seconds), or an empty
        dict when no commit entry is found or the page cannot be downloaded
        or parsed.
    """
    try:
        # Without a timeout, requests may block indefinitely on a stalled server.
        response = requests.get(repository_url, timeout=timeout)
        soup = BeautifulSoup(response.content, "lxml")
        commit_list = soup.select("small.pull-right a span")
        if not commit_list:
            return {}
        # The first match's ``title`` attribute is read as a date string;
        # only its leading YYYY-MM-DD part is parsed.
        commit_title = commit_list[0].get("title")
        repository_last_update = int(
            datetime.strptime(commit_title[0:10], "%Y-%m-%d").timestamp()
        )
        return {
            # The forge has no star feature, so the count is reported as 0.
            "repository_stars_count": 0,
            "repository_last_update": repository_last_update,
        }
    except Exception as e:
        # Deliberately broad: scraping is best-effort, and any network or
        # markup problem should yield "no data" rather than a crash.
        logger.warning(f"Couldn't scrape {repository_url}: {e}")
        return {}