Skip to content

Commit

Permalink
🤝 Merge pull request #793 from dmoney/re-enable-bitbucket-test
Browse files Browse the repository at this point in the history
Re enable bitbucket test and fix bitbucket repo handler (Fixes #462)
  • Loading branch information
jefftriplett committed Feb 26, 2022
2 parents 8872319 + 48069a2 commit c83c55f
Show file tree
Hide file tree
Showing 4 changed files with 188 additions and 71 deletions.
7 changes: 7 additions & 0 deletions docs/install.rst
Expand Up @@ -33,6 +33,13 @@ There's an example file available. To get started, copy the file:
cp .env.local.example .env.local
Add A GitHub API Token
----------------------

Get a `GitHub API token <https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token>`_ and set the ``GITHUB_TOKEN`` variable in ``.env.local``
to this value. The token is used by the GitHub repo handler to fetch repo
metadata, and it is required for certain tests.

Build the Docker Containers
---------------------------

Expand Down
17 changes: 9 additions & 8 deletions package/repos/bitbucket.py
Expand Up @@ -8,7 +8,7 @@

import requests

API_TARGET = "https://api.bitbucket.org/1.0/repositories"
API_TARGET = "https://api.bitbucket.org/2.0/repositories"

descendants_re = re.compile(r"Forks/Queues \((?P<descendants>\d+)\)", re.IGNORECASE)

Expand All @@ -24,27 +24,28 @@ def _get_bitbucket_commits(self, package):
repo_name = package.repo_name()
if repo_name.endswith("/"):
repo_name = repo_name[0:-1]
target = f"{API_TARGET}/{repo_name}/changesets/?limit=50"
# not sure if the limit parameter does anything in api 2.0
target = f"{API_TARGET}/{repo_name}/commits/?limit=50"
try:
data = self.get_json(target)
except requests.exceptions.HTTPError:
return []
if data is None:
return [] # todo: log this?

return data.get("changesets", [])
return data.get("values", [])

def fetch_commits(self, package):
from package.models import (
Commit,
) # Import placed here to avoid circular dependencies

for commit in self._get_bitbucket_commits(package):
timestamp = commit["timestamp"].split("+")
timestamp = commit["date"].split("+")
if len(timestamp) > 1:
timestamp = timestamp[0]
else:
timestamp = commit["timestamp"]
timestamp = commit["date"]
commit, created = Commit.objects.get_or_create(
package=package, commit_date=timestamp
)
Expand Down Expand Up @@ -93,15 +94,15 @@ def fetch_metadata(self, package):
data = self.get_json(url)
except requests.exceptions.HTTPError:
return package
package.repo_forks = len(data["forks"])
package.repo_forks = len(data["values"])

# get the followers of a repo
url = f"{target}followers/"
url = f"{target}watchers/"
try:
data = self.get_json(url)
except requests.exceptions.HTTPError:
return package
package.repo_watchers = data["count"]
package.repo_watchers = len(data.get("values", []))

# Getting participants
try:
Expand Down
85 changes: 85 additions & 0 deletions package/tests/get_bitbucket_repos.py
@@ -0,0 +1,85 @@
"""
A script to find Bitbucket repos that (a) still exist and (b) have forks.
Usage: python get_bitbucket_repos.py
Outputs a list of repos and the number of forks each has.
When testing repo handlers, the tests call the Bitbucket repo handler to
fetch repo metadata. However, many Bitbucket repos are no longer active,
have disappeared, or have no forks. This script was created to find a good
repo to test against, and may be needed again in the future if that particular
repo goes away. It may take a few minutes to run, because the APIs can only
be queried so fast.
"""

import requests, json, re, time

DJPACK_API_URL = "https://djangopackages.org/api/v3/packages/"
DJPACK_API_URL_BASE = "https://djangopackages.org"


def bitbucket_urls():
    """Yield the ``repo_url`` of every listed package hosted on bitbucket.org.

    Pages through the API starting at ``DJPACK_API_URL``. Each page's
    ``meta.next`` value is appended to ``DJPACK_API_URL_BASE`` to form the
    next request; iteration stops when that value is falsy. A short pause
    follows each page.
    """
    page_url = DJPACK_API_URL
    while page_url:
        payload = json.loads(requests.get(page_url).content)

        following = payload["meta"]["next"]
        if following:
            page_url = f"{DJPACK_API_URL_BASE}{following}"
        else:
            page_url = None

        yield from (
            entry["repo_url"]
            for entry in payload["objects"]
            if "bitbucket.org" in entry["repo_url"]
        )
        time.sleep(.1)


def non404urls(urls, timeout=10):
    """Yield ``(status_code, url)`` for every URL that does not answer 404.

    Args:
        urls: Iterable of URL strings; surrounding whitespace is stripped.
        timeout: Seconds to wait for each response before giving up on it.

    Yields:
        ``(status_code, url)`` tuples for responses whose status is not 404.

    URLs that produce no response at all (connection error, timeout, ...)
    are skipped so that one dead host does not abort the whole scan.
    """
    for url in urls:
        url = url.strip()
        try:
            response = requests.get(url, timeout=timeout)
        except requests.exceptions.RequestException:
            # No response means there is no status code to report.
            continue
        if response.status_code != 404:
            yield response.status_code, url
        # Throttle every request, and back off further when rate-limited.
        time.sleep(1)
        if response.status_code == 429:  # too many requests
            time.sleep(10)


def bitbucket_repos_with_forks(urls, include_unforked=False, timeout=10):
    """Yield ``(num_forks, url)`` for Bitbucket repos found via the 2.0 API.

    Args:
        urls: Iterable of repo URLs shaped like
            ``https://bitbucket.org/<user>/<repo>[/...]``.
        include_unforked: When true, also yield repos with zero forks.
        timeout: Seconds to wait for each API response.

    Yields:
        ``(num_forks, url)`` for every repo whose forks endpoint answers 200
        and that has at least one fork (or any repo answering 200 when
        ``include_unforked`` is set).

    URLs without both a user and a repo segment are skipped without making
    a request. Every request is followed by a pause, with a longer one
    after a 429, so the rate-limit back-off also applies to failed calls.
    """
    for url in urls:
        urlparts = url.split("/")
        if len(urlparts) < 5:
            continue
        _, _, _, user, repo, *_ = urlparts
        if not (user and repo):
            # e.g. "https://bitbucket.org/user/" -- nothing to look up.
            continue

        api_url = f"https://api.bitbucket.org/2.0/repositories/{user}/{repo}/forks/"
        try:
            response = requests.get(api_url, timeout=timeout)
        except requests.exceptions.RequestException:
            # Treat an unreachable API like any other failed lookup.
            time.sleep(1)
            continue

        if response.status_code == 200:
            parsed = json.loads(response.content)
            # NOTE(review): presumably this counts only the forks on the
            # first page of a paginated response -- confirm if exact totals
            # matter.
            num_forks = len(parsed["values"])
            if num_forks or include_unforked:
                yield num_forks, url

        # Throttle after every request, not only successful ones.
        time.sleep(1)
        if response.status_code == 429:  # too many requests
            time.sleep(10)


def main():
    """Run the three search stages and print each forked repo with its fork count."""
    print("Getting bitbucket repos from Django Packages API...")
    candidates = list(bitbucket_urls())
    print(f"Found {len(candidates)}.")

    # This pre-check might not actually be needed before calling the
    # Bitbucket API.
    print("Checking for non-404'd repos...")
    reachable = []
    for status, url in non404urls(candidates):
        if status == 200:
            reachable.append(url)
    print(f"Found {len(reachable)}.")

    print("Searching Bitbucket Cloud API for repos with forks...")
    forked = list(bitbucket_repos_with_forks(reachable))
    print(f"Found {len(forked)}. Showing repos and number of forks:")
    for fork_count, repo_url in forked:
        print(fork_count, repo_url)


if __name__ == '__main__':
    main()
150 changes: 87 additions & 63 deletions package/tests/test_repos.py
@@ -1,21 +1,26 @@
import pytest

from django.test import TestCase

from package.repos import get_repo, get_repo_for_repo_url, supported_repos
from package.repos.base_handler import BaseHandler
from package.repos.unsupported import UnsupportedHandler
from package.repos.bitbucket import BitbucketHandler
from package.repos.github import GitHubHandler
from package.models import Package, Category, Commit


# class TestBaseHandler(TestCase):
# def setUp(self):
# super().setUp()
# self.category = Category.objects.create(title="dummy", slug="dummy")
# self.category.save()
# self.package = Package.objects.create(
# title="Django Piston",
# slug="django-piston",
# repo_url="https://bitbucket.org/jespern/django-piston",
# category=self.category,
# )
class TestBaseHandler(TestCase):
    """Shared fixture for the repo-handler test cases.

    Creates one ``Category`` and one Bitbucket-hosted ``Package`` that
    subclasses can use, or replace, in their own ``setUp``.
    """

    def setUp(self):
        super().setUp()
        # objects.create() already persists the row, so no extra save() call
        # is needed afterwards.
        self.category = Category.objects.create(title="dummy", slug="dummy")
        self.package = Package.objects.create(
            title="Django Piston",
            slug="django-piston",
            repo_url="https://bitbucket.org/jespern/django-piston",
            category=self.category,
        )


def test_base_handler_not_implemented(package):
Expand Down Expand Up @@ -179,70 +184,89 @@ def test_get_repo_registry(package):
# TODO: Convert all of these to pytest tests and re-write them since
# they were already commented out.

"""
class TestBitbucketRepo(TestBaseHandler):
def setUp(self):
super(TestBitbucketRepo, self).setUp()
self.package = Package.objects.create(
title="django",
slug="django",
repo_url="https://bitbucket.org/django/django",
category=self.category
category=self.category,
title="django-mssql",
slug="django-mssql",
repo_url="https://bitbucket.org/Manfre/django-mssql/"
)
self.bitbucket_handler = BitbucketHandler()

def test_fetch_commits(self):
self.assertEqual(Commit.objects.count(), 0)
bitbucket_handler.fetch_commits(self.package)
self.bitbucket_handler.fetch_commits(self.package)
self.assertNotEqual(Commit.objects.count(), 0)

def test_fetch_metadata(self):
package = bitbucket_handler.fetch_metadata(self.package)
package = self.bitbucket_handler.fetch_metadata(self.package)
self.assertTrue(
package.repo_description.startswith("Official clone of the Subversion repo")
package.repo_description.startswith("Microsoft SQL server backend for Django running on windows")
)
self.assertTrue(package.repo_watchers > 0)
self.assertTrue(package.repo_forks > 0)
self.assertEquals(package.participants, "django")
"""


# class TestGithubRepo(TestBaseHandler):
# def setUp(self):
# super().setUp()
# self.package = Package.objects.create(
# title="Django",
# slug="django",
# repo_url="https://github.com/django/django",
# category=self.category,
# )

# # def test_fetch_commits(self):
# # import time
# # time.sleep(10)
# # self.assertEqual(Commit.objects.count(), 0)
# # github_handler.fetch_commits(self.package)
# # self.assertTrue(Commit.objects.count() > 0)

# # def test_fetch_metadata(self):
# # # Currently a live tests that access github
# # package = github_handler.fetch_metadata(self.package)
# # self.assertEqual(package.repo_description, "The Web framework for perfectionists with deadlines.")
# # self.assertTrue(package.repo_watchers > 100)

# # # test what happens when setting up an unsupported repo
# # self.package.repo_url = "https://example.com"
# # self.package.fetch_metadata()
# # self.assertEqual(self.package.repo_description, "")
# # self.assertEqual(self.package.repo_watchers, 0)
# # self.package.fetch_commits()


# class TestGitlabRepo(TestBaseHandler):
# def setUp(self):
# super().setUp()
# self.package = Package.objects.create(
# title="Django",
# slug="django",
# repo_url="https://gitlab.com/delta10/kees",
# category=self.category,
# )
self.assertEquals(package.participants, "Manfre")


class TestGithubRepo(TestBaseHandler):
    """Tests for ``GitHubHandler`` against a real GitHub-hosted package."""

    def setUp(self):
        super().setUp()
        self.package = Package.objects.create(
            title="Django",
            slug="django",
            repo_url="https://github.com/django/django",
            category=self.category,
        )
        self.github_handler = GitHubHandler()

        # A package whose repo URL does not point at GitHub.
        self.invalid_package = Package.objects.create(
            title="Invalid Package",
            slug="invldpkg",
            repo_url="https://example.com",
            category=self.category,
        )

    def test_fetch_commits(self):
        self.assertEqual(Commit.objects.count(), 0)
        self.github_handler.fetch_commits(self.package)
        self.assertGreater(Commit.objects.count(), 0)

    def test_fetch_metadata(self):
        # Currently a live test that accesses GitHub.
        package = self.github_handler.fetch_metadata(self.package)
        self.assertEqual(package.repo_description, "The Web framework for perfectionists with deadlines.")
        self.assertGreater(package.repo_watchers, 100)

    def test_fetch_metadata_unsupported_repo(self):
        # Test what happens when the handler is given an unsupported repo.
        package = self.github_handler.fetch_metadata(self.invalid_package)

        self.assertEqual(package.repo_description, "")
        self.assertEqual(package.repo_watchers, 0)
        self.invalid_package.fetch_commits()
        self.assertEqual(package.commit_set.count(), 0)


class TestGitlabRepo(TestBaseHandler):
    """Fixture for a GitLab-hosted package; no test methods are visible here."""

    def setUp(self):
        super().setUp()
        gitlab_fields = {
            "title": "Django",
            "slug": "django",
            "repo_url": "https://gitlab.com/delta10/kees",
            "category": self.category,
        }
        self.package = Package.objects.create(**gitlab_fields)


class TestRepos(TestBaseHandler):
    """Checks lookups against the repo-handler registry."""

    def test_repo_registry(self):
        from package.repos import get_repo, supported_repos

        github = get_repo("github")
        self.assertEqual(github.title, "GitHub")
        self.assertEqual(github.url, "https://github.com")
        self.assertIn("github", supported_repos())
        # An unknown handler name is expected to surface as ImportError.
        with self.assertRaises(ImportError):
            get_repo("xyzzy")

0 comments on commit c83c55f

Please sign in to comment.