Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ release: venv
.PHONY: test-repo
test-repo: venv
venv/bin/dumb-pypi \
--package-list testing/package-list \
--package-list-json testing/package-list-json \
--packages-url http://just.an.example/ \
--output-dir test-repo \
--logo https://i.fluffy.cc/tZRP1V8hdKCdrRQG5fBCv74M0VpcPLjP.svg \
Expand Down
33 changes: 28 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,15 +46,15 @@ For more about why this design was chosen, see the detailed

## Usage

There are two main components:
To use dumb-pypi, you need two things:

* A script which generates the index.
* A script which generates the index. (That's this project!)

* A generic webserver to serve the generated index.

It's up to you how to deploy these. For example, you might sync the built index
into an S3 bucket, and serve it directly from S3. You might run nginx from the
built index locally.
This part is up to you. For example, you might sync the built index into an
S3 bucket, and serve it directly from S3. You might run nginx from the built
index locally.

My recommended high-availability (but still quite simple) deployment is:

Expand Down Expand Up @@ -101,6 +101,28 @@ The built index will be in `my-built-index`. It's now up to you to figure out
how to serve that with a webserver (nginx is a good option — details below!).


#### Additional options for packages

You can extend the capabilities of your registry using the extended JSON input
syntax when providing your package list to dumb-pypi. Instead of using the
format listed above of one filename per line, format your file with one JSON
object per line, like this:

```json
{"filename": "dumb-init-1.1.2.tar.gz", "hash": "md5=<hash>", "uploaded_by": "ckuehl", "upload_timestamp": 1512539924}
```

The `filename` key is required. All other keys are optional and will be used to
provide additional information in your generated repository. This extended
information can be useful to determine, for example, who uploaded a package.
(Most of this information is useful to humans browsing the web UI, not to pip.)

Where should you get information about the hash, uploader, etc.? That's up to
you—dumb-pypi isn't in the business of storing or calculating this data. If
you're using S3, one easy option is to store it at upload time as [S3
metadata][s3-metadata].


### Recommended nginx config

You can serve the packages from any static webserver (including directly from
Expand Down Expand Up @@ -152,3 +174,4 @@ To run the tests, call `make test`. To run an individual test, you can do

[rationale]: https://github.com/chriskuehl/dumb-pypi/blob/master/RATIONALE.md
[pep503]: https://www.python.org/dev/peps/pep-0503/#normalized-names
[s3-metadata]: https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingMetadata.html#UserMetadata
103 changes: 83 additions & 20 deletions dumb_pypi/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import argparse
import collections
import contextlib
import json
import operator
import os
import os.path
Expand Down Expand Up @@ -65,7 +66,9 @@ class Package(collections.namedtuple('Package', (
'filename',
'name',
'version',
'url',
'hash',
'upload_timestamp',
'uploaded_by',
))):

__slots__ = ()
Expand All @@ -90,8 +93,37 @@ def sort_key(self):
self.filename[::-1],
)

@property
def formatted_upload_time(self):
    """Return the upload timestamp rendered as a human-readable string.

    The stored ``upload_timestamp`` is converted with
    ``datetime.fromtimestamp`` and then passed through
    ``_format_datetime``.
    """
    uploaded_at = datetime.fromtimestamp(self.upload_timestamp)
    return _format_datetime(uploaded_at)

@property
def info_string(self):
    """Return a comma-separated, human-readable summary of this package.

    The summary starts with the version (or ``'unknown version'`` when the
    version is falsy), followed by the formatted upload time and the
    uploader, each included only when the corresponding field is not None.
    """
    # TODO: I'd like to remove this "info string" and instead format things
    # nicely for humans (e.g. in a table or something).
    #
    # This might mean changing the web interface to use different pages for
    # humans than the /simple/ ones it currently links to. (Even if pip can
    # parse links from a <table>, it might add significantly more bytes.)
    summary = self.version or 'unknown version'

    # Collect the optional details first, then append them in order.
    extras = []
    if self.upload_timestamp is not None:
        extras.append(self.formatted_upload_time)
    if self.uploaded_by is not None:
        extras.append(self.uploaded_by)

    for extra in extras:
        summary += f', {extra}'
    return summary

def url(self, base_url):
    """Return the download URL for this package file under *base_url*.

    Trailing slashes on *base_url* are stripped so that exactly one slash
    separates it from the filename.
    """
    prefix = base_url.rstrip("/")
    return f'{prefix}/{self.filename}'

@classmethod
def from_filename(cls, filename, base_url):
def create(
cls,
*,
filename,
hash=None,
upload_timestamp=None,
uploaded_by=None,
):
if not re.match('[a-zA-Z0-9_\-\.]+$', filename) or '..' in filename:
raise ValueError('Unsafe package name: {}'.format(filename))

Expand All @@ -100,7 +132,9 @@ def from_filename(cls, filename, base_url):
filename=filename,
name=packaging.utils.canonicalize_name(name),
version=version,
url=base_url.rstrip('/') + '/' + filename,
hash=hash,
upload_timestamp=upload_timestamp,
uploaded_by=uploaded_by,
)


Expand All @@ -120,22 +154,17 @@ def atomic_write(path):
os.rename(tmp, path)


def _format_datetime(dt):
return dt.strftime('%Y-%m-%d %H:%M:%S')


# TODO: at some point there will be so many options we'll want to make a config
# object or similar instead of adding more arguments here
def build_repo(package_names, output_path, packages_url, title, logo, logo_width):
packages = collections.defaultdict(set)
for filename in package_names:
try:
package = Package.from_filename(filename, packages_url)
except ValueError as ex:
print('{} (skipping package)'.format(ex), file=sys.stderr)
else:
packages[package.name].add(package)

def build_repo(packages, output_path, packages_url, title, logo, logo_width):
simple = os.path.join(output_path, 'simple')
os.makedirs(simple, exist_ok=True)

current_date = datetime.now().isoformat()
current_date = _format_datetime(datetime.now())

# /index.html
with atomic_write(os.path.join(output_path, 'index.html')) as f:
Expand Down Expand Up @@ -174,20 +203,54 @@ def build_repo(package_names, output_path, packages_url, title, logo, logo_width
# Newer versions should sort first.
reverse=True,
),
packages_url=packages_url,
))


def package_list(path):
def _lines_from_path(path):
f = sys.stdin if path == '-' else open(path)
return frozenset(f.read().splitlines())
return f.read().splitlines()


def _create_packages(package_infos):
packages = collections.defaultdict(set)
for package_info in package_infos:
try:
package = Package.create(**package_info)
except ValueError as ex:
# TODO: this should really be optional; i'd prefer it to fail hard
print('{} (skipping package)'.format(ex), file=sys.stderr)
else:
packages[package.name].add(Package.create(**package_info))

return packages


def package_list(path):
    """Load packages from a plain-text list with one filename per line.

    Every line read from *path* becomes a package info containing only the
    ``filename`` key; the infos are then handed to ``_create_packages``.
    """
    filenames = _lines_from_path(path)
    package_infos = ({'filename': filename} for filename in filenames)
    return _create_packages(package_infos)


def package_list_json(path):
    """Load packages from a list with one JSON object per line.

    Every line read from *path* is decoded with ``json.loads`` and the
    resulting objects are handed to ``_create_packages``.
    """
    raw_lines = _lines_from_path(path)
    package_infos = (json.loads(raw_line) for raw_line in raw_lines)
    return _create_packages(package_infos)


def main(argv=None):
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
'--package-list', help='path to a list of packages (one per line)',
type=package_list, required=True,

package_input_group = parser.add_mutually_exclusive_group(required=True)
package_input_group.add_argument(
'--package-list',
help='path to a list of packages (one per line)',
type=package_list,
dest='packages',
)
package_input_group.add_argument(
'--package-list-json',
help='path to a list of packages (one JSON object per line)',
type=package_list_json,
dest='packages',
)

parser.add_argument(
'--output-dir', help='path to output to', required=True,
)
Expand All @@ -210,7 +273,7 @@ def main(argv=None):
args = parser.parse_args(argv)

build_repo(
args.package_list,
args.packages,
args.output_dir,
args.packages_url,
args.title,
Expand Down
2 changes: 1 addition & 1 deletion dumb_pypi/templates/package.html
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ <h1>{{package_name}}</h1>
<p>Generated on {{date}}.</p>
<ul>
{% for version in versions %}
<li><a href="{{version.url}}">{{version.filename}}</a> ({{version.version}})</li>
<li><a href="{{version.url(packages_url)}}">{{version.filename}}</a> ({{version.info_string}})</li>
{% endfor %}
</ul>
</body>
Expand Down
Loading