Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve tooling and backend for creating and working with knowledge posts #308

Merged
merged 3 commits into from
Oct 4, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion knowledge_repo/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import os
import time
import types
import yaml

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -89,7 +90,7 @@ def __set_from_file(self, d, filename, force=False):
self.__set_from_module(d, config, force)
elif filename.endswith('.yml'):
with open(filename) as f:
config = yaml.load(f)
config = yaml.safe_load(f)
self.update(config)

def __set_from_module(self, d, module, force=False):
Expand Down
3 changes: 2 additions & 1 deletion knowledge_repo/converters/pkp.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import zipfile
import io
import os

from ..converter import KnowledgePostConverter


class IpynbFormat(KnowledgePostConverter):
class PkpConverter(KnowledgePostConverter):
_registry_keys = ['kp', 'zip']

def to_file(self, filename):
Expand Down
226 changes: 226 additions & 0 deletions knowledge_repo/repositories/folder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,226 @@
from __future__ import print_function
from builtins import input

import os
import shutil
import logging
import re
import git
import socket
import time
from io import open

from knowledge_repo._version import __git_uri__
from ..post import KnowledgePost
from ..repository import KnowledgeRepository
from ..utils.exec_code import get_module_for_source
from ..utils.types import str_types
from ..utils.encoding import encode

logger = logging.getLogger(__name__)


class FolderKnowledgeRepository(KnowledgeRepository):
    """Knowledge repository stored in a plain folder on the local filesystem.

    A post lives under the repository root either as a ``.kp`` folder (one
    file per reference) or as a single ``.kp`` file that is read and written
    through ``KnowledgePost.from_file`` / ``to_file`` with ``format='kp'``.
    Every post is reported with status ``PUBLISHED``; the submit, publish and
    accept steps are no-ops.
    """
    _registry_keys = ['', 'file']

    # Files copied into a freshly created repository (target name -> template).
    TEMPLATES = {
        'README.md': os.path.abspath(os.path.join(os.path.dirname(__file__), '../templates', 'repository_readme.md')),
        '.knowledge_repo_config.yml': os.path.abspath(os.path.join(os.path.dirname(__file__), '../templates', 'repository_config.yml'))
    }

    @classmethod
    def create(cls, uri, embed_tooling=False):
        """Create the folder at `uri` if needed and seed it with templates.

        Args:
            uri: Filesystem path, optionally prefixed with 'file://'.
            embed_tooling: Not used by this method.

        Returns:
            A `FolderKnowledgeRepository` for the absolute path.
        """
        if uri.startswith('file://'):
            uri = uri[len('file://'):]
        path = os.path.abspath(uri)
        if not os.path.exists(path):
            os.makedirs(path)

        # Add README and configuration templates, never clobbering user files.
        for filename, template in cls.TEMPLATES.items():
            target = os.path.join(path, filename)
            if not os.path.exists(target):
                shutil.copy(template, target)
            else:
                logger.warning("Not overriding existing file '{}'.".format(filename))
        return FolderKnowledgeRepository(path)

    @classmethod
    def from_uri(cls, uri, *args, **kwargs):
        """
        If this folder is actually a git repository, a `GitKnowledgeRepository`
        is returned instead, unless the folder knowledge repository is explicitly
        requested via the 'file://' protocol.
        """
        check_for_git = True
        if uri.startswith('file://'):
            check_for_git = False
            uri = uri[len('file://'):]
        if check_for_git and os.path.exists(os.path.join(uri, '.git')):
            # Imported lazily, only when a git checkout is actually detected.
            from .gitrepository import GitKnowledgeRepository
            return GitKnowledgeRepository(uri, *args, **kwargs)
        return cls(uri, *args, **kwargs)

    def init(self, config='.knowledge_repo_config.yml', auto_create=False):
        """Bind the repository to `self.uri` and load its configuration file.

        Args:
            config: Configuration file name, relative to the repository root.
            auto_create: When true, a missing folder is created on demand.
        """
        # `auto_create` must be set first: the `path` setter reads it.
        self.auto_create = auto_create
        self.path = self.uri
        self.config.update(os.path.join(self.path, config))

    @property
    def path(self):
        """Absolute path of the repository root folder."""
        return self._path

    @path.setter
    def path(self, path):
        # Expand '~' and make the path absolute. A missing folder is created
        # when `self.auto_create` is set, otherwise a ValueError is raised.
        assert isinstance(path, str), "The path specified must be a string."
        path = os.path.abspath(os.path.expanduser(path))
        if not os.path.exists(path):
            if self.auto_create:
                self.create(path)
            else:
                raise ValueError("Provided path '{}' does not exist.".format(path))
        self._path = path

    # ----------- Repository actions / state ------------------------------------
    @property
    def revision(self):
        """Current time from `time.time()`; a new value on every access."""
        return time.time()

    @property
    def status(self):
        """Always the string 'OK'."""
        return 'OK'

    @property
    def status_message(self):
        """Always the string 'OK'."""
        return 'OK'

    # ---------------- Post retrieval methods --------------------------------

    def _dir(self, prefix, statuses):
        """Yield, in sorted order, the repository-relative paths of all posts.

        A post is any file or folder whose name ends with '.kp' found while
        walking `prefix` (or the whole repository when `prefix` is falsy).
        Nothing is yielded unless `PostStatus.PUBLISHED` is in `statuses`.
        """
        posts = set()

        if self.PostStatus.PUBLISHED in statuses:

            for path, folders, files in os.walk(os.path.join(self.path, prefix or '')):

                # Do not visit hidden folders. Assign through a slice so that
                # os.walk sees the pruned list; removing entries while
                # iterating over the same list skips the element that follows
                # each removal.
                folders[:] = [folder for folder in folders if not folder.startswith('.')]

                rel_dir = os.path.relpath(path, start=self.path)
                posts.update(
                    os.path.join(rel_dir, folder)
                    for folder in folders if folder.endswith('.kp')
                )
                posts.update(
                    os.path.join(rel_dir, filename)
                    for filename in files if filename.endswith('.kp')
                )

        # normpath drops the leading './' (or '.\\' on Windows) that relpath
        # introduces for posts located directly in the repository root.
        for post in sorted(os.path.normpath(post) for post in posts):
            yield post

    # ------------- Post submission / addition user flow ----------------------
    def _add_prepare(self, kp, path, update=False, **kwargs):
        """No preparation is performed before a post is added."""
        pass

    def _add_cleanup(self, kp, path, update=False, **kwargs):
        """No cleanup is performed after a post is added."""
        pass

    def _submit(self, path=None, branch=None, force=False):
        """No-op: added posts are already submitted."""
        pass

    def _publish(self, path):  # Publish a post for general perusal
        """No-op: added posts are already published."""
        pass

    def _unpublish(self, path):  # unpublish a post for general perusal
        """Not supported by this repository type."""
        raise NotImplementedError

    def _accept(self, path):  # Approve to publish a post for general perusal
        """No-op."""
        pass

    def _remove(self, path, all=False):
        """Delete the post stored at `path` (relative to the repository root).

        Folder posts are removed recursively; posts stored as a single file
        are unlinked, since `shutil.rmtree` refuses to operate on a plain
        file. `all` is not used by this method.
        """
        target = os.path.join(self.path, path)
        if os.path.isdir(target):
            shutil.rmtree(target)
        else:
            os.remove(target)

    # ------------ Knowledge Post Data Retrieval Methods -------------------------

    def _kp_uuid(self, path):
        """Return the content of the post's 'UUID' reference, or None if it cannot be read."""
        try:
            return self._kp_read_ref(path, 'UUID')
        except Exception:  # best effort, but let KeyboardInterrupt/SystemExit through
            return None

    def _kp_path(self, path, rel=None):
        """Delegate to `KnowledgeRepository._kp_path` with '~' expanded.

        `rel` defaults to the repository root.
        """
        return KnowledgeRepository._kp_path(self, os.path.expanduser(path), rel=rel or self.path)

    def _kp_exists(self, path, revision=None):
        """Return whether `path` exists under the repository root (`revision` is ignored)."""
        return os.path.exists(os.path.join(self.path, path))

    def _kp_status(self, path, revision=None, detailed=False, branch=None):
        """Every post is reported as published; all arguments are ignored."""
        return self.PostStatus.PUBLISHED

    def _kp_get_revision(self, path):
        """Return the integer stored in the post's 'REVISION' reference, or 0 if it cannot be read or parsed."""
        # We use a 'REVISION' file in the knowledge post folder rather than using git
        # revisions because using git rev-parse is slow.
        try:
            return int(self._kp_read_ref(path, 'REVISION'))
        except Exception:  # best effort, but let KeyboardInterrupt/SystemExit through
            return 0

    def _kp_get_revisions(self, path):
        """Not supported by this repository type."""
        raise NotImplementedError

    def _kp_write_ref(self, path, reference, data, uuid=None, revision=None):
        """Write `data` to `reference` inside the post at `path`.

        A post stored as a single file is loaded, updated and saved back
        through `KnowledgePost`. Otherwise `data` is written in binary mode to
        a file below the post folder (parent folders are created as needed)
        and the result of `write()` is returned. `uuid` and `revision` are
        not used.
        """
        path = os.path.join(self.path, path)
        if os.path.isfile(path):
            kp = KnowledgePost.from_file(path, format='kp')
            kp._write_ref(reference, data)
            kp.to_file(path, format='kp')
        else:
            ref_path = os.path.join(path, reference)
            ref_dir = os.path.dirname(ref_path)
            if not os.path.exists(ref_dir):
                os.makedirs(ref_dir)
            with open(ref_path, 'wb') as f:
                return f.write(data)

    def _kp_dir(self, path, parent=None, revision=None):  # TODO: Account for revision
        """Yield the references contained in the post at `path`.

        For a folder post, every file below the post folder (or below its
        `parent` sub-folder when given) is yielded relative to that folder.
        For a single-file post the listing is delegated to
        `KnowledgePost._dir`.
        """
        path = os.path.join(self.path, path)
        if os.path.isdir(path):
            if parent:
                path = os.path.join(path, parent)
            # `self.path` is absolute, so `path` already is too and needs no
            # further joining with the repository root.
            for dirpath, dirnames, filenames in os.walk(path):
                for filename in filenames:
                    # NOTE(review): os.walk reports the walked root itself as
                    # `dirpath`, so this comparison with "" looks like it can
                    # never match and 'REVISION' is still yielded -- confirm
                    # the intended behaviour before changing it.
                    if dirpath == "" and filename == "REVISION":
                        continue
                    yield os.path.relpath(os.path.join(dirpath, filename), path)
        else:
            kp = KnowledgePost.from_file(path, format='kp')
            for reference in kp._dir(parent=parent):
                yield reference

    def _kp_has_ref(self, path, reference, revision=None):  # TODO: Account for revision
        """Return whether the post at `path` contains `reference`."""
        path = os.path.join(self.path, path)
        if os.path.isdir(path):
            return os.path.isfile(os.path.join(path, reference))
        else:
            kp = KnowledgePost.from_file(path, format='kp')
            return kp._has_ref(reference)

    def _kp_diff(self, path, head, base):
        """Not supported by this repository type."""
        raise NotImplementedError

    def _kp_new_revision(self, path, uuid=None):
        """Increment the post's 'REVISION' reference and, when `uuid` is given, store it as 'UUID'."""
        self._kp_write_ref(path, "REVISION", encode(self._kp_get_revision(path) + 1))
        if uuid:
            self._kp_write_ref(path, "UUID", encode(uuid))

    def _kp_read_ref(self, path, reference, revision=None):
        """Return the content of `reference` from the post at `path`.

        Folder posts are read from disk in binary mode; single-file posts are
        read through `KnowledgePost._read_ref`. `revision` is not used.
        """
        path = os.path.join(self.path, path)
        if os.path.isdir(path):
            with open(os.path.join(path, reference), 'rb') as f:
                return f.read()
        else:
            kp = KnowledgePost.from_file(path, format='kp')
            return kp._read_ref(reference)
2 changes: 1 addition & 1 deletion knowledge_repo/repositories/gitrepository.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@


class GitKnowledgeRepository(KnowledgeRepository):
_registry_keys = ['', 'git']
_registry_keys = ['git']

TEMPLATES = {
'README.md': os.path.abspath(os.path.join(os.path.dirname(__file__), '../templates', 'repository_readme.md')),
Expand Down
6 changes: 5 additions & 1 deletion knowledge_repo/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def for_uri(cls, uri, *args, **kwargs):
if isinstance(uri, dict):
return cls.for_uris(uri)
scheme = urlparse(uri).scheme
return cls._get_subclass_for(scheme)(uri, *args, **kwargs)
return cls._get_subclass_for(scheme).from_uri(uri, *args, **kwargs)

@classmethod
def for_uris(cls, uri):
Expand All @@ -57,6 +57,10 @@ def for_uris(cls, uri):
krs = {name: cls.for_uri(uri) for name, uri in list(uris.items())}
return MetaKnowledgeRepository(krs)

@classmethod
def from_uri(cls, url, *args, **kwargs):
    """Build an instance of this class for `url`.

    Any extra positional and keyword arguments are forwarded to the
    constructor unchanged.
    """
    instance = cls(url, *args, **kwargs)
    return instance

@classmethod
def create_for_uri(cls, uri, **kwargs):
if isinstance(uri, dict):
Expand Down
8 changes: 4 additions & 4 deletions run_tests.bat
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ IF EXIST "%test_repo_path%" (
RMDIR /Q /S %test_repo_path%
)

%PYTHON%\\python.exe scripts/knowledge_repo --repo="${test_repo_path}" init
REM Create the test repository. cmd.exe expands %%var%%-style variables only;
REM the bash-style ${var} form would be passed through as literal text.
%PYTHON%\\python.exe scripts/knowledge_repo --repo="git://%test_repo_path%" init
COPY tests\config_repo.yml %test_repo_path%\.knowledge_repo_config.yml

PUSHD %test_repo_path%
Expand All @@ -35,9 +35,9 @@ PUSHD %test_repo_path%
POPD

REM Add some knowledge_posts
%PYTHON%\\python.exe scripts/knowledge_repo --repo="%test_repo_path%" --dev add knowledge_repo/templates/knowledge_template.ipynb -p projects/test/ipynb_test -m "Test commit" --branch master
%PYTHON%\\python.exe scripts/knowledge_repo --repo="%test_repo_path%" --dev add knowledge_repo/templates/knowledge_template.Rmd -p projects/test/Rmd_test -m "Test commit" --branch master
%PYTHON%\\python.exe scripts/knowledge_repo --repo="%test_repo_path%" --dev add knowledge_repo/templates/knowledge_template.md -p projects/test/md_test -m "Test commit" --branch master
%PYTHON%\\python.exe scripts/knowledge_repo --repo="git://%test_repo_path%" --dev add knowledge_repo/templates/knowledge_template.ipynb -p projects/test/ipynb_test -m "Test commit" --branch master
%PYTHON%\\python.exe scripts/knowledge_repo --repo="git://%test_repo_path%" --dev add knowledge_repo/templates/knowledge_template.Rmd -p projects/test/Rmd_test -m "Test commit" --branch master
%PYTHON%\\python.exe scripts/knowledge_repo --repo="git://%test_repo_path%" --dev add knowledge_repo/templates/knowledge_template.md -p projects/test/md_test -m "Test commit" --branch master

REM "Running regression test suite"
%PYTHON%\\python.exe -m nose --with-coverage --cover-package=knowledge_repo --verbosity=1
16 changes: 8 additions & 8 deletions run_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ echo "Creating a test repository in ${test_repo_path}..."
# Remove the repository if it exists
rm -rf ${test_repo_path} &> /dev/null

`dirname $0`/scripts/knowledge_repo --repo="${test_repo_path}" init
`dirname $0`/scripts/knowledge_repo --repo="git://${test_repo_path}" init
mkdir -p ${test_repo_path} &> /dev/null
cp `dirname $0`/tests/config_repo.yml ${test_repo_path}/.knowledge_repo_config.yml &> /dev/null

Expand All @@ -36,27 +36,27 @@ git commit -m "Update repository config." &> /dev/null
popd &> /dev/null

# Add some knowledge_posts
`dirname $0`/scripts/knowledge_repo --repo="${test_repo_path}" --dev add `dirname $0`/knowledge_repo/templates/knowledge_template.ipynb -p projects/test/ipynb_test -m "Test commit" --branch master
`dirname $0`/scripts/knowledge_repo --repo="${test_repo_path}" --dev add `dirname $0`/knowledge_repo/templates/knowledge_template.Rmd -p projects/test/Rmd_test -m "Test commit" --branch master
`dirname $0`/scripts/knowledge_repo --repo="${test_repo_path}" --dev add `dirname $0`/knowledge_repo/templates/knowledge_template.md -p projects/test/md_test -m "Test commit" --branch master
`dirname $0`/scripts/knowledge_repo --repo="git://${test_repo_path}" --dev add `dirname $0`/knowledge_repo/templates/knowledge_template.ipynb -p projects/test/ipynb_test -m "Test commit" --branch master
`dirname $0`/scripts/knowledge_repo --repo="git://${test_repo_path}" --dev add `dirname $0`/knowledge_repo/templates/knowledge_template.Rmd -p projects/test/Rmd_test -m "Test commit" --branch master
`dirname $0`/scripts/knowledge_repo --repo="git://${test_repo_path}" --dev add `dirname $0`/knowledge_repo/templates/knowledge_template.md -p projects/test/md_test -m "Test commit" --branch master

for post in $(ls `dirname $0`/tests/test_posts); do
if [[ "${post}" == *.ipynb ]]; then
`dirname $0`/scripts/knowledge_repo --repo="${test_repo_path}" --dev add `dirname $0`/tests/test_posts/${post} -p projects/${post} -m "Test commit" --branch master;
`dirname $0`/scripts/knowledge_repo --repo="git://${test_repo_path}" --dev add `dirname $0`/tests/test_posts/${post} -p projects/${post} -m "Test commit" --branch master;
fi;
if [[ "${post}" == *.Rmd ]]; then
`dirname $0`/scripts/knowledge_repo --repo="${test_repo_path}" --dev add `dirname $0`/tests/test_posts/${post} -p projects/${post} -m "Test commit" --branch master;
`dirname $0`/scripts/knowledge_repo --repo="git://${test_repo_path}" --dev add `dirname $0`/tests/test_posts/${post} -p projects/${post} -m "Test commit" --branch master;
fi;
if [[ "${post}" == *.md ]]; then
`dirname $0`/scripts/knowledge_repo --repo="${test_repo_path}" --dev add `dirname $0`/tests/test_posts/${post} -p projects/${post} -m "Test commit" --branch master;
`dirname $0`/scripts/knowledge_repo --repo="git://${test_repo_path}" --dev add `dirname $0`/tests/test_posts/${post} -p projects/${post} -m "Test commit" --branch master;
fi;
done

echo
echo "Synchronising database index"
echo "-----------------------------"
echo
`dirname $0`/scripts/knowledge_repo --repo="${test_repo_path}" reindex --config `dirname $0`/tests/config_server.py
`dirname $0`/scripts/knowledge_repo --repo="git://${test_repo_path}" reindex --config `dirname $0`/tests/config_server.py

echo
echo "Running regression test suite"
Expand Down
Loading