Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
.vscode/
secrets.json

# Jupyter
*.ipynb
Expand Down
20 changes: 16 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
![kern-python](https://uploads-ssl.webflow.com/61e47fafb12bd56b40022a49/62766400bd3c57b579d289bf_kern-python%20Banner.png)
[![Python 3.9](https://img.shields.io/badge/python-3.9-blue.svg)](https://www.python.org/downloads/release/python-390/)
[![pypi 0.0.3](https://img.shields.io/badge/pypi-0.0.3-yellow.svg)](https://pypi.org/project/kern-sdk/0.0.3/)

# Kern AI API for Python

This is the official Python SDK for Kern AI, your IDE for programmatic data enrichment and management.

## Installation

You can set up this library either by running `$ pip install kern-sdk`, or by cloning this repository and running `$ pip install -r requirements.txt` in your repository.
You can set up this library either by running `$ pip install kern-sdk`, or by cloning this repository and running `$ pip install -r requirements.txt` in this repository.

## Usage
Once you have installed the package, you can access the application from any Python terminal as follows:
Expand All @@ -24,13 +25,23 @@ client = Client(username, password, project_id)
# client = Client(username, password, project_id, uri="http://localhost:4455")
```

Alternatively, you can also set up a `secrets.json` file and load it via `Client.from_secrets_file`. If you use a `secrets.json`, you can also use the CLI commands directly (e.g. `kern pull`).
Alternatively, you can provide a `secrets.json` file in your repository, looking as follows:
```json
{
"user_name": "your-username",
"password": "your-password",
"project_id": "your-project-id"
}
```
Again, if you run on your local machine, you should also provide `"uri": "http://localhost:4455"`.

Now, you can easily fetch the data from your project:
```python
df = client.fetch_export()
df = client.get_record_export()
```

Alternatively, you can also just run `kern pull` in your CLI given that you have provided the `secrets.json` file.

The `df` contains data with the following schema:
- all your record attributes are stored as columns, e.g. `headline` or `running_id` if you uploaded records like `{"headline": "some text", "running_id": 1234}`
- per labeling task three columns:
Expand All @@ -42,7 +53,8 @@ With the `client`, you easily integrate your data into any kind of system; may i

## Roadmap
- [ ] Register information sources via wrappers
- [ ] Fetch project statistics
- [ ] Add project upload
- [x] Fetch project statistics


If you want to have something added, feel free to open an [issue](https://github.com/code-kern-ai/kern-python/issues).
Expand Down
23 changes: 23 additions & 0 deletions cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from kern import Client
import sys


def pull():
    """Export the records of the project configured in `secrets.json`.

    Builds a client from the `secrets.json` file, looks up the project's
    name, and writes the record export to `<project name>.json`.
    """
    kern_client = Client.from_secrets_file("secrets.json")
    details = kern_client.get_project_details()
    target_path = f"{details['name']}.json"
    kern_client.get_record_export(download_to=target_path)


def main():
    """Entry point of the `kern` command line interface.

    Reads the sub-command from `sys.argv` and dispatches it. Currently only
    `pull` is supported.

    Raises:
        SystemExit: with a usage message (non-zero exit status) if no
            sub-command or an unknown sub-command was given.
    """
    cli_args = sys.argv[1:]
    usage = "usage: kern pull"

    # Guard against a bare `kern` call, which would otherwise fail with an
    # IndexError when the first argument is read.
    if not cli_args:
        sys.exit(usage)

    # currently only need to easily pull data;
    # in the near future, this might be expanded
    command = cli_args[0]
    if command == "pull":
        pull()
    else:
        # Report typos instead of silently doing nothing.
        sys.exit(f"unknown command '{command}'\n{usage}")


if __name__ == "__main__":
    main()
72 changes: 66 additions & 6 deletions kern/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
from wasabi import msg
import pandas as pd
from kern import authentication, api_calls, settings, exceptions
from typing import Optional
from typing import Optional, Dict
import json


class Client:
Expand All @@ -20,7 +21,7 @@ class Client:
"""

def __init__(
self, user_name: str, password: str, project_id: str, uri="https://app.kern.ai"
self, user_name: str, password: str, project_id: str, uri=settings.DEFAULT_URI
):
settings.set_base_uri(uri)
self.session_token = authentication.create_session_token(
Expand All @@ -33,16 +34,75 @@ def __init__(
raise exceptions.get_api_exception_class(401)
self.project_id = project_id

def fetch_export(self, num_samples: Optional[int] = None) -> pd.DataFrame:
@classmethod
def from_secrets_file(cls, path_to_file: str):
    """Create a client from a JSON secrets file.

    Args:
        path_to_file (str): Path to a JSON file with the keys `user_name`, `password` and `project_id`, and optionally `uri`.

    Returns:
        An instance of `cls` built from the file's content. If `uri` is missing or null, `settings.DEFAULT_URI` is used.

    Raises:
        KeyError: If `user_name`, `password` or `project_id` is missing in the file.
    """
    # JSON is UTF-8 encoded; do not rely on the platform's default encoding.
    with open(path_to_file, "r", encoding="utf-8") as file:
        content = json.load(file)

    uri = content.get("uri")
    if uri is None:
        uri = settings.DEFAULT_URI

    return cls(
        user_name=content["user_name"],
        password=content["password"],
        project_id=content["project_id"],
        uri=uri,
    )

def get_project_details(self) -> Dict[str, str]:
    """Fetch the high-level information of your project: name, description, and tokenizer.

    Returns:
        Dict[str, str]: the API response for the project URL of `self.project_id`
    """
    project_url = settings.get_project_url(self.project_id)
    return api_calls.get_request(project_url, self.session_token)

def get_record_export(
    self, num_samples: Optional[int] = None, download_to: Optional[str] = None
) -> pd.DataFrame:
    """Collects the export data of your project (i.e. the same data you would get when exporting in the web app).

    Args:
        num_samples (Optional[int], optional): If set, only the first `num_samples` records are collected. Defaults to None.
        download_to (Optional[str], optional): If set, the export is additionally written to this path as record-oriented JSON. Defaults to None.

    Returns:
        pd.DataFrame: DataFrame containing your record data.
    """
    url = settings.get_export_url(self.project_id)
    # One request only; `num_samples` travels as a query parameter instead of
    # being baked into the URL.
    api_response = api_calls.get_request(
        url, self.session_token, num_samples=num_samples
    )
    df = pd.read_json(api_response)
    if download_to is not None:
        df.to_json(download_to, orient="records")
        msg.good(f"Downloaded export to {download_to}")
    return df

# TODO: issue #6
# def post_file_import(self, upload_from: str):
# upload_from = f"{upload_from}_SCALE"
# file_type = "records"
# import_file_options = None
# config_url = settings.get_config_url()
# config_api_response = api_calls.get_request(config_url, self.session_token)
# endpoint = config_api_response["KERN_S3_ENDPOINT"]

# import_url = settings.get_import_url(self.project_id)
# import_api_response = api_calls.post_request(
# import_url,
# {
# "file_name": upload_from,
# "file_type": file_type,
# "import_file_options": import_file_options,
# },
# self.session_token,
# )

# credentials = import_api_response["Credentials"]
# access_key = credentials["AccessKeyId"]
# secret_key = credentials["SecretAccessKey"]
# session_token = credentials["SessionToken"]

# upload_task_id = import_api_response["uploadTaskId"]
# return endpoint, access_key, secret_key, session_token, upload_task_id
16 changes: 10 additions & 6 deletions kern/api_calls.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
# -*- coding: utf-8 -*-
import json
from json.decoder import JSONDecodeError
import pkg_resources
from kern import exceptions
import requests
from typing import Any, Dict

# Resolve the installed SDK version once at import time; it is used in the
# user-agent header. Only the current distribution name is looked up: a lookup
# of the former name would raise before "kern-sdk" is ever consulted.
try:
    version = pkg_resources.get_distribution("kern-sdk").version
except pkg_resources.DistributionNotFound:
    # The package is not installed under this name.
    version = "noversion"

Expand All @@ -17,24 +18,27 @@ def post_request(url: str, body: Dict[str, Any], session_token: str) -> str:
return _handle_response(response)


def get_request(url: str, session_token: str, **query_params) -> str:
    """Send an authenticated GET request and return the handled response.

    Args:
        url (str): URL to request.
        session_token (str): Session token used to build the request headers.
        **query_params: Passed to `requests.get` as URL query parameters.

    Returns:
        The result of `_handle_response` for the received response.
    """
    headers = _build_headers(session_token)
    # A single request; query parameters are encoded by `requests`.
    response = requests.get(url=url, headers=headers, params=query_params)
    return _handle_response(response)


def _build_headers(session_token: str) -> Dict[str, str]:
    """Build the HTTP headers sent with every API request.

    Args:
        session_token (str): Session token; sent both as bearer token and as `identifier`.

    Returns:
        Dict[str, str]: header names mapped to their values
    """
    # HTTP header names are case-insensitive, so each header is listed exactly
    # once instead of once per spelling.
    return {
        "content-type": "application/json",
        "user-agent": f"python-sdk-{version}",
        "authorization": f"Bearer {session_token}",
        "identifier": session_token,
    }


def _handle_response(response: requests.Response) -> str:
status_code = response.status_code
if status_code == 200:
json_data = response.json()
if type(json_data) == str:
json_data = json.loads(json_data)
return json_data
else:
try:
Expand Down
23 changes: 19 additions & 4 deletions kern/settings.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-
BASE_URI: str
DEFAULT_URI: str = "https://app.kern.ai"


def set_base_uri(uri: str):
Expand All @@ -23,7 +24,21 @@ def get_authentication_url() -> str:
return f"{BASE_URI}/.ory/kratos/public/self-service/login/api"


def get_config_url() -> str:
    """Return the URL of the config endpoint."""
    return f"{BASE_URI}/api/config/"


def get_project_url(project_id: str) -> str:
    """Return the URL of the given project; the other project URLs extend it."""
    return f"{BASE_URI}/api/project/{project_id}"


def get_records_url(project_id: str) -> str:
    """Return the records URL of the given project."""
    return f"{get_project_url(project_id)}/records"


def get_export_url(project_id: str) -> str:
    """Return the export URL of the given project.

    Defined exactly once: a second definition of the same name would silently
    replace the first one at import time.
    """
    return f"{get_project_url(project_id)}/export"


def get_import_url(project_id: str) -> str:
    """Return the import URL of the given project."""
    return f"{get_project_url(project_id)}/import"
64 changes: 64 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,19 +1,83 @@
appnope==0.1.3
argon2-cffi==21.3.0
argon2-cffi-bindings==21.2.0
asttokens==2.0.5
attrs==21.4.0
backcall==0.2.0
beautifulsoup4==4.11.1
black==22.3.0
bleach==5.0.0
certifi==2021.10.8
cffi==1.15.0
charset-normalizer==2.0.12
click==8.1.3
debugpy==1.6.0
decorator==5.1.1
defusedxml==0.7.1
entrypoints==0.4
executing==0.8.3
fastjsonschema==2.15.3
idna==3.3
ipykernel==6.13.0
ipython==8.3.0
ipython-genutils==0.2.0
ipywidgets==7.7.0
jedi==0.18.1
Jinja2==3.1.2
jsonschema==4.5.1
jupyter==1.0.0
jupyter-client==7.3.1
jupyter-console==6.4.3
jupyter-core==4.10.0
jupyterlab-pygments==0.2.2
jupyterlab-widgets==1.1.0
kern-python-client @ file:///Users/jhoetter/repos/kern-python
MarkupSafe==2.1.1
matplotlib-inline==0.1.3
minio==7.1.8
mistune==0.8.4
mypy-extensions==0.4.3
nbclient==0.6.3
nbconvert==6.5.0
nbformat==5.4.0
nest-asyncio==1.5.5
notebook==6.4.11
numpy==1.22.3
packaging==21.3
pandas==1.4.2
pandocfilters==1.5.0
parso==0.8.3
pathspec==0.9.0
pexpect==4.8.0
pickleshare==0.7.5
platformdirs==2.5.2
prometheus-client==0.14.1
prompt-toolkit==3.0.29
psutil==5.9.0
ptyprocess==0.7.0
pure-eval==0.2.2
pycparser==2.21
Pygments==2.12.0
pyparsing==3.0.9
pyrsistent==0.18.1
python-dateutil==2.8.2
pytz==2022.1
pyzmq==22.3.0
qtconsole==5.3.0
QtPy==2.1.0
requests==2.27.1
Send2Trash==1.8.0
six==1.16.0
soupsieve==2.3.2.post1
stack-data==0.2.0
terminado==0.15.0
tinycss2==1.1.1
tomli==2.0.1
tornado==6.1
traitlets==5.2.1.post0
typing_extensions==4.2.0
urllib3==1.26.9
wasabi==0.9.1
wcwidth==0.2.5
webencodings==0.5.1
widgetsnbextension==3.6.0
11 changes: 8 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@
long_description = file.read()

setup(
name="kern-python-client",
version="0.0.1",
name="kern-sdk",
version="0.0.3",
author="jhoetter",
author_email="johannes.hoetter@kern.ai",
description="Official Python SDK for the Kern AI API",
description="Official SDK for the Kern AI API",
long_description=long_description,
long_description_content_type="text/markdown",
url="https://github.com/code-kern-ai/kern-python",
Expand Down Expand Up @@ -44,4 +44,9 @@
"urllib3==1.26.9",
"wasabi==0.9.1",
],
entry_points={
"console_scripts": [
"kern=cli:main",
],
},
)