diff --git a/.gitignore b/.gitignore index 2c995a6..86f5eb5 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ .vscode/ +secrets.json # Jupyter *.ipynb diff --git a/README.md b/README.md index af130fe..e83573b 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,6 @@ ![kern-python](https://uploads-ssl.webflow.com/61e47fafb12bd56b40022a49/62766400bd3c57b579d289bf_kern-python%20Banner.png) [![Python 3.9](https://img.shields.io/badge/python-3.9-blue.svg)](https://www.python.org/downloads/release/python-390/) +[![pypi 0.0.3](https://img.shields.io/badge/pypi-0.0.3-yellow.svg)](https://pypi.org/project/kern-sdk/0.0.3/) # Kern AI API for Python @@ -7,7 +8,7 @@ This is the official Python SDK for Kern AI, your IDE for programmatic data enri ## Installation -You can set up this library via either running `$ pip install kern-sdk`, or via cloning this repository and running `$ pip install -r requirements.txt` in your repository. +You can set up this library via either running `$ pip install kern-sdk`, or via cloning this repository and running `$ pip install -r requirements.txt` in this repository. ## Usage Once you installed the package, you can access the application from any Python terminal as follows: @@ -24,13 +25,23 @@ client = Client(username, password, project_id) # client = Client(username, password, project_id, uri="http://localhost:4455") ``` -Alternatively, you can also set up a `secrets.json` file and load it via `Client.from_secrets_file`. If you use a `secrets.json`, you can also use the CLI commands directly (e.g. `kern pull`). +Alternatively, you can provide a `secrets.json` file in your repository, looking as follows: +```json +{ + "user_name": "your-username", + "password": "your-password", + "project_id": "your-project-id" +} +``` +Again, if you run on your local machine, you should also provide `"uri": "http://localhost:4455"`. Then create the client via `Client.from_secrets_file("secrets.json")`.
Now, you can easily fetch the data from your project: ```python -df = client.fetch_export() +df = client.get_record_export() ``` +Alternatively, you can also just run `kern pull` in your CLI given that you have provided the `secrets.json` file. + The `df` contains data of the following scheme: - all your record attributes are stored as columns, e.g. `headline` or `running_id` if you uploaded records like `{"headline": "some text", "running_id": 1234}` - per labeling task three columns: @@ -42,7 +53,8 @@ With the `client`, you easily integrate your data into any kind of system; may i ## Roadmap - [ ] Register information sources via wrappers -- [ ] Fetch project statistics +- [ ] Add project upload +- [x] Fetch project statistics If you want to have something added, feel free to open an [issue](https://github.com/code-kern-ai/kern-python/issues). diff --git a/cli.py b/cli.py new file mode 100644 index 0000000..fc4aaa8 --- /dev/null +++ b/cli.py @@ -0,0 +1,23 @@ +from kern import Client +import sys + + +def pull(): + client = Client.from_secrets_file("secrets.json") + project_name = client.get_project_details()["name"] + download_to = f"{project_name}.json" + client.get_record_export(download_to=download_to) + + +def main(): + cli_args = sys.argv[1:] + + # currently only need to easily pull data; + # in the near future, this might be expanded + cli_arg = cli_args[0] + if cli_arg == "pull": + pull() + + +if __name__ == "__main__": + main() diff --git a/kern/__init__.py b/kern/__init__.py index dbd1e5b..0aaaed7 100644 --- a/kern/__init__.py +++ b/kern/__init__.py @@ -3,7 +3,8 @@ from wasabi import msg import pandas as pd from kern import authentication, api_calls, settings, exceptions -from typing import Optional +from typing import Optional, Dict +import json class Client: @@ -20,7 +21,7 @@ class Client: """ def __init__( - self, user_name: str, password: str, project_id: str, uri="https://app.kern.ai" + self, user_name: str, password: str, project_id: str, 
uri=settings.DEFAULT_URI ): settings.set_base_uri(uri) self.session_token = authentication.create_session_token( @@ -33,16 +34,75 @@ def __init__( raise exceptions.get_api_exception_class(401) self.project_id = project_id - def fetch_export(self, num_samples: Optional[int] = None) -> pd.DataFrame: + @classmethod + def from_secrets_file(cls, path_to_file: str): + with open(path_to_file, "r") as file: + content = json.load(file) + uri = content.get("uri") + if uri is None: + uri = settings.DEFAULT_URI + return cls( + user_name=content["user_name"], + password=content["password"], + project_id=content["project_id"], + uri=uri, + ) + + def get_project_details(self) -> Dict[str, str]: + """Collect high-level information about your project: name, description, and tokenizer + + Returns: + Dict[str, str]: dictionary containing the above information + """ + url = settings.get_project_url(self.project_id) + api_response = api_calls.get_request(url, self.session_token) + return api_response + + def get_record_export( + self, num_samples: Optional[int] = None, download_to: Optional[str] = None + ) -> pd.DataFrame: """Collects the export data of your project (i.e. the same data if you would export in the web app). Args: num_samples (Optional[int], optional): If set, only the first `num_samples` records are collected. Defaults to None. Returns: - pd.DataFrame: DataFrame containing your record data. For more details, see https://docs.kern.ai + pd.DataFrame: DataFrame containing your record data. 
""" - url = settings.get_export_url(self.project_id, num_samples=num_samples) - api_response = api_calls.get_request(url, self.session_token) + url = settings.get_export_url(self.project_id) + api_response = api_calls.get_request( + url, self.session_token, **{"num_samples": num_samples} + ) df = pd.read_json(api_response) + if download_to is not None: + df.to_json(download_to, orient="records") + msg.good(f"Downloaded export to {download_to}") return df + + # TODO: issue #6 + # def post_file_import(self, upload_from: str): + # upload_from = f"{upload_from}_SCALE" + # file_type = "records" + # import_file_options = None + # config_url = settings.get_config_url() + # config_api_response = api_calls.get_request(config_url, self.session_token) + # endpoint = config_api_response["KERN_S3_ENDPOINT"] + + # import_url = settings.get_import_url(self.project_id) + # import_api_response = api_calls.post_request( + # import_url, + # { + # "file_name": upload_from, + # "file_type": file_type, + # "import_file_options": import_file_options, + # }, + # self.session_token, + # ) + + # credentials = import_api_response["Credentials"] + # access_key = credentials["AccessKeyId"] + # secret_key = credentials["SecretAccessKey"] + # session_token = credentials["SessionToken"] + + # upload_task_id = import_api_response["uploadTaskId"] + # return endpoint, access_key, secret_key, session_token, upload_task_id diff --git a/kern/api_calls.py b/kern/api_calls.py index 047a8e2..3a7d103 100644 --- a/kern/api_calls.py +++ b/kern/api_calls.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +import json from json.decoder import JSONDecodeError import pkg_resources from kern import exceptions @@ -6,7 +7,7 @@ from typing import Any, Dict try: - version = pkg_resources.get_distribution("kern-python-client").version + version = pkg_resources.get_distribution("kern-sdk").version except pkg_resources.DistributionNotFound: version = "noversion" @@ -17,17 +18,18 @@ def post_request(url: str, body: Dict[str, 
Any], session_token: str) -> str: return _handle_response(response) -def get_request(url: str, session_token: str) -> str: +def get_request(url: str, session_token: str, **query_params) -> str: headers = _build_headers(session_token) - response = requests.get(url=url, headers=headers) + response = requests.get(url=url, headers=headers, params=query_params) return _handle_response(response) def _build_headers(session_token: str) -> Dict[str, str]: return { - "Content-Type": "application/json", - "User-Agent": f"python-sdk-{version}", - "Authorization": f"Bearer {session_token}", + "content-type": "application/json", + "user-agent": f"python-sdk-{version}", + "authorization": f"Bearer {session_token}", + "identifier": session_token, } @@ -35,6 +37,8 @@ def _handle_response(response: requests.Response) -> str: status_code = response.status_code if status_code == 200: json_data = response.json() + if type(json_data) == str: + json_data = json.loads(json_data) return json_data else: try: diff --git a/kern/settings.py b/kern/settings.py index 11688e8..eac58c8 100644 --- a/kern/settings.py +++ b/kern/settings.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- BASE_URI: str +DEFAULT_URI: str = "https://app.kern.ai" def set_base_uri(uri: str): @@ -23,7 +24,21 @@ def get_authentication_url() -> str: return f"{BASE_URI}/.ory/kratos/public/self-service/login/api" -def get_export_url(project_id: str, **kwargs) -> str: - url = f"{BASE_URI}/api/project/{project_id}/export" - url = add_query_params(url, **kwargs) - return url +def get_config_url(): + return f"{BASE_URI}/api/config/" + + +def get_project_url(project_id: str): + return f"{BASE_URI}/api/project/{project_id}" + + +def get_records_url(project_id: str): + return f"{get_project_url(project_id)}/records" + + +def get_export_url(project_id: str) -> str: + return f"{get_project_url(project_id)}/export" + + +def get_import_url(project_id: str) -> str: + return f"{get_project_url(project_id)}/import" diff --git a/requirements.txt 
b/requirements.txt index 45f9731..a0c2849 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,19 +1,83 @@ +appnope==0.1.3 +argon2-cffi==21.3.0 +argon2-cffi-bindings==21.2.0 +asttokens==2.0.5 +attrs==21.4.0 +backcall==0.2.0 +beautifulsoup4==4.11.1 black==22.3.0 +bleach==5.0.0 certifi==2021.10.8 +cffi==1.15.0 charset-normalizer==2.0.12 click==8.1.3 +debugpy==1.6.0 +decorator==5.1.1 +defusedxml==0.7.1 +entrypoints==0.4 +executing==0.8.3 +fastjsonschema==2.15.3 idna==3.3 +ipykernel==6.13.0 +ipython==8.3.0 +ipython-genutils==0.2.0 +ipywidgets==7.7.0 +jedi==0.18.1 +Jinja2==3.1.2 +jsonschema==4.5.1 +jupyter==1.0.0 +jupyter-client==7.3.1 +jupyter-console==6.4.3 +jupyter-core==4.10.0 +jupyterlab-pygments==0.2.2 +jupyterlab-widgets==1.1.0 +kern-python-client @ file:///Users/jhoetter/repos/kern-python +MarkupSafe==2.1.1 +matplotlib-inline==0.1.3 +minio==7.1.8 +mistune==0.8.4 mypy-extensions==0.4.3 +nbclient==0.6.3 +nbconvert==6.5.0 +nbformat==5.4.0 +nest-asyncio==1.5.5 +notebook==6.4.11 numpy==1.22.3 +packaging==21.3 pandas==1.4.2 +pandocfilters==1.5.0 +parso==0.8.3 pathspec==0.9.0 +pexpect==4.8.0 +pickleshare==0.7.5 platformdirs==2.5.2 +prometheus-client==0.14.1 +prompt-toolkit==3.0.29 +psutil==5.9.0 +ptyprocess==0.7.0 +pure-eval==0.2.2 +pycparser==2.21 +Pygments==2.12.0 +pyparsing==3.0.9 +pyrsistent==0.18.1 python-dateutil==2.8.2 pytz==2022.1 +pyzmq==22.3.0 +qtconsole==5.3.0 +QtPy==2.1.0 requests==2.27.1 +Send2Trash==1.8.0 six==1.16.0 +soupsieve==2.3.2.post1 +stack-data==0.2.0 +terminado==0.15.0 tinycss2==1.1.1 tomli==2.0.1 +tornado==6.1 +traitlets==5.2.1.post0 typing_extensions==4.2.0 urllib3==1.26.9 wasabi==0.9.1 +wcwidth==0.2.5 +webencodings==0.5.1 +widgetsnbextension==3.6.0 diff --git a/setup.py b/setup.py index 3977945..1587d92 100644 --- a/setup.py +++ b/setup.py @@ -9,11 +9,11 @@ long_description = file.read() setup( - name="kern-python-client", - version="0.0.1", + name="kern-sdk", + version="0.0.3", author="jhoetter", author_email="johannes.hoetter@kern.ai", 
- description="Official Python SDK for the Kern AI API", + description="Official SDK for the Kern AI API", long_description=long_description, long_description_content_type="text/markdown", url="https://github.com/code-kern-ai/kern-python", @@ -44,4 +44,9 @@ "urllib3==1.26.9", "wasabi==0.9.1", ], + entry_points={ + "console_scripts": [ + "kern=cli:main", + ], + }, )