Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 24 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,10 +76,30 @@ An example export file looks like this:
In this example, there is no manual label, but a weakly supervised label `"Negative"` has been set with 62.2% confidence.

### Fetch lookup lists
- [ ] Todo
In your project, you can create lookup lists to implement distant supervision heuristics. To fetch your lookup list(s), you can either fetch all of them at once or fetch a single one by its list id.
```python
list_id = "your-list-id"
lookup_list = client.get_lookup_list(list_id)
```

The list id can be found in your browser URL when you're on the details page of a lookup list, e.g. when you run on localhost: `http://localhost:4455/app/projects/{project_id}/knowledge-base/{list_id}`.

Alternatively, you can pull all lookup lists:
```python
lookup_lists = client.get_lookup_lists()
```

### Upload files
- [ ] Todo
You can import files directly from your machine to your application:

```python
file_path = "my/file/path/data.json"
upload_was_successful = client.post_file_import(file_path)
```

Alternatively, you can run `kern push <path-to-your-file>` via the CLI, provided that a `secrets.json` file exists in the directory from which you run the command.

**Make sure that you've selected the correct project beforehand, and that your file matches the data schema of the existing records in your project!**

### Adapters

Expand Down Expand Up @@ -174,7 +194,8 @@ Let us know what open-source/closed-source NLP framework you are using, for whic

## Roadmap
- [ ] Register heuristics via wrappers
- [ ] Add project upload
- [ ] Up/download zipped projects for versioning via DVC
- [x] Add project upload
- [x] Fetch project statistics


Expand Down
53 changes: 47 additions & 6 deletions kern/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,14 @@ def __init__(

@classmethod
def from_secrets_file(cls, path_to_file: str):
"""Creates a Client object from a secrets file.

Args:
path_to_file (str): Path to the secrets file.

Returns:
Client: kern.Client object.
"""
with open(path_to_file, "r") as file:
content = json.load(file)
uri = content.get("uri")
Expand All @@ -62,11 +70,24 @@ def get_project_details(self) -> Dict[str, str]:
return api_response

def get_lookup_list(self, list_id: str) -> Dict[str, str]:
    """Fetch a single lookup list of the current project by its ID.

    Args:
        list_id (str): The ID of the lookup list to fetch.

    Returns:
        Dict[str, str]: The API response for the requested lookup list.
    """
    # Build the endpoint for this project/list pair and return the
    # authenticated GET response unchanged.
    return api_calls.get_request(
        settings.get_lookup_list_url(self.project_id, list_id),
        self.session_token,
    )

def get_lookup_lists(self) -> List[Dict[str, str]]:
"""Fetches all lookup lists of your current project

Returns:
List[Dict[str, str]]: Containing the lookups lists of your project.
"""
lookup_lists = []
for lookup_list_id in self.get_project_details()["knowledge_base_ids"]:
lookup_list = self.get_lookup_list(lookup_list_id)
Expand Down Expand Up @@ -128,14 +149,29 @@ def get_record_export(
msg.good(f"Downloaded export to {download_to}")
return df

def post_file_import(self, path: str) -> bool:
def post_file_import(
self, path: str, import_file_options: Optional[str] = ""
) -> bool:
"""Imports a file into your project.

Args:
path (str): Path to the file to import.
import_file_options (Optional[str], optional): Options for the Pandas import. Defaults to None.

Raises:
FileImportError: If the file could not be imported, an exception is raised.

Returns:
bool: True if the file was imported successfully, False otherwise.
"""
if not os.path.exists(path):
raise Exception(f"Given filepath is not valid. Path: {path}")
raise exceptions.FileImportError(
f"Given filepath is not valid. Path: {path}"
)
last_path_part = path.split("/")[-1]
file_name = f"{last_path_part}_SCALE"
file_type = "records"
import_file_options = ""

FILE_TYPE = "records"
# config
config_url = settings.get_base_config(self.project_id)
config_api_response = api_calls.get_request(
Expand All @@ -150,7 +186,7 @@ def post_file_import(self, path: str) -> bool:
credentials_url,
{
"file_name": file_name,
"file_type": file_type,
"file_type": FILE_TYPE,
"import_file_options": import_file_options,
},
self.session_token,
Expand All @@ -171,4 +207,9 @@ def post_file_import(self, path: str) -> bool:
path,
file_name,
)
return True if success else False
if success:
msg.good(f"Uploaded {path} to your project.")
return True
else:
msg.fail(f"Could not upload {path} to your project.")
return False
15 changes: 12 additions & 3 deletions kern/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,20 @@ def pull():
client.get_record_export(download_to=download_to)


def push(file_path):
    """Upload the file at ``file_path`` via a client built from ``secrets.json``.

    The secrets file is referenced by its bare name, so it is looked up
    relative to the current working directory.

    Args:
        file_path: Path to the file that should be imported.
    """
    Client.from_secrets_file("secrets.json").post_file_import(file_path)


def help():
msg.info(
"With the Kern SDK, you can type commands as `kern <command>`. Currently, we provide the following:"
"With the Kern refinery SDK, you can type commands as `kern <command>`. Currently, we provide the following:"
)
msg.info(
"- kern pull: Download the record export of the project defined in `settings.json` to your local storage."
)
msg.info(
"- kern push <path>: Upload a record file to the project defined in `settings.json` from your local storage. Currently in development."
"- kern push <path>: Upload a record file to the project defined in `settings.json` from your local storage."
)


Expand All @@ -33,7 +38,11 @@ def main():
if command == "pull":
pull()
elif command == "push":
msg.warn("Currently in development.")
if len(cli_args) != 2:
msg.fail("Please provide a path to a file when running kern push.")
else:
file_path = cli_args[1]
push(file_path)
elif command == "help":
help()
else:
Expand Down
10 changes: 7 additions & 3 deletions kern/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,14 @@ class InternalServerError(SDKError):
pass


class FileImportError(Exception):
    """Raised when a file could not be imported, e.g. for an invalid file path."""


RESPONSE_CODES_API_EXCEPTION_MAP = {
401: UnauthorizedError,
404: NotFoundError,
500: InternalServerError,
401: {"*": UnauthorizedError},
404: {"*": NotFoundError},
500: {"*": InternalServerError},
}


Expand Down
3 changes: 3 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,10 @@
"python-dateutil==2.8.2",
"pytz==2022.1",
"requests==2.27.1",
"boto3==1.24.26",
"botocore==1.27.26",
"six==1.16.0",
"spacy==3.3.1",
"tinycss2==1.1.1",
"tomli==2.0.1",
"typing_extensions==4.2.0",
Expand Down