diff --git a/kern/__init__.py b/kern/__init__.py index dfcffd3..cb94726 100644 --- a/kern/__init__.py +++ b/kern/__init__.py @@ -2,9 +2,10 @@ from wasabi import msg import pandas as pd -from kern import authentication, api_calls, settings, exceptions +from kern import authentication, api_calls, settings, exceptions, util from typing import List, Optional, Dict import json +import os.path from tqdm import tqdm import spacy @@ -127,30 +128,47 @@ def get_record_export( msg.good(f"Downloaded export to {download_to}") return df - # TODO: issue #6 - # def post_file_import(self, upload_from: str): - # upload_from = f"{upload_from}_SCALE" - # file_type = "records" - # import_file_options = None - # config_url = settings.get_config_url() - # config_api_response = api_calls.get_request(config_url, self.session_token) - # endpoint = config_api_response["KERN_S3_ENDPOINT"] - - # import_url = settings.get_import_url(self.project_id) - # import_api_response = api_calls.post_request( - # import_url, - # { - # "file_name": upload_from, - # "file_type": file_type, - # "import_file_options": import_file_options, - # }, - # self.session_token, - # ) - - # credentials = import_api_response["Credentials"] - # access_key = credentials["AccessKeyId"] - # secret_key = credentials["SecretAccessKey"] - # session_token = credentials["SessionToken"] - - # upload_task_id = import_api_response["uploadTaskId"] - # return endpoint, access_key, secret_key, session_token, upload_task_id + def post_file_import(self, path: str) -> bool: + if not os.path.exists(path): + raise Exception(f"Given filepath is not valid. Path: {path}") + last_path_part = path.split("/")[-1] + file_name = f"{last_path_part}_SCALE" + file_type = "records" + import_file_options = "" + + # config + config_url = settings.get_base_config(self.project_id) + config_api_response = api_calls.get_request( + config_url, + self.session_token, + ) + endpoint = config_api_response.get("KERN_S3_ENDPOINT") + + # credentials + credentials_url = settings.get_import_url(self.project_id) + credentials_api_response = api_calls.post_request( + credentials_url, + { + "file_name": file_name, + "file_type": file_type, + "import_file_options": import_file_options, + }, + self.session_token, + ) + credentials = credentials_api_response["Credentials"] + access_key = credentials["AccessKeyId"] + secret_key = credentials["SecretAccessKey"] + session_token = credentials["SessionToken"] + upload_task_id = credentials_api_response["uploadTaskId"] + bucket = credentials_api_response["bucket"] + success = util.s3_upload( + access_key, + secret_key, + session_token, + bucket, + endpoint, + upload_task_id, + path, + file_name, + ) + return True if success else False diff --git a/kern/settings.py b/kern/settings.py index 8ff12f5..0adcb50 100644 --- a/kern/settings.py +++ b/kern/settings.py @@ -42,3 +42,7 @@ def get_export_url(project_id: str) -> str: def get_import_url(project_id: str) -> str: return f"{get_project_url(project_id)}/import" + + +def get_base_config(project_id: str) -> str: + return f"{get_project_url(project_id)}/import/base_config" diff --git a/kern/util.py b/kern/util.py new file mode 100644 index 0000000..1130996 --- /dev/null +++ b/kern/util.py @@ -0,0 +1,31 @@ +import boto3 +from botocore.client import Config + + +def s3_upload( + access_key: str, + secret_key: str, + aws_session_token: str, + target_bucket: str, + url: str, + upload_task_id: str, + file_path: str, + file_name: str, +) -> bool: + """ + Connects to the object storage with temporary credentials generated for the + given user_id, project_id and bucket + """ + s3 = boto3.resource( + "s3", + endpoint_url=url, + aws_access_key_id=access_key, + aws_secret_access_key=secret_key, + aws_session_token=aws_session_token, + config=Config(signature_version="s3v4"), + region_name="us-east-1", + ) + s3_object = s3.Object(target_bucket, f"{upload_task_id}/{file_name}") + with open(file_path, "rb") as file: + s3_object.put(Body=file) + return True diff --git a/requirements.txt b/requirements.txt index a0c2849..24d47e6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,6 +7,8 @@ backcall==0.2.0 beautifulsoup4==4.11.1 black==22.3.0 bleach==5.0.0 +boto3==1.24.26 +botocore==1.27.26 certifi==2021.10.8 cffi==1.15.0 charset-normalizer==2.0.12 @@ -31,7 +33,6 @@ jupyter-console==6.4.3 jupyter-core==4.10.0 jupyterlab-pygments==0.2.2 jupyterlab-widgets==1.1.0 -kern-python-client @ file:///Users/jhoetter/repos/kern-python MarkupSafe==2.1.1 matplotlib-inline==0.1.3 minio==7.1.8 @@ -69,6 +70,7 @@ requests==2.27.1 Send2Trash==1.8.0 six==1.16.0 soupsieve==2.3.2.post1 +spacy==3.3.1 stack-data==0.2.0 terminado==0.15.0 tinycss2==1.1.1