Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 46 additions & 28 deletions kern/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@

from wasabi import msg
import pandas as pd
from kern import authentication, api_calls, settings, exceptions
from kern import authentication, api_calls, settings, exceptions, util
from typing import List, Optional, Dict
import json
import os.path
from tqdm import tqdm
import spacy

Expand Down Expand Up @@ -127,30 +128,47 @@ def get_record_export(
msg.good(f"Downloaded export to {download_to}")
return df

# TODO: issue #6
# def post_file_import(self, upload_from: str):
# upload_from = f"{upload_from}_SCALE"
# file_type = "records"
# import_file_options = None
# config_url = settings.get_config_url()
# config_api_response = api_calls.get_request(config_url, self.session_token)
# endpoint = config_api_response["KERN_S3_ENDPOINT"]

# import_url = settings.get_import_url(self.project_id)
# import_api_response = api_calls.post_request(
# import_url,
# {
# "file_name": upload_from,
# "file_type": file_type,
# "import_file_options": import_file_options,
# },
# self.session_token,
# )

# credentials = import_api_response["Credentials"]
# access_key = credentials["AccessKeyId"]
# secret_key = credentials["SecretAccessKey"]
# session_token = credentials["SessionToken"]

# upload_task_id = import_api_response["uploadTaskId"]
# return endpoint, access_key, secret_key, session_token, upload_task_id
def post_file_import(self, path: str) -> bool:
    """Upload a local file as a record import for this client's project.

    Reads the object-storage endpoint from the project's import base config,
    requests upload credentials from the project's import URL, and passes both
    to ``util.s3_upload`` together with the file.

    Args:
        path: Filesystem path of the file to upload.

    Returns:
        True if ``util.s3_upload`` returned a truthy value, False otherwise.

    Raises:
        FileNotFoundError: If ``path`` does not point to an existing file.
        KeyError: If the credentials response (assumed to be a dict) lacks
            one of the keys read below.
    """
    # isfile (rather than exists) rejects directories up front, before any
    # request is sent; they could not be opened for upload anyway.
    if not os.path.isfile(path):
        raise FileNotFoundError(f"Given filepath is not valid. Path: {path}")

    # basename honours the platform's path separators, unlike splitting on "/".
    file_name = f"{os.path.basename(path)}_SCALE"
    file_type = "records"
    import_file_options = ""

    # config
    config_url = settings.get_base_config(self.project_id)
    config_api_response = api_calls.get_request(
        config_url,
        self.session_token,
    )
    # NOTE(review): .get() yields None when the key is absent, and that None is
    # forwarded to util.s3_upload as the endpoint — confirm this is intended.
    endpoint = config_api_response.get("KERN_S3_ENDPOINT")

    # credentials
    credentials_url = settings.get_import_url(self.project_id)
    credentials_api_response = api_calls.post_request(
        credentials_url,
        {
            "file_name": file_name,
            "file_type": file_type,
            "import_file_options": import_file_options,
        },
        self.session_token,
    )
    credentials = credentials_api_response["Credentials"]
    access_key = credentials["AccessKeyId"]
    secret_key = credentials["SecretAccessKey"]
    session_token = credentials["SessionToken"]
    upload_task_id = credentials_api_response["uploadTaskId"]
    bucket = credentials_api_response["bucket"]

    success = util.s3_upload(
        access_key,
        secret_key,
        session_token,
        bucket,
        endpoint,
        upload_task_id,
        path,
        file_name,
    )
    return bool(success)
4 changes: 4 additions & 0 deletions kern/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,7 @@ def get_export_url(project_id: str) -> str:

def get_import_url(project_id: str) -> str:
    """Return the import URL for a project: its project URL plus ``/import``."""
    project_url = get_project_url(project_id)
    return f"{project_url}/import"


def get_base_config(project_id: str) -> str:
    """Return the project's URL with ``/import/base_config`` appended."""
    project_url = get_project_url(project_id)
    return f"{project_url}/import/base_config"
31 changes: 31 additions & 0 deletions kern/util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import boto3
from botocore.client import Config


def s3_upload(
    access_key: str,
    secret_key: str,
    aws_session_token: str,
    target_bucket: str,
    url: str,
    upload_task_id: str,
    file_path: str,
    file_name: str,
) -> bool:
    """
    Upload the file at ``file_path`` to ``target_bucket`` under the object key
    ``<upload_task_id>/<file_name>``.

    An S3 resource is created against the endpoint ``url`` with the given
    access key, secret key and session token, using "s3v4" signing and the
    fixed region "us-east-1". The file is opened in binary mode and passed as
    the body of a single ``put`` call.

    Returns True once the ``put`` call has returned. Nothing is caught here,
    so errors raised while opening the file or talking to the object storage
    propagate to the caller.
    """
    object_key = f"{upload_task_id}/{file_name}"
    client_config = Config(signature_version="s3v4")
    resource = boto3.resource(
        "s3",
        endpoint_url=url,
        aws_access_key_id=access_key,
        aws_secret_access_key=secret_key,
        aws_session_token=aws_session_token,
        config=client_config,
        region_name="us-east-1",
    )
    target = resource.Object(target_bucket, object_key)
    with open(file_path, "rb") as payload:
        target.put(Body=payload)
    return True
4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ backcall==0.2.0
beautifulsoup4==4.11.1
black==22.3.0
bleach==5.0.0
boto3==1.24.26
botocore==1.27.26
certifi==2021.10.8
cffi==1.15.0
charset-normalizer==2.0.12
Expand All @@ -31,7 +33,6 @@ jupyter-console==6.4.3
jupyter-core==4.10.0
jupyterlab-pygments==0.2.2
jupyterlab-widgets==1.1.0
kern-python-client @ file:///Users/jhoetter/repos/kern-python
MarkupSafe==2.1.1
matplotlib-inline==0.1.3
minio==7.1.8
Expand Down Expand Up @@ -69,6 +70,7 @@ requests==2.27.1
Send2Trash==1.8.0
six==1.16.0
soupsieve==2.3.2.post1
spacy==3.3.1
stack-data==0.2.0
terminado==0.15.0
tinycss2==1.1.1
Expand Down