Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(api): long lived api keys #658

Open
wants to merge 48 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 43 commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
c13cbf4
wip: adds migration, auth router, create/revoke api key endpoints
gphorvath Jun 18, 2024
2014957
broke up routes and basic api key operations
gphorvath Jun 20, 2024
cae393c
Checking for invalid times
gphorvath Jun 20, 2024
eb5ec37
add response type to create endpoint
gphorvath Jun 20, 2024
7357c02
mask api key in list endpoint
gphorvath Jun 20, 2024
8be5af3
wip: working on supabase session auth from api key
gphorvath Jun 20, 2024
909c125
switching from supabase-py-async to supabase, all async functionality…
gphorvath Jun 21, 2024
f68a491
more tweaks
gphorvath Jun 21, 2024
fdc843a
fix name collision in files router
gphorvath Jun 21, 2024
f01d4b1
supabase api key session auth works, but requires supabase service ke…
gphorvath Jun 21, 2024
3b70a01
adding an endpoint for testing api key auth
gphorvath Jun 21, 2024
eaac98b
Merge branch 'main' into 561-long-lived-api-keys
gphorvath Jun 25, 2024
ae56c78
reworking to pass api-key as custom header
gphorvath Jun 30, 2024
4e49ba9
update schema to make sure the key is unique and not expired
gphorvath Jun 30, 2024
08db2c1
WIP: Updating assistants endpoints to use api-key auth
gphorvath Jul 1, 2024
f58a42b
wip: schema for assistants rls w/ api key
gphorvath Jul 1, 2024
c0261b8
handle error
gphorvath Jul 1, 2024
c6a586f
can create assistants with api_key
gphorvath Jul 1, 2024
0f32c71
cleaning up og session
gphorvath Jul 2, 2024
4be8366
both api key and jwt auth working
gphorvath Jul 2, 2024
d1a44d0
cutting over all the endpoints, things will probably be broken here
gphorvath Jul 2, 2024
22d427e
fix an issue with validating api_key
gphorvath Jul 2, 2024
8c962d2
constants are good
gphorvath Jul 2, 2024
4a70fa7
Merge branch 'main' into 561-long-lived-api-keys
gphorvath Jul 2, 2024
ea90ac2
fixing tests and stuff
gphorvath Jul 2, 2024
2d743ca
renaming supabase_session
gphorvath Jul 2, 2024
951993e
wip: blowing up auth, things might not work
gphorvath Jul 2, 2024
5021800
fix a couple typos
gphorvath Jul 3, 2024
a7f8f35
big overhaul of underlying crud to simplify auth
gphorvath Jul 3, 2024
6d179bf
revert change to test_config.yaml
gphorvath Jul 3, 2024
66345be
update headers instead of overriding
gphorvath Jul 3, 2024
0c6203f
add name field and test
gphorvath Jul 3, 2024
af4055f
Merge branch 'main' into 561-long-lived-api-keys
gphorvath Jul 3, 2024
b19a0b8
fixes revoke endpoint
gphorvath Jul 3, 2024
d490e27
Merge branch '561-long-lived-api-keys' of github.com:defenseunicorns/…
gphorvath Jul 3, 2024
e30ed2c
changing revoke to return 204 or 404
gphorvath Jul 3, 2024
c6a9ef0
redoc the auth endpoints
gphorvath Jul 3, 2024
96dbbfa
add user_id check to each table when accessed by api key
gphorvath Jul 7, 2024
1c3fad8
handle jwt auth failure
gphorvath Jul 8, 2024
71c77e8
changes var name to EXPOSE_API to be more clear that the API will be …
gphorvath Jul 8, 2024
1ae90f1
Merge branch 'main' into 561-long-lived-api-keys
gphorvath Jul 8, 2024
1ef2c34
adding hints to api_key.py
gphorvath Jul 9, 2024
5d1df8a
reverting variable change
gphorvath Jul 9, 2024
a7c0a17
merge main
gphorvath Jul 9, 2024
aec152c
NOT WORKING YET: updating migration to hash the api key on the DB side
gphorvath Jul 10, 2024
ebe395d
revert a change to zarf.yaml
gphorvath Jul 10, 2024
8bf2bfa
remove unused httpbearer
gphorvath Jul 10, 2024
9bb2a6f
remove more redundant httpbearers
gphorvath Jul 10, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
144 changes: 144 additions & 0 deletions packages/api/supabase/migrations/20240618163044_api_keys.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
-- Initialize api_keys table
-- Time columns are unix epoch seconds (bigint), not timestamptz.
create table api_keys (
name text, -- optional human-friendly label for the key
id uuid primary key default uuid_generate_v4(),
user_id uuid references auth.users not null, -- owning Supabase auth user
-- Value matched against the x-custom-api-key request header by the RLS
-- policies below (presumably the hashed token, not the raw one — confirm
-- against the application's api_key.py).
api_key text not null unique,
created_at bigint default extract(epoch from now()) not null,
expires_at bigint -- null means the key never expires
);

--- RLS for api_keys table

alter table api_keys enable row level security;

-- Lets the anon role see a key row when the presented header value matches
-- and the key has not expired (expiry compared in epoch seconds).
-- NOTE(review): this also makes the api_key column itself readable by any
-- holder of a matching key — confirm that is intended.
create policy "Read only if API key matches and is current" ON api_keys for
select using (
api_key = current_setting('request.headers')::json->>'x-custom-api-key'
and (expires_at is null or expires_at > extract(epoch from now()))
);

-- JWT-authenticated users manage their own keys. There is no update policy,
-- so rows cannot be modified (e.g. renamed) after creation — only created,
-- listed, and deleted.
create policy "Individuals can view their own api_keys." on api_keys for
select using (auth.uid() = user_id);
create policy "Individuals can create api_keys." on api_keys for
insert with check (auth.uid() = user_id);
create policy "Individuals can delete their own api_keys." on api_keys for
delete using (auth.uid() = user_id);

-- API Key Policies
-- Shared pattern: grant the anon role full CRUD on a user-owned table when
-- the x-custom-api-key header matches a key row belonging to that same user.
-- For `for all` policies with no `with check`, Postgres reuses the `using`
-- expression for writes, so inserts/updates are scoped to the key owner too.
-- Expiry appears to be enforced indirectly: the subquery on api_keys is
-- itself subject to that table's RLS, which hides expired keys — confirm.

create policy "Individuals can CRUD their own assistant_objects via API key."
on assistant_objects for all
to anon
using
(
exists (
select 1
from api_keys
where api_keys.api_key = current_setting('request.headers')::json->>'x-custom-api-key'
and api_keys.user_id = assistant_objects.user_id
)
);

create policy "Individuals can CRUD their own thread_objects via API key."
on thread_objects for all
to anon
using
(
exists (
select 1
from api_keys
where api_keys.api_key = current_setting('request.headers')::json->>'x-custom-api-key'
and api_keys.user_id = thread_objects.user_id
)
);

create policy "Individuals can CRUD their own message_objects via API key."
on message_objects for all
to anon
using
(
exists (
select 1
from api_keys
where api_keys.api_key = current_setting('request.headers')::json->>'x-custom-api-key'
and api_keys.user_id = message_objects.user_id
)
);

create policy "Individuals can CRUD their own file_objects via API key."
on file_objects for all
to anon
using
(
exists (
select 1
from api_keys
where api_keys.api_key = current_setting('request.headers')::json->>'x-custom-api-key'
and api_keys.user_id = file_objects.user_id
)
);

-- NOTE(review): unlike the object-table policies, this one has no user_id
-- condition — ANY valid API key grants CRUD on ALL rows of storage.buckets.
-- Confirm whether bucket access should also be scoped to the key's owner.
create policy "Individuals can CRUD file_bucket via API key."
on storage.buckets for all
to anon
using
(
exists (
select 1
from api_keys
where api_keys.api_key = current_setting('request.headers')::json->>'x-custom-api-key'
)
);

-- Same owner-scoped API-key pattern as the object tables above, applied to
-- runs and the vector-store tables.
create policy "Individuals can CRUD their own run_objects via API key."
on run_objects for all
to anon
using
(
exists (
select 1
from api_keys
where api_keys.api_key = current_setting('request.headers')::json->>'x-custom-api-key'
and api_keys.user_id = run_objects.user_id
)
);

create policy "Individuals can CRUD their own vector_store via API key."
on vector_store for all
to anon
using
(
exists (
select 1
from api_keys
where api_keys.api_key = current_setting('request.headers')::json->>'x-custom-api-key'
and api_keys.user_id = vector_store.user_id
)
);

create policy "Individuals can CRUD their own vector_store_file via API key."
on vector_store_file for all
to anon
using
(
exists (
select 1
from api_keys
where api_keys.api_key = current_setting('request.headers')::json->>'x-custom-api-key'
and api_keys.user_id = vector_store_file.user_id
)
);

create policy "Individuals can CRUD their own vector_content via API key."
on vector_content for all
to anon
using
(
exists (
select 1
from api_keys
where api_keys.api_key = current_setting('request.headers')::json->>'x-custom-api-key'
and api_keys.user_id = vector_content.user_id
)
);
2 changes: 1 addition & 1 deletion packages/api/zarf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ constants:
variables:
- name: EXPOSE_OPENAPI_SCHEMA
gphorvath marked this conversation as resolved.
Show resolved Hide resolved
default: "false"
description: "Flag to expose the OpenAPI schema for debugging."
description: "Flag to expose the API service"
- name: HOSTED_DOMAIN
default: "uds.dev"
- name: DEFAULT_EMBEDDINGS_MODEL
Expand Down
13 changes: 10 additions & 3 deletions src/leapfrogai_api/backend/rag/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from langchain_core.embeddings import Embeddings
from openai.types.beta.vector_stores import VectorStoreFile
from openai.types.beta.vector_stores.vector_store_file import LastError
from supabase_py_async import AsyncClient
from supabase import AClient as AsyncClient
from leapfrogai_api.backend.rag.document_loader import load_file, split
from leapfrogai_api.backend.rag.leapfrogai_embeddings import LeapfrogAIEmbeddings
from leapfrogai_api.backend.types import VectorStoreFileStatus
Expand Down Expand Up @@ -184,7 +184,7 @@ async def aadd_documents(
metadata=document.metadata,
embedding=embedding,
)
ids.append(response.data[0]["id"])
ids.append(response[0]["id"])

return ids

Expand Down Expand Up @@ -273,5 +273,12 @@ async def _aadd_vector(
"metadata": metadata,
"embedding": embedding,
}
response = await self.db.from_(self.table_name).insert(row).execute()
data, _count = await self.db.from_(self.table_name).insert(row).execute()

_, response = data

for item in response:
if "user_id" in item:
del item["user_id"]

return response
2 changes: 1 addition & 1 deletion src/leapfrogai_api/backend/rag/query.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""Service for querying the RAG model."""

from supabase_py_async import AsyncClient
from supabase import AClient as AsyncClient
from leapfrogai_api.backend.rag.index import IndexingService
from postgrest.base_request_builder import SingleAPIResponse

Expand Down
Empty file.
100 changes: 100 additions & 0 deletions src/leapfrogai_api/backend/security/api_key.py
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps use constants for the magic numbers in this file, or add more explanation in the comments as to why these (32, 4) numbers were chosen.

Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
"""Basic API key security functions."""

import secrets
import hashlib

KEY_PREFIX = "lfai"  # human-readable marker identifying keys from this service
KEY_BYTES = 32  # 32 random bytes -> 64 hex chars of entropy in the key body
CHECKSUM_LENGTH = 4  # hex chars of sha256(key) kept as a quick integrity check


def generate_api_key() -> tuple[str, str]:
    """Generate an API key.

    returns:
        read_once_token: str - in the format: {prefix}_{unique_key}_{checksum}
        hashed_token: str - in the format: {prefix}_{hashed_key}_{checksum}
    """
    unique_key = secrets.token_bytes(KEY_BYTES).hex()
    # Encode once; the checksum embedded in the hashed token is derived from
    # the raw unique_key, so both tokens share the same checksum suffix.
    # (Previously _encode_unique_key was called a second time with the same
    # input, producing an identical value — redundant work, now removed.)
    hashed_token = _encode_unique_key(unique_key)
    checksum = parse_api_key(hashed_token)[2]

    read_once_token = f"{KEY_PREFIX}_{unique_key}_{checksum}"

    return read_once_token, hashed_token


def validate_and_encode_api_key(api_key: str) -> tuple[bool, str]:
    """Validate an API key and return its hashed (storable) encoding.

    Expected input form: `{prefix}_{unique_key}_{checksum}`

    returns:
        valid: bool - whether the key parsed and its checksum matched
        encoded_key: str - hashed form of the key, or "" when invalid
    """
    # Guard clause: bail out early on a malformed or checksum-failing key.
    if not validate_api_key(api_key):
        return False, ""

    _prefix, unique_key, _checksum = parse_api_key(api_key)
    return True, _encode_unique_key(unique_key)


def _encode_unique_key(unique_key: str) -> str:
    """Hashes and encodes an API key as a string.

    The checksum is the first CHECKSUM_LENGTH hex chars of the same digest,
    so the input is hashed exactly once (previously sha256 was computed twice
    over identical input).

    NOTE(review): unsalted single-round sha256 is weak for secret storage;
    pbkdf2_hmac/bcrypt or server-side pgcrypto were proposed in review.

    returns:
        api_key: str # in the format {prefix}_{one_way_hash}_{checksum}
    """
    digest = hashlib.sha256(unique_key.encode()).hexdigest()
    return f"{KEY_PREFIX}_{digest}_{digest[:CHECKSUM_LENGTH]}"


def _validate_checksum(unique_key: str, checksum: str) -> bool:
    """Validate the checksum of an API key.

    Uses secrets.compare_digest for a constant-time comparison, avoiding a
    timing side channel when comparing secret-derived strings.

    returns:
        bool: True when sha256(unique_key)[:CHECKSUM_LENGTH] equals checksum
    """
    expected = hashlib.sha256(unique_key.encode()).hexdigest()[:CHECKSUM_LENGTH]
    return secrets.compare_digest(expected, checksum)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I recommend changing this to secrets.compare_digest(calculated_checksum, checksum)

https://pycharm-security.readthedocs.io/en/latest/fixes/comparedigestfixer.html



def validate_api_key(api_key: str) -> bool:
    """Check that an API key parses and its checksum matches its key body.

    returns:
        bool: True if the key is valid, False otherwise
    """
    try:
        parts = parse_api_key(api_key)
    except ValueError:
        # Malformed key: missing prefix, body, or checksum segment.
        return False

    return _validate_checksum(parts[1], parts[2])


def parse_api_key(api_key: str) -> tuple[str, str, str]:
    """Parse an API key into its components.

    key format: {prefix}_{unique_key}_{checksum}

    Note: Does not validate prefix, checksum, or key.

    returns:
        prefix: str
        key: str
        checksum: str

    raises:
        ValueError: if any of the three components is empty or missing
    """
    # Split once instead of three times; underscores inside the key body are
    # preserved by re-joining the middle segments.
    parts = api_key.split("_")
    prefix = parts[0]
    checksum = parts[-1]
    key = "_".join(parts[1:-1])

    if not prefix or not key or not checksum:
        raise ValueError("Invalid API key format")

    return prefix, key, checksum
28 changes: 4 additions & 24 deletions src/leapfrogai_api/data/crud_assistant.py
Original file line number Diff line number Diff line change
@@ -1,39 +1,26 @@
"""CRUD Operations for Assistant."""

from pydantic import BaseModel, Field
from pydantic import BaseModel
from openai.types.beta import Assistant
from supabase_py_async import AsyncClient
from supabase import AClient as AsyncClient
from leapfrogai_api.data.crud_base import CRUDBase


class AuthAssistant(Assistant):
"""A wrapper for the Assistant that includes a user_id for auth"""

user_id: str = Field(default="")


class FilterAssistant(BaseModel):
"""Validation for Assistant filter."""

id: str


class CRUDAssistant(CRUDBase[AuthAssistant]):
class CRUDAssistant(CRUDBase[Assistant]):
"""CRUD Operations for Assistant"""

def __init__(
self,
db: AsyncClient,
table_name: str = "assistant_objects",
):
super().__init__(db=db, model=AuthAssistant, table_name=table_name)

async def create(self, object_: Assistant) -> Assistant | None:
"""Create a new assistant."""
user_id: str = (await self.db.auth.get_user()).user.id
return await super().create(
object_=AuthAssistant(user_id=user_id, **object_.model_dump())
)
super().__init__(db=db, model=Assistant, table_name=table_name)

async def get(self, filters: FilterAssistant | None = None) -> Assistant | None:
"""Get assistant by filters."""
Expand All @@ -45,13 +32,6 @@ async def list(
"""List all assistants."""
return await super().list(filters=filters.model_dump() if filters else None)

async def update(self, id_: str, object_: Assistant) -> Assistant | None:
"""Update an assistant by its ID."""
user_id: str = (await self.db.auth.get_user()).user.id
return await super().update(
id_=id_, object_=AuthAssistant(user_id=user_id, **object_.model_dump())
)

async def delete(self, filters: FilterAssistant | None = None) -> bool:
"""Delete an assistant by its ID."""
return await super().delete(filters=filters.model_dump() if filters else None)
Loading