diff --git a/apps/stack/src/extract/extract-repository.ts b/apps/stack/src/extract/extract-repository.ts index ed602ca83..18783dda5 100644 --- a/apps/stack/src/extract/extract-repository.ts +++ b/apps/stack/src/extract/extract-repository.ts @@ -37,6 +37,50 @@ const context: Context = { db, }; +const inputSchema = z.object({ + repositoryId: z.number(), + repositoryName: z.string(), + namespaceName: z.string(), + sourceControl: z.literal("gitlab").or(z.literal("github")), + from: z.coerce.date(), + to: z.coerce.date() +}); + +type Input = z.infer; +const extractRepository = async (input: Input, userId: string) => { + const { repositoryId, repositoryName, namespaceName, sourceControl, from, to } = input; + + const sourceControlAccessToken = await getClerkUserToken(userId, `oauth_${sourceControl}`); + + if (sourceControl === "gitlab") { + context.integrations.sourceControl = new GitlabSourceControl(sourceControlAccessToken); + } else if (sourceControl === "github") { + context.integrations.sourceControl = new GitHubSourceControl(sourceControlAccessToken); + } + + const { repository, namespace } = await getRepository({ externalRepositoryId: repositoryId, repositoryName, namespaceName }, context); + + const { instanceId } = await setInstance({ repositoryId: repository.id, userId }, { db: crawlDb, entities: { instances } }); + + await extractRepositoryEvent.publish( + { + repositoryId: repository.id, + namespaceId: namespace.id + }, + { + crawlId: instanceId, + caller: 'extract-repository', + timestamp: new Date().getTime(), + version: 1, + sourceControl, + userId, + from, + to, + } + ); + +} + const contextSchema = z.object({ authorizer: z.object({ jwt: z.object({ @@ -49,17 +93,6 @@ const contextSchema = z.object({ type CTX = z.infer; -const inputSchema = z.object({ - repositoryId: z.number(), - repositoryName: z.string(), - namespaceName: z.string(), - sourceControl: z.literal("gitlab").or(z.literal("github")), - from: z.coerce.date(), - to: z.coerce.date() -}); - -type Input = z.infer; - export const handler = ApiHandler(async (ev) => { const body = useJsonBody() as unknown; @@ -76,7 +109,6 @@ export const handler = ApiHandler(async (ev) => { } let input: Input; - let sourceControlAccessToken: string; try { input = inputSchema.parse(body); @@ -90,48 +122,49 @@ export const handler = ApiHandler(async (ev) => { const { sub } = lambdaContext.authorizer.jwt.claims; - - const { repositoryId, repositoryName, namespaceName, sourceControl, from, to } = input; - - try { - sourceControlAccessToken = await getClerkUserToken(sub, `oauth_${sourceControl}`); - } catch (error) { - return { - statusCode: 500, - body: JSON.stringify({ error: (error as Error).message }), - } - } - - if (sourceControl === "gitlab") { - context.integrations.sourceControl = new GitlabSourceControl(sourceControlAccessToken); - } else if (sourceControl === "github") { - context.integrations.sourceControl = new GitHubSourceControl(sourceControlAccessToken); +try { + await extractRepository(input, sub); +} catch (error) { + return { + statusCode: 500, + body: JSON.stringify({ error: (error as Error).toString() }) } - - const { repository, namespace } = await getRepository({ externalRepositoryId: repositoryId, repositoryName, namespaceName }, context); - - const { instanceId } = await setInstance({ repositoryId: repository.id, userId: sub }, { db: crawlDb, entities: { instances } }); - - await extractRepositoryEvent.publish( - { - repositoryId: repository.id, - namespaceId: namespace.id - }, - { - crawlId: instanceId, - caller: 'extract-repository', - timestamp: new Date().getTime(), - version: 1, - sourceControl, - userId: sub, - from, - to, - } - ); - +} return { statusCode: 200, body: JSON.stringify({}) }; }); + +const CRON_ENV = z.object({ + CRON_USER_ID: z.string(), + PUBLIC_REPO_NAME: z.string(), + PUBLIC_REPO_OWNER: z.string(), +}) +export const cronHandler = async ()=> { + + const validEnv = CRON_ENV.safeParse(process.env); + + if (!validEnv.success) { + console.error("Invalid environment in lambda 'extract-repository.cronHandler':", ...validEnv.error.issues); + throw new Error("Invalid environment"); + } + + const { CRON_USER_ID, PUBLIC_REPO_NAME, PUBLIC_REPO_OWNER } = validEnv.data; + + const utcTodayAt10AM = new Date(); + utcTodayAt10AM.setUTCHours(10, 0, 0, 0); + const utcYesterdayAt10AM = new Date(utcTodayAt10AM); + utcYesterdayAt10AM.setHours(utcTodayAt10AM.getUTCHours() - 24); + + await extractRepository({ + namespaceName: PUBLIC_REPO_OWNER, + repositoryId: 0, + repositoryName: PUBLIC_REPO_NAME, + sourceControl: 'github', + from: utcYesterdayAt10AM, + to: utcTodayAt10AM, + }, CRON_USER_ID); + +} diff --git a/apps/stack/stacks/ExtractStack.ts b/apps/stack/stacks/ExtractStack.ts index 3733aa31a..83ffc3c02 100644 --- a/apps/stack/stacks/ExtractStack.ts +++ b/apps/stack/stacks/ExtractStack.ts @@ -3,6 +3,7 @@ import { Config, EventBus, Queue, + Cron, type StackContext, } from "sst/constructs"; import { z } from "zod"; @@ -158,9 +159,18 @@ export function ExtractStack({ stack }: StackContext) { const ENVSchema = z.object({ CLERK_JWT_ISSUER: z.string(), CLERK_JWT_AUDIENCE: z.string(), + PUBLIC_REPOS: z.string(), + CRON_USER_ID: z.string(), }); + const publicReposSchema = z.array( + z.object({ + owner: z.string(), + name: z.string(), + }) + ); const ENV = ENVSchema.parse(process.env); + const PUBLIC_REPOS = publicReposSchema.parse(JSON.parse(ENV.PUBLIC_REPOS)); const api = new Api(stack, "ExtractApi", { defaults: { @@ -195,6 +205,34 @@ export function ExtractStack({ stack }: StackContext) { }, }); + PUBLIC_REPOS.forEach(publicRepo => { + new Cron(stack, `${publicRepo.name}_ExtractCron`, { + schedule: "cron(00 10 * * ? *)", + job: { + function: { + handler: "src/extract/extract-repository.cronHandler", + environment: { + CRON_USER_ID: ENV.CRON_USER_ID, + PUBLIC_REPO_OWNER: publicRepo.owner, + PUBLIC_REPO_NAME: publicRepo.name, + }, + bind: [ + bus, + EXTRACT_DATABASE_URL, + EXTRACT_DATABASE_AUTH_TOKEN, + CRAWL_DATABASE_URL, + CRAWL_DATABASE_AUTH_TOKEN, + CLERK_SECRET_KEY, + REDIS_URL, + REDIS_TOKEN, + REDIS_USER_TOKEN_TTL + ], + runtime: "nodejs18.x", + } + } + }) + }); + stack.addOutputs({ ApiEndpoint: api.url, }); diff --git a/turbo.json b/turbo.json index c16fefc61..3e4d5786f 100644 --- a/turbo.json +++ b/turbo.json @@ -41,6 +41,12 @@ "CRAWL_DATABASE_URL", "CRAWL_DATABASE_AUTH_TOKEN", "NEXT_PUBLIC_EXTRACT_API_URL", - "NEXT_PUBLIC_TRANSFORM_API_URL" + "NEXT_PUBLIC_TRANSFORM_API_URL", + "PUBLIC_REPOS", + "CRON_USER_ID", + "PUBLIC_REPO_OWNER", + "PUBLIC_REPO_NAME", + "CLERK_JWT_ISSUER", + "CLERK_JWT_AUDIENCE" ] }