Skip to content
133 changes: 83 additions & 50 deletions apps/stack/src/extract/extract-repository.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,50 @@ const context: Context<GetRepositorySourceControl, GetRepositoryEntities> = {
db,
};

/**
 * Zod schema describing the input accepted by `extractRepository`.
 *
 * - `repositoryId` is forwarded to `getRepository` as `externalRepositoryId`.
 * - `repositoryName` / `namespaceName` are forwarded to `getRepository` as-is.
 * - `sourceControl` must be exactly "gitlab" or "github"; it selects the
 *   source-control integration and the `oauth_<provider>` token that is requested.
 * - `from` / `to` are coerced to `Date` and forwarded in the metadata of the
 *   published extract-repository event.
 */
const inputSchema = z.object({
repositoryId: z.number(),
repositoryName: z.string(),
namespaceName: z.string(),
sourceControl: z.literal("gitlab").or(z.literal("github")),
// z.coerce.date() converts the incoming value to a Date during parsing.
from: z.coerce.date(),
to: z.coerce.date()
});

/** Parsed (post-coercion) shape of `inputSchema`; `from` and `to` are `Date` instances. */
type Input = z.infer<typeof inputSchema>;
/**
 * Runs the repository-extraction flow for one repository on behalf of a user.
 *
 * Steps, in order:
 *  1. Requests the user's `oauth_<sourceControl>` token via `getClerkUserToken`.
 *  2. Installs the matching source-control integration on the shared `context`.
 *  3. Looks up the repository and its namespace through `getRepository`.
 *  4. Registers an instance through `setInstance`; its id is used as the crawl id.
 *  5. Publishes `extractRepositoryEvent` with the repository/namespace ids and
 *     the crawl metadata.
 *
 * Any error raised by these calls propagates to the caller.
 *
 * @param input  Validated extraction request (see `inputSchema`).
 * @param userId Id of the user whose token is used and who is recorded on the event.
 */
const extractRepository = async (input: Input, userId: string) => {
  const accessToken = await getClerkUserToken(userId, `oauth_${input.sourceControl}`);

  // Pick the integration that matches the requested provider.
  switch (input.sourceControl) {
    case "gitlab":
      context.integrations.sourceControl = new GitlabSourceControl(accessToken);
      break;
    case "github":
      context.integrations.sourceControl = new GitHubSourceControl(accessToken);
      break;
  }

  const resolved = await getRepository(
    {
      externalRepositoryId: input.repositoryId,
      repositoryName: input.repositoryName,
      namespaceName: input.namespaceName,
    },
    context,
  );

  const crawlInstance = await setInstance(
    { repositoryId: resolved.repository.id, userId },
    { db: crawlDb, entities: { instances } },
  );

  const eventPayload = {
    repositoryId: resolved.repository.id,
    namespaceId: resolved.namespace.id,
  };

  const eventMetadata = {
    crawlId: crawlInstance.instanceId,
    caller: 'extract-repository',
    timestamp: Date.now(),
    version: 1,
    sourceControl: input.sourceControl,
    userId,
    from: input.from,
    to: input.to,
  };

  await extractRepositoryEvent.publish(eventPayload, eventMetadata);
};

const contextSchema = z.object({
authorizer: z.object({
jwt: z.object({
Expand All @@ -49,17 +93,6 @@ const contextSchema = z.object({

type CTX = z.infer<typeof contextSchema>;

/**
 * Zod schema for the extract-repository request body.
 *
 * `sourceControl` must be exactly "gitlab" or "github"; `from` and `to` are
 * coerced to `Date` during parsing.
 */
const inputSchema = z.object({
repositoryId: z.number(),
repositoryName: z.string(),
namespaceName: z.string(),
sourceControl: z.literal("gitlab").or(z.literal("github")),
// z.coerce.date() converts the incoming value to a Date during parsing.
from: z.coerce.date(),
to: z.coerce.date()
});

/** Parsed (post-coercion) shape of `inputSchema`. */
type Input = z.infer<typeof inputSchema>;

export const handler = ApiHandler(async (ev) => {

const body = useJsonBody() as unknown;
Expand All @@ -76,7 +109,6 @@ export const handler = ApiHandler(async (ev) => {
}

let input: Input;
let sourceControlAccessToken: string;

try {
input = inputSchema.parse(body);
Expand All @@ -90,48 +122,49 @@ export const handler = ApiHandler(async (ev) => {

const { sub } = lambdaContext.authorizer.jwt.claims;


const { repositoryId, repositoryName, namespaceName, sourceControl, from, to } = input;

try {
sourceControlAccessToken = await getClerkUserToken(sub, `oauth_${sourceControl}`);
} catch (error) {
return {
statusCode: 500,
body: JSON.stringify({ error: (error as Error).message }),
}
}

if (sourceControl === "gitlab") {
context.integrations.sourceControl = new GitlabSourceControl(sourceControlAccessToken);
} else if (sourceControl === "github") {
context.integrations.sourceControl = new GitHubSourceControl(sourceControlAccessToken);
try {
await extractRepository(input, sub);
} catch (error) {
return {
statusCode: 500,
body: JSON.stringify({ error: (error as Error).toString() })
}

const { repository, namespace } = await getRepository({ externalRepositoryId: repositoryId, repositoryName, namespaceName }, context);

const { instanceId } = await setInstance({ repositoryId: repository.id, userId: sub }, { db: crawlDb, entities: { instances } });

await extractRepositoryEvent.publish(
{
repositoryId: repository.id,
namespaceId: namespace.id
},
{
crawlId: instanceId,
caller: 'extract-repository',
timestamp: new Date().getTime(),
version: 1,
sourceControl,
userId: sub,
from,
to,
}
);

}

return {
statusCode: 200,
body: JSON.stringify({})
};
});

/**
 * Environment variables that `cronHandler` requires.
 *
 * - `CRON_USER_ID`: user id passed to `extractRepository` for the scheduled run.
 * - `PUBLIC_REPO_NAME`: repository name to extract.
 * - `PUBLIC_REPO_OWNER`: passed as the namespace name of that repository.
 */
const CRON_ENV = z.object({
CRON_USER_ID: z.string(),
PUBLIC_REPO_NAME: z.string(),
PUBLIC_REPO_OWNER: z.string(),
})
/**
 * Computes the 24-hour extraction window used by the scheduled run.
 *
 * The window ends at 10:00:00.000 UTC on the UTC calendar day of `now` and
 * starts exactly 24 hours earlier. All arithmetic is done in UTC / epoch
 * milliseconds so the result does not depend on the runtime's local timezone.
 *
 * @param now Reference instant; defaults to the current time.
 * @returns The `from` (inclusive start) and `to` (end) instants of the window.
 */
const getCronExtractionWindow = (now: Date = new Date()): { from: Date; to: Date } => {
  const to = new Date(now.getTime());
  to.setUTCHours(10, 0, 0, 0);

  // Subtract 24h in epoch milliseconds. The previous implementation called the
  // local-time setter `setHours` with a value derived from `getUTCHours`, which
  // only yields "yesterday 10:00 UTC" when the process timezone is UTC.
  const from = new Date(to.getTime() - 24 * 60 * 60 * 1000);

  return { from, to };
};

/**
 * Scheduled entry point: extracts the configured public GitHub repository for
 * the window between yesterday 10:00 UTC and today 10:00 UTC.
 *
 * Reads `CRON_USER_ID`, `PUBLIC_REPO_NAME` and `PUBLIC_REPO_OWNER` from the
 * environment (validated against `CRON_ENV`).
 *
 * @throws Error("Invalid environment") when a required variable is missing;
 *         errors from `extractRepository` propagate unchanged.
 */
export const cronHandler = async () => {
  const validEnv = CRON_ENV.safeParse(process.env);

  if (!validEnv.success) {
    console.error("Invalid environment in lambda 'extract-repository.cronHandler':", ...validEnv.error.issues);
    throw new Error("Invalid environment");
  }

  const { CRON_USER_ID, PUBLIC_REPO_NAME, PUBLIC_REPO_OWNER } = validEnv.data;

  const { from, to } = getCronExtractionWindow();

  await extractRepository({
    namespaceName: PUBLIC_REPO_OWNER,
    // NOTE(review): the external repository id is hard-coded to 0 here —
    // presumably the lookup relies on owner/name instead; confirm against getRepository.
    repositoryId: 0,
    repositoryName: PUBLIC_REPO_NAME,
    sourceControl: 'github',
    from,
    to,
  }, CRON_USER_ID);
};
38 changes: 38 additions & 0 deletions apps/stack/stacks/ExtractStack.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import {
Config,
EventBus,
Queue,
Cron,
type StackContext,
} from "sst/constructs";
import { z } from "zod";
Expand Down Expand Up @@ -158,9 +159,18 @@ export function ExtractStack({ stack }: StackContext) {
const ENVSchema = z.object({
CLERK_JWT_ISSUER: z.string(),
CLERK_JWT_AUDIENCE: z.string(),
PUBLIC_REPOS: z.string(),
CRON_USER_ID: z.string(),
});
const publicReposSchema = z.array(
z.object({
owner: z.string(),
name: z.string(),
})
);

const ENV = ENVSchema.parse(process.env);
const PUBLIC_REPOS = publicReposSchema.parse(JSON.parse(ENV.PUBLIC_REPOS));

const api = new Api(stack, "ExtractApi", {
defaults: {
Expand Down Expand Up @@ -195,6 +205,34 @@ export function ExtractStack({ stack }: StackContext) {
},
});

PUBLIC_REPOS.forEach(publicRepo => {
new Cron(stack, `${publicRepo.name}_ExtractCron`, {
schedule: "cron(00 10 * * ? *)",
job: {
function: {
handler: "src/extract/extract-repository.cronHandler",
environment: {
CRON_USER_ID: ENV.CRON_USER_ID,
PUBLIC_REPO_OWNER: publicRepo.owner,
PUBLIC_REPO_NAME: publicRepo.name,
},
bind: [
bus,
EXTRACT_DATABASE_URL,
EXTRACT_DATABASE_AUTH_TOKEN,
CRAWL_DATABASE_URL,
CRAWL_DATABASE_AUTH_TOKEN,
CLERK_SECRET_KEY,
REDIS_URL,
REDIS_TOKEN,
REDIS_USER_TOKEN_TTL
],
runtime: "nodejs18.x",
}
}
})
});

stack.addOutputs({
ApiEndpoint: api.url,
});
Expand Down
8 changes: 7 additions & 1 deletion turbo.json
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,12 @@
"CRAWL_DATABASE_URL",
"CRAWL_DATABASE_AUTH_TOKEN",
"NEXT_PUBLIC_EXTRACT_API_URL",
"NEXT_PUBLIC_TRANSFORM_API_URL"
"NEXT_PUBLIC_TRANSFORM_API_URL",
"PUBLIC_REPOS",
"CRON_USER_ID",
"PUBLIC_REPO_OWNER",
"PUBLIC_REPO_NAME",
"CLERK_JWT_ISSUER",
"CLERK_JWT_AUDIENCE"
]
}