Skip to content

Commit

Permalink
feature: Recrawl failed links from admin UI (#95)
Browse files Browse the repository at this point in the history
* feature: Retry failed crawling URLs

* fix: Enhancing visuals and some minor changes.
  • Loading branch information
AhmadMuj committed Apr 11, 2024
1 parent 238c296 commit 95cf8f4
Show file tree
Hide file tree
Showing 8 changed files with 1,067 additions and 25 deletions.
26 changes: 21 additions & 5 deletions apps/web/app/dashboard/admin/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ function ReleaseInfo() {

function ActionsSection() {
const { mutate: recrawlLinks, isPending: isRecrawlPending } =
api.admin.recrawlAllLinks.useMutation({
api.admin.recrawlLinks.useMutation({
onSuccess: () => {
toast({
description: "Recrawl enqueued",
Expand Down Expand Up @@ -103,7 +103,15 @@ function ActionsSection() {
className="lg:w-1/2"
variant="destructive"
loading={isRecrawlPending}
onClick={() => recrawlLinks()}
onClick={() => recrawlLinks({ crawlStatus: "failure" })}
>
Recrawl Failed Links Only
</ActionButton>
<ActionButton
className="lg:w-1/2"
variant="destructive"
loading={isRecrawlPending}
onClick={() => recrawlLinks({ crawlStatus: "all" })}
>
Recrawl All Links
</ActionButton>
Expand Down Expand Up @@ -153,18 +161,26 @@ function ServerStatsSection() {
<Separator />
<p className="text-xl">Background Jobs</p>
<Table className="lg:w-1/2">
<TableHeader>
<TableHead>Job</TableHead>
<TableHead>Pending</TableHead>
<TableHead>Failed</TableHead>
</TableHeader>
<TableBody>
<TableRow>
<TableCell className="lg:w-2/3">Pending Crawling Jobs</TableCell>
<TableCell className="lg:w-2/3">Crawling Jobs</TableCell>
<TableCell>{serverStats.pendingCrawls}</TableCell>
<TableCell>{serverStats.failedCrawls}</TableCell>
</TableRow>
<TableRow>
<TableCell>Pending Indexing Jobs</TableCell>
<TableCell>Indexing Jobs</TableCell>
<TableCell>{serverStats.pendingIndexing}</TableCell>
<TableCell>{serverStats.failedIndexing}</TableCell>
</TableRow>
<TableRow>
<TableCell>Pending OpenAI Jobs</TableCell>
<TableCell>OpenAI Jobs</TableCell>
<TableCell>{serverStats.pendingOpenai}</TableCell>
<TableCell>{serverStats.failedOpenai}</TableCell>
</TableRow>
</TableBody>
</Table>
Expand Down
20 changes: 20 additions & 0 deletions apps/workers/crawlerWorker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -124,17 +124,37 @@ export class CrawlerWorker {
// When a crawl job completes, log it and record the "success" crawl status
// for the bookmark carried in the job data (if any).
worker.on("completed", (job) => {
  const jobId = job?.id ?? "unknown";
  logger.info(`[Crawler][${jobId}] Completed successfully`);
  const bookmarkId = job?.data.bookmarkId;
  if (bookmarkId) {
    // Event listeners are not awaited, so a rejected status update would
    // otherwise surface as an unhandled promise rejection. Log it instead.
    changeBookmarkStatus(bookmarkId, "success").catch((err: unknown) => {
      logger.error(
        `[Crawler][${jobId}] Failed to update crawl status to "success": ${String(err)}`,
      );
    });
  }
});

// When a crawl job fails, log the error and record the "failure" crawl status
// for the bookmark carried in the job data (if any).
worker.on("failed", (job, error) => {
  const jobId = job?.id ?? "unknown";
  logger.error(`[Crawler][${jobId}] Crawling job failed: ${error}`);
  const bookmarkId = job?.data.bookmarkId;
  if (bookmarkId) {
    // Event listeners are not awaited, so a rejected status update would
    // otherwise surface as an unhandled promise rejection. Log it instead.
    changeBookmarkStatus(bookmarkId, "failure").catch((err: unknown) => {
      logger.error(
        `[Crawler][${jobId}] Failed to update crawl status to "failure": ${String(err)}`,
      );
    });
  }
});

return worker;
}
}

/**
 * Writes the given crawl status onto the `bookmarkLinks` row whose id equals
 * `bookmarkId`.
 *
 * @param bookmarkId - Id of the bookmark link row to update.
 * @param crawlStatus - Outcome of the crawl to store on the row.
 * @returns A promise that settles once the update statement has finished.
 */
async function changeBookmarkStatus(
  bookmarkId: string,
  crawlStatus: "success" | "failure",
): Promise<void> {
  const statusPatch = { crawlStatus };
  const matchesBookmark = eq(bookmarkLinks.id, bookmarkId);

  await db.update(bookmarkLinks).set(statusPatch).where(matchesBookmark);
}

async function getBookmarkUrl(bookmarkId: string) {
const bookmark = await db.query.bookmarkLinks.findFirst({
where: eq(bookmarkLinks.id, bookmarkId),
Expand Down
3 changes: 3 additions & 0 deletions packages/db/drizzle/0017_slippery_senator_kelly.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Add a text `crawlStatus` column to bookmarkLinks, defaulting to 'pending'.
ALTER TABLE bookmarkLinks ADD `crawlStatus` text DEFAULT 'pending';--> statement-breakpoint
-- Backfill existing rows: links with no stored HTML content are marked 'failure'.
-- NOTE(review): this presumably also covers links that had simply not been crawled yet — confirm that is intended.
UPDATE bookmarkLinks SET crawlStatus = 'failure' where htmlContent is null;--> statement-breakpoint
-- Backfill existing rows: links that already have HTML content are marked 'success'.
UPDATE bookmarkLinks SET crawlStatus = 'success' where htmlContent is not null;
Loading

0 comments on commit 95cf8f4

Please sign in to comment.