Skip to content

Commit

Permalink
feat: 🎸 serverless browser
Browse files Browse the repository at this point in the history
  • Loading branch information
gmpetrov committed Oct 11, 2023
1 parent 4919d9e commit 3d2415e
Show file tree
Hide file tree
Showing 3 changed files with 226 additions and 2 deletions.
2 changes: 2 additions & 0 deletions apps/dashboard/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
"@react-pdf-viewer/search": "^3.12.0",
"@sentry/nextjs": "^7.66.0",
"@slack/web-api": "^6.8.1",
"@sparticuz/chromium": "^117.0.0",
"@tailwindcss/typography": "^0.5.9",
"@tanstack/react-virtual": "3.0.0-beta.54",
"@types/node": "18.15.10",
Expand Down Expand Up @@ -97,6 +98,7 @@
"perf_hooks": "^0.0.1",
"pino-pretty": "^10.2.0",
"playwright": "^1.32.3",
"puppeteer-core": "10.1.0",
"radash": "^10.7.1",
"react": "18.2.0",
"react-dom": "18.2.0",
Expand Down
47 changes: 47 additions & 0 deletions apps/dashboard/pages/api/browser.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import chromium from '@sparticuz/chromium';
import { NextApiResponse } from 'next';
import puppeteer from 'puppeteer-core';

import { createApiHandler, respond } from '@chaindesk/lib/createa-api-handler';
import { getTextFromHTML } from '@chaindesk/lib/loaders/web-page';
import { AppNextApiRequest } from '@chaindesk/lib/types';

// API route handler instance; the GET route is attached to it further down in this file.
const handler = createApiHandler();

// Filesystem path of a Chromium binary under the Homebrew prefix.
// NOTE(review): not referenced anywhere in the visible code — presumably meant as the
// `executablePath` for local development; confirm it is still needed or remove it.
const LOCAL_CHROME_EXECUTABLE = '/opt/homebrew/bin/chromium';

/**
 * Renders a web page in headless Chromium and returns its HTML together with
 * the text extracted from that HTML.
 *
 * The page is loaded with a desktop Chrome user agent and awaited until the
 * network is idle. If no text can be extracted from the first snapshot, the
 * function waits 10 seconds (to give client-side rendering a chance to finish)
 * and takes a second snapshot, re-extracting the text from it.
 *
 * @param req - Request whose `url` query parameter is the http(s) page to load.
 * @param res - Response object (unused here; kept for the handler signature).
 * @returns The page HTML and the text extracted from that same HTML snapshot.
 * @throws Error when `url` is missing, is not a single string, or is not an
 *   http(s) URL; also propagates any launch/navigation error from puppeteer.
 */
export const browser = async (req: AppNextApiRequest, res: NextApiResponse) => {
  const url = req.query.url;

  // `req.query` values may be undefined or string[]; additionally restrict the
  // scheme so callers cannot point the browser at file:// or other local
  // resources.
  // NOTE(review): this does not prevent requests to internal http(s) hosts
  // (SSRF) — consider an allow/deny list if this endpoint is publicly reachable.
  if (typeof url !== 'string' || !/^https?:\/\//i.test(url)) {
    throw new Error('Invalid "url" query parameter: expected an http(s) URL');
  }

  const customUserAgent =
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36';

  // Named so it does not shadow the exported `browser` function.
  const browserInstance = await puppeteer.launch({
    executablePath: await chromium.executablePath(),
    args: chromium.args,
    headless: true,
  });

  try {
    const page = await browserInstance.newPage();
    await page.setUserAgent(customUserAgent);
    await page.goto(url, { waitUntil: 'networkidle0', timeout: 100000 });

    let html = await page.content();
    let text = (await getTextFromHTML(html))?.trim();

    if (!text) {
      console.log("not text parssed from html, let's try again after 10 seconds");
      await page.waitForTimeout(10000);

      // Take a fresh snapshot and re-extract the text; otherwise the wait
      // above could never change the returned `text`.
      html = await page.content();
      text = (await getTextFromHTML(html))?.trim();
    }

    return {
      html,
      text,
    };
  } finally {
    // Always release the Chromium process, including on navigation errors,
    // so failed requests do not leak browser instances.
    await browserInstance.close();
  }
};

// Register `browser` for GET requests, wrapped by `respond`.
// NOTE(review): `respond` presumably turns the function's return value into the
// HTTP response — confirm in `@chaindesk/lib/createa-api-handler`.
handler.get(respond(browser));

// Default export consumed as this file's API route.
export default handler;
Loading

1 comment on commit 3d2415e

@vercel
Copy link

@vercel vercel bot commented on 3d2415e Oct 11, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Successfully deployed to the following URLs:

browser – ./apps/dashboard

browser-git-main-databerry.vercel.app
browser-databerry.vercel.app
browser-blue.vercel.app

Please sign in to comment.