Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 22 additions & 15 deletions .actor/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
# Specify the base Docker image. You can read more about
# the available images at https://crawlee.dev/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node-playwright-chrome:22-1.46.0 AS builder
# Use the Node base image as the builder to speed up the build step instead of using the full Playwright image.
FROM apify/actor-node:22 AS builder
# override the default working directory set in the base image
WORKDIR /home/myuser

# Copy just package.json and package-lock.json
# to speed up the build using Docker layer cache.
Expand All @@ -18,12 +21,17 @@ COPY --chown=myuser . ./
# Don't audit to speed up the installation.
RUN npm run build

# Build Ghostery blockers for content filtering
RUN npm run build:playwright-blockers

# Create final image
FROM apify/actor-node-playwright-firefox:22-1.46.0
FROM apify/actor-node-playwright-firefox:22-1.54.1

# Copy just package.json and package-lock.json
# to speed up the build using Docker layer cache.
COPY --chown=myuser package*.json ./
COPY --chown=myuser policies.json ./
COPY --chown=myuser patches ./patches

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
Expand All @@ -38,28 +46,27 @@ RUN npm --quiet set progress=false \
&& npm --version \
&& rm -r ~/.npm

# Remove the existing firefox installation
RUN rm -rf ${PLAYWRIGHT_BROWSERS_PATH}/*

# Install all required playwright dependencies for firefox
RUN npx playwright install firefox
# symlink the firefox binary to the root folder in order to bypass the versioning and resulting browser launch crashes.
RUN ln -s ${PLAYWRIGHT_BROWSERS_PATH}/firefox-*/firefox/firefox ${PLAYWRIGHT_BROWSERS_PATH}/

# Overrides the dynamic library used by Firefox to determine trusted root certificates with p11-kit-trust.so, which loads the system certificates.
RUN rm $PLAYWRIGHT_BROWSERS_PATH/firefox-*/firefox/libnssckbi.so
RUN ln -s /usr/lib/x86_64-linux-gnu/pkcs11/p11-kit-trust.so $(ls -d $PLAYWRIGHT_BROWSERS_PATH/firefox-*)/firefox/libnssckbi.so

# Copy built JS files from builder image
COPY --from=builder --chown=myuser /home/myuser/dist ./dist

# Copy Ghostery blockers from builder image
COPY --from=builder --chown=myuser /home/myuser/blockers ./blockers

# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for most source file changes.
COPY --chown=myuser . ./

# Edit the TZ environment variable to set the timezone in the container.
# Most of the proxy traffic is from the US, so we set the timezone to New York,
# which can help with the bot-detection mechanisms of some websites.
ENV TZ=America/New_York

# Configure Firefox policies
ENV PLAYWRIGHT_FIREFOX_POLICIES_JSON="/home/myuser/policies.json"

# Disable experimental feature warning from Node.js
ENV NODE_NO_WARNINGS=1

# Run the image.
CMD npm run start:prod --silent
CMD ["npm", "run", "start:prod", "--silent"]
4 changes: 4 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,7 @@ node_modules
data
src/storage
dist

# Ghostery blockers (will be rebuilt in Docker)
blockers

3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,6 @@ storage
# Actor run input
input.json
INPUT.json

# Ghostery blockers (generated during build)
blockers/**
Loading