Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
f48b308
feat: bypass validation check shadow dom
RohitR311 Jul 28, 2025
b984228
feat: add turkish lang support
RohitR311 Jul 28, 2025
f7d17c9
feat: full support for turkish lang
RohitR311 Jul 28, 2025
f3d0894
feat: add rm integration option sheet selection
RohitR311 Jul 31, 2025
5989236
feat: sponsor & cloud
amhsirak Jul 31, 2025
72f0165
fix: text
amhsirak Jul 31, 2025
7f8f144
feat: variant outlined
amhsirak Jul 31, 2025
0987010
feat: oss sponsor text
amhsirak Jul 31, 2025
a3f4afd
feat: set width 600
amhsirak Jul 31, 2025
219eadc
feat: set width 600
amhsirak Jul 31, 2025
b8dcc2c
feat: set margin bottom
amhsirak Jul 31, 2025
a67d82e
feat: set margin bottom
amhsirak Jul 31, 2025
c8da160
feat: 8%
amhsirak Jul 31, 2025
297e846
feat: text
amhsirak Jul 31, 2025
2253203
feat: small text field
amhsirak Jul 31, 2025
e391ebc
feat: margin bottom 4
amhsirak Jul 31, 2025
fe99e9f
feat: better text
amhsirak Jul 31, 2025
944c0e1
chore: lint
amhsirak Jul 31, 2025
9af5f70
fix: whitespace
amhsirak Jul 31, 2025
072331c
feat: add sponsor links
amhsirak Jul 31, 2025
805e109
fix: remove caps
amhsirak Jul 31, 2025
26de22a
fix: add rel noopener noreferrer
amhsirak Jul 31, 2025
3fa00c8
Merge pull request #717 from getmaxun/sponsor
amhsirak Jul 31, 2025
89c7184
feat: add restart until manually stopped
RohitR311 Aug 4, 2025
47fc168
feat: continue other job execution on fail
RohitR311 Aug 4, 2025
780b18b
feat: page validity, continue if click fails
RohitR311 Aug 4, 2025
6dac082
feat: null checks for doc and iframe,frame
RohitR311 Aug 4, 2025
50fc737
Create self-hosting-docker.md
iamdoubz Aug 5, 2025
737b7ad
Create nginx.conf
iamdoubz Aug 5, 2025
c62390b
feat: always group table rows
RohitR311 Aug 5, 2025
2656476
feat: increase job execution duration to 23 hours
RohitR311 Aug 5, 2025
8287a81
feat: sync translations for all langs
RohitR311 Aug 6, 2025
fbeaa9d
feat: focused element typing
RohitR311 Aug 7, 2025
7e7b1ef
feat: add key press action backend
RohitR311 Aug 7, 2025
956e8a6
feat: add async schedule recording
RohitR311 Aug 8, 2025
e0707df
feat: update schedule on success
RohitR311 Aug 8, 2025
9c895c6
feat: return promise schedule recording
RohitR311 Aug 8, 2025
5e15b3a
Merge pull request #716 from getmaxun/rm-auth
amhsirak Aug 8, 2025
ecde9ba
Merge pull request #720 from getmaxun/auto-restart
amhsirak Aug 8, 2025
33d2c36
Merge pull request #722 from iamdoubz/develop
amhsirak Aug 8, 2025
d967fdc
Merge pull request #723 from getmaxun/tabular-fix
amhsirak Aug 8, 2025
af1032b
Merge pull request #724 from getmaxun/job-duration
amhsirak Aug 8, 2025
8a5e518
Merge pull request #727 from getmaxun/sync-lang
amhsirak Aug 8, 2025
3b16446
Merge pull request #728 from getmaxun/focus-fix
amhsirak Aug 8, 2025
a17d849
Merge pull request #730 from getmaxun/schedule-ui
amhsirak Aug 8, 2025
d5cefa7
chore: v0.0.21
amhsirak Aug 10, 2025
2048795
chore: core v0.0.21
amhsirak Aug 10, 2025
33bd429
chore: use maxun-core v0.0.21
amhsirak Aug 10, 2025
dff2892
Merge pull request #732 from getmaxun/pre-release-21
amhsirak Aug 10, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
services:
postgres:
image: postgres:13
restart: unless-stopped
environment:
POSTGRES_USER: ${DB_USER}
POSTGRES_PASSWORD: ${DB_PASSWORD}
Expand All @@ -17,6 +18,7 @@ services:

minio:
image: minio/minio
restart: unless-stopped
environment:
MINIO_ROOT_USER: ${MINIO_ACCESS_KEY}
MINIO_ROOT_PASSWORD: ${MINIO_SECRET_KEY}
Expand All @@ -32,6 +34,7 @@ services:
#context: .
#dockerfile: server/Dockerfile
image: getmaxun/maxun-backend:latest
restart: unless-stopped
ports:
- "${BACKEND_PORT:-8080}:${BACKEND_PORT:-8080}"
env_file: .env
Expand All @@ -58,6 +61,7 @@ services:
#context: .
#dockerfile: Dockerfile
image: getmaxun/maxun-frontend:latest
restart: unless-stopped
ports:
- "${FRONTEND_PORT:-5173}:${FRONTEND_PORT:-5173}"
env_file: .env
Expand Down
92 changes: 92 additions & 0 deletions docs/nginx.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# Robust maxun nginx config file
# DO NOT uncomment commented lines unless YOU know what they mean and YOU know what YOU are doing!
### HTTP server block ###
server {
    # Plain-HTTP listener: serves no content itself, it only redirects.
    listen 80;
    server_name maxun.my.domain;

    # Do not advertise the nginx version in responses / error pages.
    server_tokens off;

    root /usr/share/nginx/html;

    # Permanently redirect every request to the same URI over HTTPS.
    return 301 https://$server_name$request_uri;
}
### HTTPS server block ###
server {
    ### Default config ###
    server_name maxun.my.domain;
    root /usr/share/nginx/html;
    access_log /var/log/nginx/maxun_access.log;
    error_log /var/log/nginx/maxun_error.log info;
    listen 443 ssl;
    # NOTE(review): the standalone `http2` directive needs a recent nginx
    # (1.25.1+ per the nginx docs); on older builds use `listen 443 ssl http2;`.
    http2 on;
    server_tokens off;

    ### SSL config ###
    ssl_certificate /etc/letsencrypt/live/my.domain/fullchain.pem;
    ssl_certificate_key /etc/letsencrypt/live/my.domain/privkey.pem;
    ssl_trusted_certificate /etc/letsencrypt/live/my.domain/chain.pem;
    ssl_protocols TLSv1.2 TLSv1.3;
    #ssl_ecdh_curve X25519MLKEM768:X25519:prime256v1:secp384r1;
    ssl_ecdh_curve X25519:prime256v1:secp384r1;
    ssl_prefer_server_ciphers off;
    ssl_ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384:DHE-RSA-CHACHA20-POLY1305;
    ssl_stapling off;
    ssl_stapling_verify off;
    ssl_session_cache shared:MozSSL:10m;
    ssl_session_tickets off;
    ssl_session_timeout 1d;
    # NOTE(review): relative path; this file must exist where nginx resolves
    # relative config paths. Confirm the location for your installation.
    ssl_dhparam dh.pem;
    #ssl_conf_command Options KTLS;

    ### Performance tuning config ###
    client_max_body_size 512M;
    client_body_timeout 300s;
    client_body_buffer_size 256k;
    #pagespeed off;

    ### Compression ###
    ## gzip ##
    gzip on;
    gzip_vary on;
    gzip_comp_level 5;
    gzip_min_length 256;
    gzip_disable msie6;
    gzip_proxied expired no-cache no-store private no_last_modified no_etag auth;
    gzip_buffers 16 8k;
    gzip_types application/atom+xml text/javascript application/javascript application/json application/ld+json application/manifest+json application/rss+xml application/vnd.geo+json application/vnd.ms-fontobject application/wasm application/x-font-ttf application/x-web-app-manifest+json application/xhtml+xml application/xml font/opentype image/bmp image/svg+xml image/x-icon text/cache-manifest text/css text/plain text/vcard text/vnd.rim.location.xloc text/vtt text/x-component text/x-cross-domain-policy;
    ## brotli: enable only if you have compiled nginx with brotli support!!! ##
    #brotli on;
    #brotli_static on;
    #brotli_comp_level 6;
    #brotli_types application/atom+xml application/javascript application/json application/rss+xml
    #             application/vnd.ms-fontobject application/x-font-opentype application/x-font-truetype
    #             application/x-font-ttf application/x-javascript application/xhtml+xml application/xml
    #             font/eot font/opentype font/otf font/truetype image/svg+xml image/vnd.microsoft.icon
    #             image/x-icon image/x-win-bitmap text/css text/javascript text/plain text/xml;

    ### Default headers ###
    # `always` makes nginx attach the header to error responses as well,
    # not only to 2xx/3xx ones.
    add_header Referrer-Policy "no-referrer" always;
    add_header X-Content-Type-Options "nosniff" always;
    add_header X-Frame-Options "SAMEORIGIN" always;
    add_header X-Permitted-Cross-Domain-Policies "none" always;
    add_header X-Robots-Tag "noindex, nofollow" always;
    add_header X-XSS-Protection "1; mode=block" always;
    add_header Permissions-Policy "geolocation=(self), midi=(self), sync-xhr=(self), microphone=(self), camera=(self), magnetometer=(self), gyroscope=(self), fullscreen=(self), payment=(self), interest-cohort=()" always;

    ### Proxy rules ###
    # Backend web traffic and websockets.
    # The dot in `socket\.io` is escaped so it matches a literal "." only;
    # unescaped it would match any character (e.g. /socketXio).
    location ~ ^/(auth|storage|record|workflow|robot|proxy|api-docs|api|webhook|socket\.io)(/|$) {
        proxy_pass http://localhost:8080; #Change the port number to match .env file BACKEND_PORT variable
        proxy_http_version 1.1;
        # Upgrade/Connection headers allow websocket connections through the proxy.
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection 'upgrade';
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
    }

    # Frontend web traffic (everything not matched by the backend rule above)
    location / {
        proxy_pass http://localhost:5173; #Change the port number to match .env file FRONTEND_PORT variable
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection 'upgrade';
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
    }
}
134 changes: 134 additions & 0 deletions docs/self-hosting-docker.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
# Self hosting docker guide

So you want to create a bot? Let's get you started!

## Requirements (not covered)
- Webserver (Apache2, nginx, etc.)
- SSL certificates (Let's Encrypt, ZeroSSL, etc.)
- A sub-domain to host maxun, e.g. maxun.my.domain
- Docker
- Docker compose
- Probably others...

## Guide
For this guide, we assume that before you start, you have a dedicated docker folder to house config files and everything else we need for persistence between docker container reboots and updates. The path in this guide is `/home/$USER/Docker/maxun`.
1. Change directory into your docker folder `cd /home/$USER/Docker/`
2. Create a new directory for maxun and all the required sub-folders for our docker services `mkdir -p maxun/{db,minio,redis}`
3. Change directory to enter the newly created folder `cd maxun`
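4. Create an environment file to save your variables `nano .env` with the following contents. Wherever a value below is shown as an `openssl rand ...` command, run that command in a shell and paste its output as the value (a `.env` file does not execute commands, so leaving the command text in place would make the literal text your secret):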
```
NODE_ENV=production
JWT_SECRET=openssl rand -base64 48
DB_NAME=maxun
DB_USER=postgres
DB_PASSWORD=openssl rand -base64 24
DB_HOST=postgres
DB_PORT=5432
ENCRYPTION_KEY=openssl rand -base64 64
SESSION_SECRET=openssl rand -base64 48
MINIO_ENDPOINT=minio
MINIO_PORT=9000
MINIO_CONSOLE_PORT=9001
MINIO_ACCESS_KEY=minio
MINIO_SECRET_KEY=openssl rand -base64 24
REDIS_HOST=maxun-redis
REDIS_PORT=6379
REDIS_PASSWORD=
BACKEND_PORT=8080
FRONTEND_PORT=5173
BACKEND_URL=https://maxun.my.domain
PUBLIC_URL=https://maxun.my.domain
VITE_BACKEND_URL=https://maxun.my.domain
VITE_PUBLIC_URL=https://maxun.my.domain
GOOGLE_CLIENT_ID=
GOOGLE_CLIENT_SECRET=
GOOGLE_REDIRECT_URI=
AIRTABLE_CLIENT_ID=
AIRTABLE_REDIRECT_URI=
MAXUN_TELEMETRY=true
```
5. Ctrl + x, Y, Enter will save your changes
6. Please be sure to READ this file and change the variables to match your environment, e.g. BACKEND_PORT=30000
7. Create a file for docker compose `nano docker-compose.yml` with the following contents:
```yml
# Compose file for a localhost-only maxun deployment.
# Bind-mount paths are relative to the directory holding this file (the
# `maxun` folder created in step 2), so they resolve the same way whether or
# not the stack is started through `sudo` (where $USER is usually `root`).
services:
  postgres:
    image: postgres:17
    container_name: maxun-postgres
    restart: unless-stopped
    mem_limit: 512M
    environment:
      POSTGRES_USER: ${DB_USER}
      POSTGRES_PASSWORD: ${DB_PASSWORD}
      POSTGRES_DB: ${DB_NAME}
    volumes:
      - ./db:/var/lib/postgresql/data
    healthcheck:
      # Probe with the configured user/database rather than a hard-coded one.
      test: ["CMD-SHELL", "pg_isready -U ${DB_USER} -d ${DB_NAME}"]
      interval: 10s
      timeout: 5s
      retries: 5

  redis:
    image: docker.io/library/redis:7
    container_name: maxun-redis
    restart: always
    mem_limit: 128M
    volumes:
      - ./redis:/data

  minio:
    image: minio/minio
    container_name: maxun-minio
    restart: unless-stopped
    mem_limit: 512M
    environment:
      MINIO_ROOT_USER: ${MINIO_ACCESS_KEY}
      MINIO_ROOT_PASSWORD: ${MINIO_SECRET_KEY}
    command: server /data --console-address :${MINIO_CONSOLE_PORT:-9001}
    volumes:
      - ./minio:/data

  backend:
    image: getmaxun/maxun-backend:latest
    container_name: maxun-backend
    restart: unless-stopped
    ports:
      # Published on 127.0.0.1 only; reach it through the reverse proxy.
      - "127.0.0.1:${BACKEND_PORT:-8080}:${BACKEND_PORT:-8080}"
    env_file: .env
    environment:
      BACKEND_URL: ${BACKEND_URL}
      PLAYWRIGHT_BROWSERS_PATH: /ms-playwright
      PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD: 0
      # DEBUG: pw:api
      # PWDEBUG: 1  # Enables debugging
      CHROMIUM_FLAGS: '--disable-gpu --no-sandbox --headless=new'
    security_opt:
      - seccomp=unconfined  # This might help with browser sandbox issues
    shm_size: '2gb'
    mem_limit: 4g
    depends_on:
      - postgres
      - minio
    volumes:
      - /var/run/dbus:/var/run/dbus

  frontend:
    image: getmaxun/maxun-frontend:latest
    container_name: maxun-frontend
    restart: unless-stopped
    mem_limit: 512M
    ports:
      # Published on 127.0.0.1 only; reach it through the reverse proxy.
      - "127.0.0.1:${FRONTEND_PORT:-5173}:5173"
    env_file: .env
    environment:
      PUBLIC_URL: ${PUBLIC_URL}
      BACKEND_URL: ${BACKEND_URL}
    depends_on:
      - backend
```
8. Ctrl + x, Y, Enter will save your changes
9. This particular setup is "production ready": the backend and frontend ports are bound to 127.0.0.1, so maxun is only accessible from localhost. You must configure a reverse proxy to access it from anywhere else!
10. Start maxun `sudo docker compose up -d` or `sudo docker-compose up -d`
11. Wait 30 seconds for everything to come up
12. Access your maxun instance at http://localhost:5173 if using defaults

## Next steps
You will want to configure a reverse proxy. Click on a link below to check out some examples.
- [Nginx](nginx.conf)
2 changes: 1 addition & 1 deletion maxun-core/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "maxun-core",
"version": "0.0.20",
"version": "0.0.21",
"description": "Core package for Maxun, responsible for data extraction",
"main": "build/index.js",
"typings": "build/index.d.ts",
Expand Down
21 changes: 18 additions & 3 deletions maxun-core/src/browserSide/scraper.js
Original file line number Diff line number Diff line change
Expand Up @@ -537,6 +537,11 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,

const evaluateXPath = (document, xpath, isShadow = false) => {
try {
if (!document || !xpath) {
console.warn('Invalid document or xpath provided to evaluateXPath');
return null;
}

const result = document.evaluate(
xpath,
document,
Expand Down Expand Up @@ -632,6 +637,7 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
return null;
} catch (err) {
console.error("Critical XPath failure:", xpath, err);
// Return null instead of throwing to prevent crashes
return null;
}
};
Expand Down Expand Up @@ -694,16 +700,25 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
for (let i = 0; i < parts.length; i++) {
if (!currentElement) return null;

// Handle iframe and frame traversal
// Handle iframe and frame traversal with enhanced safety
if (
currentElement.tagName === "IFRAME" ||
currentElement.tagName === "FRAME"
) {
try {
// Check if frame is accessible
if (!currentElement.contentDocument && !currentElement.contentWindow) {
console.warn('Frame is not accessible (cross-origin or unloaded)');
return null;
}

const frameDoc =
currentElement.contentDocument ||
currentElement.contentWindow.document;
if (!frameDoc) return null;
currentElement.contentWindow?.document;
if (!frameDoc) {
console.warn('Frame document is not available');
return null;
}

if (isXPathSelector(parts[i])) {
currentElement = evaluateXPath(frameDoc, parts[i]);
Expand Down
Loading