Merge pull request #4 from ankaboot-source/cli
Add support for generic request on any port
baderdean committed Aug 11, 2023
2 parents cfc0ed7 + c95fdca commit 7d8738a
Showing 6 changed files with 291 additions and 134 deletions.
23 changes: 0 additions & 23 deletions .github/workflows/ci.yaml
@@ -54,26 +54,3 @@ jobs:
      - name: Run lints
        run: |
          hatch run test:lint
-  tests:
-    runs-on: ubuntu-latest
-    needs:
-      - precommit
-    strategy:
-      matrix:
-        python-version: [3.9]
-    steps:
-      - uses: actions/checkout@v3
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v3
-        with:
-          python-version: ${{ matrix.python-version }}
-      - name: Install dependencies
-        run: |
-          pip install hatch
-      - name: Build package
-        run: |
-          pip install jaxlib jax
-      - name: Run tests
-        run: |
-          hatch run test:test
23 changes: 21 additions & 2 deletions openweb_proxy/__main__.py
@@ -6,11 +6,22 @@

from loguru import logger as log

+from openweb_proxy import config
from .cli import parse_arguments
from .proxy_miner import ProxyMiner


def main() -> None:
+    """
+    Entry point for the proxy miner application.
+
+    Reads command-line arguments, initializes a ProxyMiner instance with the
+    appropriate configuration, then benchmarks, loads, verifies, and cleans
+    proxies, reporting information about the mined proxies.
+
+    :return: None
+    """
    args = parse_arguments()

    log.remove(0)
@@ -24,8 +35,16 @@ def main() -> None:
        pm_kwargs["protocol"] = args.protocol
    if args.timeout:
        pm_kwargs["timeout"] = args.timeout
-    if args.checker:
-        pm_kwargs["checker"] = args.checker
+
+    checker = config.CHECK_URLS
+    if args.http:
+        checker["http"] = args.http
+    if args.generic:
+        checker["generic"] = args.generic
+    if args.banned:
+        checker["banned"] = args.banned
+
+    pm_kwargs["checker"] = checker

    pm = ProxyMiner(**pm_kwargs)
    if args.bench:
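The checker plumbing added above merges the CLI overrides into config.CHECK_URLS. A minimal sketch of the same wiring with hypothetical values (not part of the commit; it copies the dict as a defensive variation, where the commit assigns config.CHECK_URLS directly):

from openweb_proxy import config

# Hypothetical stand-ins for the parsed argparse values used above.
args_http = "https://example.org"      # --http: proxy-check URL override
args_generic = "smtp.example.net:25"   # --generic: host:port reachability target
args_banned = "banned-ips.txt"         # --banned: file of excluded addresses

checker = dict(config.CHECK_URLS)  # copy, so the module-level default stays pristine
for key, value in (("http", args_http), ("generic", args_generic), ("banned", args_banned)):
    if value:
        checker[key] = value

pm_kwargs = {"checker": checker}
# ProxyMiner(**pm_kwargs) would then verify proxies against these endpoints.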
47 changes: 38 additions & 9 deletions openweb_proxy/cli.py
@@ -1,20 +1,35 @@
import argparse
+import os

from openweb_proxy import config


def parse_arguments() -> argparse.Namespace:
-    parser = argparse.ArgumentParser(description="Proxy Miner - Mine and verify proxies from the web.")
+    """
+    Parse command-line arguments for the OpenWeb Proxy application.
+
+    Sets up the argument parser, defines the options for proxy mining,
+    loading, and benchmarking, and returns the parsed arguments.
+
+    :return: argparse.Namespace - the parsed arguments.
+    """
+    parser = argparse.ArgumentParser(
+        description="""OpenWeb Proxy - generate working, fast and stealth proxy list -
+#MakeTheWebOpenAgain
+Copyright ankaboot.io""",
+        formatter_class=argparse.RawTextHelpFormatter,
+    )
    parser.add_argument(
        "proxies_file",
        nargs="?",
        default=config.PROXIES_FILE,
-        help="The file to load/save the proxies. Default is 'proxies.txt'.",
+        help=f"The file to load/save the proxies. Default is '{config.PROXIES_FILE}'.",
    )
    parser.add_argument(
        "--web",
        action="store_true",
-        help="Load proxies from the web if the specified file is empty or not provided.",
+        help="Load proxies from the web",
    )
    parser.add_argument(
        "--bench",
@@ -24,21 +39,35 @@ def parse_arguments() -> argparse.Namespace:
    parser.add_argument(
        "--protocol",
        choices=["https", "socks5"],
-        help="Protocol for the proxies. Choices: 'https' or 'socks5'. Default is 'socks5'.",
+        help="""Protocol for the proxies.
+Choices: 'https' or 'socks5'. Default is 'socks5'.""",
    )
    parser.add_argument(
        "--timeout",
        type=float,
-        help="Timeout for requests in seconds. Default is 5 seconds.",
+        help=f"Timeout for requests in seconds. Default is {config.TIMEOUT} seconds.",
    )
+    HTTP_HOST = config.CHECK_URLS["url"]
    parser.add_argument(
-        "--checker",
-        help="URL to check if a proxy is working. Default is 'https://google.com'.",
+        "--http",
+        help=f"URL to check if a proxy is working. Default is '{HTTP_HOST}'.",
    )
+    GENERIC_HOST = config.CHECK_URLS["generic"]
+    parser.add_argument(
+        "--generic",
+        help=f"""host:port format server to check if reachable via proxy.
+Defaults to {GENERIC_HOST}.""",
+    )
    parser.add_argument(
        "--verbose",
        choices=["DEBUG", "INFO", "WARNING", "ERROR"],
-        default="INFO",
-        help="Set the verbosity level. Choose from INFO, DEBUG, WARNING, or ERROR. Default is INFO.",
+        default=os.environ.get("LOG_LEVEL", "INFO"),
+        help="""Set the verbosity level.
+Choose from INFO, DEBUG, WARNING, or ERROR. Default is INFO.
+This can also be set using the LOG_LEVEL env var.""",
    )
+    parser.add_argument(
+        "--banned",
+        help="""URL or FILE of excluded addresses""",
+    )
    return parser.parse_args()
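For reference, a hypothetical invocation exercising the new --http/--generic/--banned options, simulated through sys.argv so it runs as a plain script (assumes the openweb_proxy package is importable; all values are placeholders):

import sys

from openweb_proxy.cli import parse_arguments

# Shell equivalent (via the package entry point):
#   python -m openweb_proxy proxies.txt --protocol socks5 \
#       --generic smtp.example.net:25 --banned banned-ips.txt --verbose DEBUG
sys.argv = [
    "openweb_proxy",
    "proxies.txt",
    "--protocol", "socks5",
    "--generic", "smtp.example.net:25",
    "--banned", "banned-ips.txt",
    "--verbose", "DEBUG",
]

args = parse_arguments()
print(args.generic)  # -> smtp.example.net:25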
86 changes: 69 additions & 17 deletions openweb_proxy/config.py
@@ -1,16 +1,18 @@
-from typing import Callable, Dict, List, Set, Union

+import re
+from bs4 import BeautifulSoup
+from loguru import logger as log
import requests
+from .random_ua_headers import random_ua_headers

PROXIES_FILE = "proxies.txt"
PROXY_FILE = "proxy.txt"
ISPROXY_URL = "http://ip-api.com/json/{ip}?fields=status,proxy"
ISPROXY_URL_BATCH = "http://ip-api.com/batch?fields=status,proxy,query"
MAX_ISPROXY_BATCH = 100
-CHECK_URL = {
+CHECK_URLS = {
    "url": "https://google.com",
    # 'url': "https://vpnapi.io/proxy-detection",
    # 'not_proxy': "this IP address is not a proxy server",
+    "generic": "smtp.freesmtpservers.com:25",
+    "banned": "https://raw.githubusercontent.com/ankaboot-source/"
+    "email-open-data/main/mailserver-banned-ips.txt",
}
CHECK_MAX = 100
MAX_CHECK_WORKERS = 20
@@ -19,18 +21,63 @@
TIMEOUT = 5
MAX_WORKERS = 10

-BANNED_URL = "https://raw.githubusercontent.com/ankaboot-source/email-open-data/main/mailserver-banned-ips.txt"
+RE_URL = re.compile(r"^https?://", re.IGNORECASE)
+RE_IP_V4 = re.compile(
+    r"(?:^|\D)(({0}\.{1}\.{1}\.{1}):{2})(?!.)".format(
+        r"(?:[1-9]|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])",  # 1-255
+        r"(?:\d|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])",  # 0-255
+        r"(?:\d|[1-9]\d{1,3}|[1-5]\d{4}|6[0-4]\d{3}"
+        r"|65[0-4]\d{2}|655[0-2]\d|6553[0-5])",  # 0-65535
+    )
+)
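The RE_IP_V4 pattern added here is dense: it captures ip:port pairs whose first octet is 1-255, remaining octets 0-255, and port 0-65535, requiring a non-digit (or line start) before the match and end of line after it. A quick illustrative check (not part of the commit):

import re

from openweb_proxy.config import RE_IP_V4

sample = "1.2.3.4:8080\n256.1.1.1:80\n10.0.0.1:70000"
# findall returns (ip:port, ip) tuples, one per capturing group.
print([m[0] for m in RE_IP_V4.findall(sample)])
# -> ['1.2.3.4:8080'] - the out-of-range octet and port are rejected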


+def _get_sslproxies(timeout: int = 0) -> set[str]:
+    """Get HTTPS proxies from sslproxies.org"""
+    r = requests.get(
+        "https://www.sslproxies.org/", headers=random_ua_headers(), timeout=timeout
+    )
+    soup = BeautifulSoup(r.text, "html.parser")
+    proxies_table = soup.find("table", class_="table-striped").tbody
+
+    proxies = set()
+    for row in proxies_table.find_all("tr"):
+        proxy = row.find_all("td")
+        ip = proxy[0].string
+        port = proxy[1].string
+        proxies.add(f"https://{ip}:{port}")
+    log.debug(f"🪲 Proxies sslproxies number: {len(proxies)}")
+    return proxies


+def _get_clarketm(timeout: int = 0) -> set[str]:
+    """Get HTTPS proxies from clarketm on github"""
+    r = requests.get(
+        "https://raw.githubusercontent.com/clarketm/proxy-list/master/proxy-list.txt",
+        timeout=timeout,
+    )
+    proxies = set()
+    # Skip the list's header (first 6 lines) and footer (last 2 lines);
+    # an "S" flag in a row marks a proxy that supports SSL/HTTPS.
+    for proxy_l in r.text.splitlines()[6:-2]:
+        if "S" in proxy_l:
+            proxies.add(f"https://{proxy_l.split()[0]}")
+    log.debug(f"🪲 Proxies clarketm number: {len(proxies)}")
+    return proxies


-def get_geonde_proxies(timeout: int) -> Set[str]:
+def get_geonde_proxies(timeout: int) -> set[str]:
    """Downloads proxies from https://geonode.com/free-proxy-list"""
    proxies, i = set(), 1
    while True:
-        r = requests.get(
-            f"https://proxylist.geonode.com/api/proxy-list?limit=500&page={i}&sort_by=lastChecked&sort_type=desc",
-            timeout=timeout,
-        )
-        data = r.json()["data"]
+        try:
+            r = requests.get(
+                f"https://proxylist.geonode.com/api/proxy-list?limit=500"
+                f"&page={i}&sort_by=lastChecked&sort_type=desc",
+                timeout=timeout,
+            )
+            data = r.json()["data"]
+        except requests.exceptions.RequestException as e:
+            log.info(f"Geonode proxies stopped at page {i} with exception: {e}")
+            data = ""
        if not data:
            break
        for element in data:
@@ -40,11 +87,13 @@ def get_geonde_proxies(timeout: int) -> Set[str]:
    return proxies


-PROXY_SOURCES: Dict[str, List[Union[str, Callable[[int], Set[str]]]]] = {
+PROXY_SOURCES: dict[str, list] = {
    "https": [
        "https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/https.txt",
        "https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/http.txt",
        "https://spys.me/proxy.txt",
+        _get_sslproxies,
+        _get_clarketm,
    ],
    "socks5": [
        "https://raw.githubusercontent.com/hookzof/socks5_list/master/proxy.txt",
@@ -55,7 +104,8 @@
"https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks5.txt",
"https://raw.githubusercontent.com/User-R3X/proxy-list/main/online/socks5.txt",
"https://raw.githubusercontent.com/roosterkid/openproxylist/main/SOCKS5_RAW.txt",
"https://raw.githubusercontent.com/jetkai/proxy-list/main/online-proxies/txt/proxies-socks5.txt",
"https://raw.githubusercontent.com/jetkai/proxy-list/main/\
online-proxies/txt/proxies-socks5.txt",
"https://raw.githubusercontent.com/monosans/proxy-list/main/proxies_anonymous/socks5.txt",
"https://api.proxyscrape.com/v2/?request=getproxies&protocol=socks5",
"https://openproxy.space/list/socks5",
@@ -65,8 +115,10 @@
"https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/socks5.txt",
"https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/socks5.txt",
"https://raw.githubusercontent.com/saschazesiger/Free-Proxies/master/proxies/socks5.txt",
"https://raw.githubusercontent.com/UserR3X/proxy-list/main/socks5.txt",
"https://api.proxyscrape.com/v2/?request=getproxies&protocol=socks5&timeout=10000&country=all&simplified=true",
"https://raw.githubusercontent.com/UserR3X/proxy-list/main/\
socks5.txt",
"https://api.proxyscrape.com/v2/?request=getproxies&\
protocol=socks5&timeout=10000&country=all&simplified=true",
"https://spys.me/socks.txt",
get_geonde_proxies,
],
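PROXY_SOURCES mixes plain URL strings with scraper callables such as _get_sslproxies and get_geonde_proxies. A minimal sketch of how a consumer might normalize the two kinds of entry (hypothetical; the real consumer is ProxyMiner in proxy_miner.py, whose diff is not shown above):

import requests

from openweb_proxy.config import PROXY_SOURCES, RE_IP_V4, TIMEOUT

def collect(protocol: str) -> set[str]:
    """Gather raw proxies for one protocol from every configured source."""
    proxies: set[str] = set()
    for source in PROXY_SOURCES[protocol]:
        if callable(source):
            # Scrapers take a timeout and return ready-made proxy URLs.
            proxies |= source(TIMEOUT)
        else:
            # Plain entries point at text lists; pull ip:port pairs out of them.
            text = requests.get(source, timeout=TIMEOUT).text
            proxies |= {f"{protocol}://{m[0]}" for m in RE_IP_V4.findall(text)}
    return proxies

print(len(collect("socks5")))  # network-dependent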