Skip to content

Commit

Permalink
Merge pull request #10 from RyanJarv/master
Browse files Browse the repository at this point in the history
Pool close bug fix
TLS Verification option
Whitespace issue with URLs
Change default User Agent
  • Loading branch information
jordanpotti committed Mar 7, 2022
2 parents 596ceee + b38fe4a commit 9756278
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 8 deletions.
23 changes: 17 additions & 6 deletions CloudScraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from rfc3987 import parse
import itertools
import requests
import urllib3
import sys
import re

Expand Down Expand Up @@ -45,7 +46,7 @@ def start(target):
print(colored("Beginning search for cloud resources in {}".format(target), color='cyan'))

try:
html = requests.get(target, allow_redirects=True, headers=headers).text
html = requests.get(target, allow_redirects=True, headers=headers, verify=arguments.no_verify).text
links = gather_links(html)

except requests.exceptions.RequestException as e:
Expand All @@ -65,7 +66,7 @@ def worker(url):
'''
if url.count("/") <= arguments.depth+2:
try:
html = requests.get(url, allow_redirects=True, headers=headers).text
html = requests.get(url, allow_redirects=True, headers=headers, verify=arguments.no_verify).text
links = gather_links(html)

except requests.exceptions.RequestException as e:
Expand Down Expand Up @@ -121,6 +122,9 @@ def spider(base_urls, target):

print(colored('\nNew urls appended: {}\n'.format(i), 'green', attrs=['bold']))

p.close()
p.join()

#once all the links for the given depth have been analyzed, execute the parser
parser(base_urls)

Expand Down Expand Up @@ -152,8 +156,8 @@ def args():
parser.add_argument("-d", dest="depth", type=int, required=False, default=5, help="Max Depth of links Default: 5")
parser.add_argument("-l", dest="targetlist", required=False, help="Location of text file of Line Delimited targets")
parser.add_argument("-v", action="store_true", default=False, required=False, help="Verbose output")
#parser.add_argument("-t", dest="time", required=False, default=0, help="Time between GETs to avoid getting blocked")
parser.add_argument("-p", dest="process", required=False, default=2, type=int, help="Number of processes to run")
parser.add_argument("--no-verify", action="store_false", default=True, required=False, help="Skip TLS verification")
if len(sys.argv) == 1:
parser.error("No arguments given.")
parser.print_usage
Expand All @@ -165,22 +169,29 @@ def args():

def cleaner(url):
    '''
    Normalize a raw target string into a URL that can be requested.

    Surrounding whitespace (including the trailing newline on lines read
    from a target list file) is stripped, and "https://" is prepended when
    the target does not already start with an http:// or https:// scheme.
    '''
    url = url.strip()
    # Check the scheme prefix rather than searching for 'http' anywhere in
    # the string, so hosts or paths that merely contain "http" (for example
    # "httpbin.org") still get a scheme added.
    if url.lower().startswith(('http://', 'https://')):
        return url
    return "https://" + url


def main():
    '''
    Scan every target given on the command line.

    When a target list file was supplied, each non-blank line of it is
    cleaned and scanned in turn; otherwise the single URL argument is used.
    '''
    if arguments.targetlist:
        with open(arguments.targetlist, 'r') as target_list:
            # Plain loop: start() is called for its side effects, so there is
            # no reason to build a throwaway list of its return values.
            for line in target_list:
                # Skip blank lines so we never try to scan a bare "https://".
                if line.strip():
                    start(cleaner(line))
    else:
        start(cleaner(arguments.URL))


headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36'}
headers = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36'
}
arguments = args()

# `--no-verify` is declared with action="store_false" and default=True, so
# arguments.no_verify is True while TLS verification is enabled and becomes
# False only when the flag was passed. Silence urllib3's insecure request
# warnings only in that case, since the user explicitly opted out of
# verification.
if not arguments.no_verify:
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

if __name__ == '__main__':
print_banner()
main()
10 changes: 10 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Container image for running CloudScraper as a command-line tool.
FROM python

WORKDIR /usr/src/CloudScraper

# Copy and install the dependency list on its own, before the rest of the
# source, so this layer can be reused when only the code changes.
COPY ./requirements.txt ./requirements.txt
RUN pip install -r requirements.txt

COPY ./ ./
# Arguments passed to `docker run` are appended to this command.
ENTRYPOINT ["python3", "/usr/src/CloudScraper/CloudScraper.py"]

3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ This tool was inspired by a recent talk by [Bryce Kunz](https://twitter.com/Twee
-d DEPTH Max Depth of links Default: 5
-l TARGETLIST Location of text file of Line Delimited targets
-v Verbose Verbose output
-p Processes Number of processes to be executed in parallel. Default: 2
-p Processes Number of processes to be executed in parallel. Default: 2
--no-verify Skip TLS verification

example: python3 CloudScraper.py -u https://rottentomatoes.com

Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
rfc3987
termcolor
requests==2.20.0
requests
BeautifulSoup4
urllib3

0 comments on commit 9756278

Please sign in to comment.