In [None]:
# Author: Ahsan Aziz - Security Consultant - The Missing Link
# Date: 20/04/2022
# Version: 1.0
# Inspired by Omar's BSides presentation: https://www.youtube.com/watch?v=LTNKMA65BtI

# Description
Automating OSINT (and a bit of scanning). If everything goes well, it will produce a zip file with all the information.

# Setup
Before running any other cell, assign values to the variables below and run that cell. This changes the targets for enumeration without needing to modify the script parameters. 

In [None]:
# Scan profile:
#   P = Passive, no interaction with the org's assets (except DNS requests).
#   N = Normal, non-malicious TCP/HTTP requests sent.
#   A = Aggressive, a number of malicious HTTP requests sent for vulnerability identification.
SCAN_TYPE = "A"

# Root domain (required).
DOMAIN = "tesla.com"

# Target range in nmap format. If empty, the IPs of subdomains will be used.
IP_RANGE = "184.30.18.0/24"

# Used in the LinkedIn search and the cloud enumeration (required).
ORG_NAME = "tesla"

# Folder that will contain the discovered recon data (required).
FOLDER_NAME = "tesla"

# Domain for virtual host scanning; leave it empty if a vhost scan is not required.
VHOST = "tesla.com"

# E-mail address pattern, e.g. {f}{last}; check hunter.io if you're not sure about the format.
EMAIL_FORMAT = "{f}{last}"

# Dehashed credentials (username and API key).
DEHASHED_USER = ""
DEHASHED_KEY = ""

# API keys for subdomain enumeration. Request yours at:
#   CHAOS:          https://forms.gle/GP5nTamxJPfiMaBn9
#   Censys:         https://censys.io/register
#   Certspotter:    https://sslmate.com/signup?for=certspotter_api
#   SecurityTrails: https://securitytrails.com/app/signup
#   Shodan:         https://account.shodan.io/login
#   URLScan:        https://urlscan.io/user/signup
CHAOS_KEY = ""
CENSYS_KEY = ""
CERTSPOTTER_KEY = ""
SECURITYTRAILS_KEY = ""
SHODAN_KEY = ""
URLSCAN_KEY = ""

In [None]:
print("Setting up project folders and files")
!mkdir $FOLDER_NAME
!nmap -sL -n $IP_RANGE | grep 'Nmap scan report for' | cut -f 5 -d ' ' > $FOLDER_NAME/IP_Range.txt
!mkdir $FOLDER_NAME/Screenshots
print ("Done!")

# NMAP
A full (all-port) scan is not included because it may take a while; add flags as per your requirements.

In [None]:
if IP_RANGE != "" and SCAN_TYPE != "P":
        !nmap -Pn $IP_RANGE >> $FOLDER_NAME/nmap.txt

# Subdomain Enumeration

## Subfinder
subfinder: https://github.com/projectdiscovery/subfinder

In [None]:
#update config file with provider config
!echo "provider-config: ~/.config/subfinder/provider-config.yaml" >> ~/.config/subfinder/config.yaml
!echo "" > ~/.config/subfinder/provider-config.yaml

#update the API keys in provider config file
if CENSYS_KEY != "":
    !echo "censys:" >> ~/.config/subfinder/provider-config.yaml
    !echo " - $CENSYS_KEY" >> ~/.config/subfinder/provider-config.yaml
if CERTSPOTTER_KEY != "":
    !echo "certspotter:" >> ~/.config/subfinder/provider-config.yaml
    !echo " - $CERTSPOTTER_KEY" >> ~/.config/subfinder/provider-config.yaml
if SECURITYTRAILS_KEY != "":
    !echo "securitytrails:" >> ~/.config/subfinder/provider-config.yaml
    !echo " - $SECURITYTRAILS_KEY" >> ~/.config/subfinder/provider-config.yaml
if SHODAN_KEY != "":
    !echo "shodan:" >> ~/.config/subfinder/provider-config.yaml
    !echo " - $SHODAN_KEY" >> ~/.config/subfinder/provider-config.yaml
if URLSCAN_KEY != "":
    !echo "urlscan:" >> ~/.config/subfinder/provider-config.yaml
    !echo " - $URLSCAN_KEY" >> ~/.config/subfinder/provider-config.yaml

print("Scraping domains using subfinder...")
!subfinder -silent -d $DOMAIN -o subfinder.csv 
!cat subfinder.csv |anew $FOLDER_NAME/Subdomains.csv
#anew: https://github.com/tomnomnom/anew
!rm subfinder.csv
print("Done. The file ./{}/subdomains.csv is updated!".format(FOLDER_NAME))

## Amass

OWASP's Amass: https://github.com/OWASP/Amass 

In [None]:
if SCAN_TYPE != "P":
    print("Scraping domains using OWASP's amass...")
    !amass enum -d $DOMAIN -o amass.csv -r 8.8.8.8
    !cat amass.csv |anew $FOLDER_NAME/Subdomains.csv
    !rm amass.csv
    print("Done. The file ./{}/subdomains.csv is updated!".format(FOLDER_NAME))

## DNS database search - CHAOS and RAPID7 SONAR
Chaos: https://chaos.projectdiscovery.io/#/docs  
Rapid7 Sonar: https://sonar.omnisint.io/

crobat is currently commented out, it's not always available, run it if you'd like to enumerate domains from Rapid7's database.

In [None]:
#print("Querying Rapid7 Sonar using crobat...")
#!crobat -s $DOMAIN >> crobat.csv
#!cat crobat.csv |anew $FOLDER_NAME/Subdomains.csv    
#!rm crobat.csv
#print("Done. The file ./{}/Subdomains.csv is updated!".format(FOLDER_NAME))

if CHAOS_KEY != "":
    print("Querying CHAOS...")
    !chaos -key $CHAOS_KEY -d $DOMAIN -silent -o chaos.csv
    !cat chaos.csv |dnsx -silent|anew $FOLDER_NAME/Subdomains.csv
    !rm chaos.csv
    print("Done. The file ./{}/Subdomains.csv is updated!".format(FOLDER_NAME))

## Reverse DNS
Performing a reverse DNS on provided IP_RANGE using Rapid7 Sonar: https://sonar.omnisint.io/

##### Tip: You may want to pick a domain (other than root domain) from the output of this section and repeat the subdomain enumeration steps!

In [None]:
if IP_RANGE != "":
    print("Reverse querying Rapid7 Sonar using crobat...")
    !cp $FOLDER_NAME/IP_Range.txt ip.txt
    !crobat -r ip.txt >> crobat_reverse.csv
    !cat crobat_reverse.csv |anew $FOLDER_NAME/Subdomains.csv
    !rm crobat_reverse.csv
    !rm ip.txt
    print("Done. The file ./{}/Subdomains.csv is updated!".format(FOLDER_NAME))

## Subdomain Bruteforcing
shuffledns: https://github.com/projectdiscovery/shuffledns

In [None]:
if SCAN_TYPE == "A":
    print("Bruteforcing using shuffledns with commonspeak wordlist... ")
    !shuffledns -silent -d $DOMAIN -w ../commonspeak2.txt -r ../resolvers.txt >> shuffledns.csv
    !cat shuffledns.csv |anew $FOLDER_NAME/Subdomains.csv
    !rm shuffledns.csv
    print("Done. The file ./{}/Subdomains.csv is updated!".format(FOLDER_NAME))

## In-scope Domains
DNS-probe Subdomains.csv and keep only the subdomains that resolve to an address inside IP_RANGE.

In [None]:
if IP_RANGE != "":
    print("First let's sort and get unique domains...")
    !sort -u $FOLDER_NAME/Subdomains.csv > uniq.csv
    !cp uniq.csv $FOLDER_NAME/Subdomains.csv
    !rm uniq.csv
    print("Done. The file ./{}/Subdomains.csv is updated!".format(FOLDER_NAME))
    print("")

In [None]:
if IP_RANGE != "":
    print("Now let's resolve the domains and compare it with the provided IP_RANGE...")
    !cp $FOLDER_NAME/Subdomains.csv subs.txt
    !cp $FOLDER_NAME/IP_Range.txt ip.txt
    #execute the comparison three times to make sure no subdomains is missed
    for i in range(3):
        !cat subs.txt |dnsx| while read line; do if grep -q -w `echo "$line" | dnsx -silent -a -resp |cut -d " " -f 2|cut -d "]" -f 1 |cut -d "[" -f 2` ip.txt;then echo $line >> tmp.csv;fi; done
    !rm ip.txt
    !rm subs.txt
    !cat tmp.csv |anew $FOLDER_NAME/In_Scope_Subdomains.csv
    print("Done. The file ./{}/In_Scope_Subdomains.csv is updated!".format(FOLDER_NAME))
    print("")

In [None]:
if IP_RANGE != "":
    print("Creating the csv with two columns (domain and the corresponding IP address)...")
    !cat tmp.csv | while read domain; do echo "$domain,`echo $domain | dnsx -silent -a -resp |cut -d " " -f 2|cut -d "]" -f 1 |cut -d "[" -f 2`" >> output.csv; done
    !cat output.csv |anew $FOLDER_NAME/In_Scope_Subdomains_IPs.csv
    !rm output.csv                                                                                                                                    
    !rm tmp.csv
    print("Done. The file ./{}/In_Scope_Subdomains_IPs.csv is updated!".format(FOLDER_NAME)) 

## Resolving domains

Checking whether all subdomains resolve to IP addresses using dnsx; the domains which do not resolve may be vulnerable to takeover.

dnsx: https://github.com/projectdiscovery/dnsx

In [None]:
print("Resolving domains using dnsx ... ")
!cat $FOLDER_NAME/Subdomains.csv|dnsx -silent|anew tmp.csv
print("Creating the csv with two columns (domain and the corresponding IP address)...")
!cat tmp.csv | while read domain; do echo "$domain,`echo $domain | dnsx -silent -a -resp |cut -d " " -f 2|cut -d "]" -f 1 |cut -d "[" -f 2`" >> output.csv; done
!cat output.csv |anew $FOLDER_NAME/Subdomains_Resolved.csv
print("Done. The file ./{}/Subdomains_Resolved.csv is updated!".format(FOLDER_NAME))                                                                                                                                     
!rm output.csv 
                                                                                                                                    
if IP_RANGE == "": 
    print("Creating Subdomains_IPs.csv file for saving IPs of discovered subdomains... ")                                                                                                                                   
    !dnsx -silent -a -resp -l tmp.csv|cut -d " " -f 2|cut -d "]" -f 1 |cut -d "[" -f 2 >> output.csv
    !cat output.csv |anew $FOLDER_NAME/Subdomain_IPs.csv
    !rm output.csv                                      
    print("Done. The file ./{}/Subdomains_IPs.csv is updated!".format(FOLDER_NAME))                                                                                                                                
!rm tmp.csv                                                                                                                                    

## Shodan's Nrich

A command-line tool to quickly analyze all IPs in a file and see which ones have open ports/ vulnerabilities. 

nrich: https://gitlab.com/shodan-public/nrich

In [None]:
if IP_RANGE != "":
    print("Querying shodan's database for IP_RANGE provided...")
    !cp $FOLDER_NAME/IP_Range.txt ip.txt
else:
    print("Querying shodan's database for Subdomains_IPs...")
    !cp $FOLDER_NAME/Subdomain_IPs.csv ip.txt

!cat ip.txt |nrich - >> $FOLDER_NAME/Shodan_Nrich.txt 
!rm ip.txt    
print("Done. The file ./{}/Shodan_Nrich.txt is updated!".format(FOLDER_NAME)) 
print("Note: Shodan_Nrich.txt will not be part of final excel file!")

# Subdomain Takeover

Subzy: https://github.com/LukaSikic/subzy

##### Note: Since the cloud service providers keep updating their subdomain policies, there is no reliable automated tool to confirm subdomain takeover. Subzy may provide false positives. To get the latest information about subdomain takeover, head over to: https://github.com/EdOverflow/can-i-take-over-xyz/blob/master/README.md

In [None]:
print("Running subzy on all subdomains (and not just the domains in scope)...")
!cp $FOLDER_NAME/Subdomains.csv subs.csv
!subzy --hide_fails -targets subs.csv >> Subdomain_Takeover.txt 
!rm subs.csv
!cat Subdomain_Takeover.txt |anew $FOLDER_NAME/Subdomain_Takeover.txt
!rm Subdomain_Takeover.txt
print("Done. The file ./{}/Subdomain_Takeover.txt is updated!".format(FOLDER_NAME))
print("Note: Subdomain_Takeover.txt will not be part of final excel file!")

# HTTP Probing
Probing in-scope domains using httpx: https://github.com/projectdiscovery/httpx

In [None]:
if SCAN_TYPE != "P":
    print("Probing using httpx...")
    if IP_RANGE == "":   
        !cp $FOLDER_NAME/Subdomains.csv subs.csv
        !cat subs.csv |dnsx -silent |httpx -p -silent >> httpx.csv
        !cat httpx.csv |anew $FOLDER_NAME/Probed_Subdomains.csv
        print("Done. The file ./{}/Probed_Subdomains.csv is updated!".format(FOLDER_NAME))
    else:
        !cp $FOLDER_NAME/In_Scope_Subdomains.csv subs.csv
        !cat subs.csv |dnsx -silent|httpx -silent >> httpx.csv
        !cat httpx.csv |anew $FOLDER_NAME/Probed_In_Scope_Subdomains.csv
        print("Done. The file ./{}/Probed_In_Scope_Subdomains.csv is updated!".format(FOLDER_NAME))
    !rm subs.csv
    !rm httpx.csv

# Screenshotting

Eyewitness: https://github.com/FortyNorthSecurity/EyeWitness

In [None]:
%%bash -s "$IP_RANGE" "$FOLDER_NAME" "$SCAN_TYPE"
# Screenshot the probed hosts with EyeWitness; skipped for passive ("P") scans.
# Positional arguments from the magic line: $1 = IP_RANGE, $2 = FOLDER_NAME, $3 = SCAN_TYPE.
if [ "$3" != "P" ]; then
    echo "Screenthoting probed domians using EyeWitness..."
    if [ "$1" == "" ]; then
        # no IP_RANGE: screenshot every probed subdomain
        cp "$2/Probed_Subdomains.csv" subs.csv
        python3 ../EyeWitness/Python/EyeWitness.py --delay 2 --no-prompt -f subs.csv -d "$2/Screenshots/"
        # the output directory is "Screenshots" (capital S), so the message uses the same spelling
        echo "Done. The screenshots for Probed_Subdomains are saved in ./$2/Screenshots/report.html"
    else
        # IP_RANGE set: screenshot only the probed in-scope subdomains
        cp "$2/Probed_In_Scope_Subdomains.csv" subs.csv
        python3 ../EyeWitness/Python/EyeWitness.py --delay 2 --no-prompt -f subs.csv -d "$2/Screenshots/"
        # "{}" was a leftover Python placeholder that bash printed literally; use $2 instead
        echo "Done. The screenshots for Probed_In_Scope_Subdomains are saved in ./$2/Screenshots/report.html"
    fi
    rm subs.csv
fi

# Nuclei Scan
Nuclei is a web scanner, it can detect technologies in use, identify CORS and TLS issues, and can scan for famous zero days such as log4j. Give it a go, it's pretty good!

**This might take some time depending on the number of domains. It is a bit noisy and may send hundreds of GET/POST requests, so be careful in a red team engagement.**

Nuclei: https://github.com/projectdiscovery/nuclei

In [None]:
if SCAN_TYPE == "A":
    print("Scanning probed domains using nuclei...")
    !nuclei -update
    !nuclei -update-templates #let's first update nuclei database
    if IP_RANGE == "":
        !cp $FOLDER_NAME/Probed_Subdomains.csv subs.csv
        !nuclei -l subs.csv |anew $FOLDER_NAME/Nuclei.txt
        print("Done. Nuclei resuts for Probed_Subdomains saved in  ./{}/Nuclei.txt".format(FOLDER_NAME))
    else:
        !cp $FOLDER_NAME/Probed_In_Scope_Subdomains.csv subs.csv
        !nuclei -l subs.csv |anew $FOLDER_NAME/Nuclei.txt
        print("Done. Nuclei resuts for Probed_In_Scope_Subdomains saved in  ./{}/Nuclei.txt".format(FOLDER_NAME))
    !rm subs.csv        
    print("Note: Nuclei.txt will not be part of final excel file!")

# VHOST Scanning 
If a reverse proxy like nginx is in use, some subdomains may not have DNS entries; using a fuzzer (ffuf in this case) we can try bruteforcing the virtual hosts with the host header. It may produce false positives as some web servers respond to all hosts.

FFUF: https://github.com/ffuf/ffuf

In [None]:
if SCAN_TYPE == "A":
    if VHOST != "":
        print("Bruteforcing vhosts using commonspeak wordlist. ... ")
        !ffuf -s -u "https://$VHOST" -w ../commonspeak2.txt -H "Host: FUZZ.$DOMAIN" -of csv -o ffuf.csv
        !cat ffuf.csv |sort -u |anew $FOLDER_NAME/Vhosts.csv
        !rm ffuf.csv
        print("Done. The file ./{}/Vhosts.csv is updated!".format(FOLDER_NAME))

# Cloud Storage Misconfigurations
Cloudenum: https://github.com/initstring/cloud_enum

If no bucket is found, try with different keywords.

### AWS cli commands:
```
aws s3 ls s3://{bucket} --no-sign-request
aws s3 cp abc.txt s3://{bucket}/abc.txt --no-sign-request
aws s3 rm s3://{bucket}/abc.txt --no-sign-request
```

In [None]:
# Look for cloud storage that matches ORG_NAME with cloud_enum and merge the
# findings into Cloudenum.txt in the results folder.
print("Enumerating Amazon-S3, GCP and Azure buckets using given keyword/oganization-name... ")
# presumably -k is the keyword, -t the thread count and -l the log file - confirm against cloud_enum's --help
!python3 ../cloud_enum/cloud_enum.py -k $ORG_NAME -t 50 -l cloudenum.txt
# anew appends only the lines that are not already in Cloudenum.txt
!cat cloudenum.txt |anew $FOLDER_NAME/Cloudenum.txt
# remove the scratch log
!rm cloudenum.txt
print("Done. The file ./{}/Cloudenum.txt is updated!".format(FOLDER_NAME))
print("Note: Cloudenum.txt will not be part of final excel file!")


# Company Accounts
CrossLinked: https://github.com/m8r0wn/CrossLinked

Searching for users on LinkedIn and creating email addresses from their names. 

For better results, run the following cell multiple times.

In [None]:
if EMAIL_FORMAT != "":
    print("Finding users on LinkedIn... ")
    !python3 ../crosslinked/crosslinked.py -f $EMAIL_FORMAT@$DOMAIN $ORG_NAME -o crosslinked.csv
    !cat crosslinked.csv |sort -u |anew $FOLDER_NAME/Emails.csv
    !rm crosslinked.csv
    print("Done. The file ./{}/Emails.csv is updated!".format(FOLDER_NAME))

## Breached database from Dehashed

https://dehashed.com

It will harvest credentials from breached databases; the Dehashed username and API key are required.  


In [None]:
%%bash  -s "$DOMAIN" "$DEHASHED_USER" "$DEHASHED_KEY" "$FOLDER_NAME"
# Pull breached credentials for the domain from the Dehashed API into Dehashed.csv and
# add any e-mail addresses found there to Emails.csv.
# Positional arguments from the magic line: $1 = DOMAIN, $2 = DEHASHED_USER,
# $3 = DEHASHED_KEY, $4 = FOLDER_NAME. Nothing runs when no API key is set.
if [ "$3" != "" ]; then
    echo "Dumping breached databses from dehashed ..."
    # anew writes the header only if it is not in the file yet, so re-running the cell no
    # longer stacks duplicate header rows (its echo of new lines is discarded).
    echo "id, email, username, password, hashed_password, name, database_name" | anew "$4/Dehashed.csv" > /dev/null
    # Keep entries that have a non-empty password and emit the fields in header order.
    # "(.entries // [])" turns a response without entries (e.g. an API error) into an empty
    # result instead of a jq failure; the credentials are quoted so special characters survive.
    curl "https://api.dehashed.com/search?query=domain:$1&size=4000" -u "$2:$3" -H 'Accept: application/json' \
        | jq -r '(.entries // [])[] | select(.password != null and .password != "") | [.id, .email, .username, .password, .hashed_password, .name, .database_name] | @csv' \
        | anew "$4/Dehashed.csv"
    echo "Done. $4/Dehashed.csv is updated!"
    # the file being updated is Emails.csv; the message used to say Email.csv
    echo "Updating Emails.csv with newly found Email addresses!"
    grep -E -o "\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,6}\b" "$4/Dehashed.csv" |anew "$4/Emails.csv"
fi

# Excel and ZIP

In [None]:
!sudo pip3 install pandas

import pandas as pd
import os
import csv
import glob
import xlsxwriter
import openpyxl


#path to parse to and read files from
path = "/home/discovery/work/{}/".format(FOLDER_NAME)

#all files ending in .csv
all_files = glob.glob(os.path.join(path, "*.csv"))

#initialize writer
writer = pd.ExcelWriter('/home/discovery/work/' + FOLDER_NAME + '/' + FOLDER_NAME + '_OSINT.xlsx', engine='xlsxwriter', options={'strings_to_formulas': False})

#write all files into excel from dataframes and name worksheet by filename 
print("The following files are being parsed to " + path + ":") 
print("")
for f in all_files:
    if os.stat(f).st_size == 0:
        pass
    else:
        df = pd.read_csv(f)
        print(f)
        df.to_excel(writer, sheet_name=os.path.basename(f),index=False)

writer.save()  

print("")
print("Parsing of " + FOLDER_NAME + "_OSINT.xlsx Complete")


#delete csv files if excel creation was successful
if os.path.exists('/home/discovery/work/' + FOLDER_NAME + '/' + FOLDER_NAME + '_OSINT.xlsx'):
    !rm $FOLDER_NAME/*.csv

#create zip file containing all the results
import shutil
shutil.make_archive('/home/discovery/work/' + FOLDER_NAME, 'zip', '/home/discovery/work/' + FOLDER_NAME)
print("Results saved in " + FOLDER_NAME + ".zip in ~/work")