Permalink
Browse files

Added python-whois - a better library for handling whois information

  • Loading branch information...
0 parents commit 3d793a0914612c9866f16321a3047495ba465d9d @mkaziz mkaziz committed Nov 23, 2012
Showing with 668 additions and 0 deletions.
  1. +8 −0 MANIFEST
  2. +40 −0 README
  3. +38 −0 setup.py
  4. +57 −0 test.py
  5. +82 −0 whois/_1_query.py
  6. +50 −0 whois/_2_parse.py
  7. +101 −0 whois/_3_adjust.py
  8. +56 −0 whois/__init__.py
  9. +236 −0 whois/tld_regexpr.py
@@ -0,0 +1,8 @@
+# file GENERATED by distutils, do NOT edit
+README
+setup.py
+whois/_1_query.py
+whois/_2_parse.py
+whois/_3_adjust.py
+whois/__init__.py
+whois/tld_regexpr.py
@@ -0,0 +1,40 @@
+WHOIS
+=============
+
+Python module/library for retrieving WHOIS information of domains.
+
+By DDarko http://ddarko.org/
+
+
+Features
+=============
+
+ * Python wrapper for Linux "whois" command
+ * simple interface to access parsed WHOIS data for a given domain
+ * able to extract data for all the popular TLDs (com, org, net, biz, info, pl, jp, uk, nz, ...)
+ * query a WHOIS server directly instead of going through an intermediate web service like many others do
+ * works with Python 2.4+ and Python 3.x
+ * all dates as datetime objects
+ * optional caching of results (in memory, or in a JSON cache file)
+
+
+Usage example
+=============
+
+>>> import whois
+>>> domain = whois.query('google.com')
+
+>>> print(domain.__dict__)
+{
+ 'expiration_date': datetime.datetime(2020, 9, 14, 0, 0),
+ 'last_updated': datetime.datetime(2011, 7, 20, 0, 0),
+ 'registrar': 'MARKMONITOR INC.',
+ 'name': 'google.com',
+ 'creation_date': datetime.datetime(1997, 9, 15, 0, 0)
+}
+
+>>> print(domain.name)
+google.com
+
+>>> print(domain.expiration_date)
+2020-09-14 00:00:00
@@ -0,0 +1,38 @@
from distutils.core import setup

# Read the long description through a context manager so the README file
# handle is closed deterministically instead of being left to the garbage
# collector.
with open('README') as readme:
    long_description = readme.read()

setup(
    name='whois',
    version='0.6.5',
    description='Python module/library for retrieving WHOIS information of domains.',
    long_description=long_description,
    author='DDarko.org',
    author_email='ddarko@ddarko.org',
    license='MIT http://www.opensource.org/licenses/mit-license.php',
    url='http://code.google.com/p/python-whois/',
    platforms=['any'],
    packages=['whois'],
    keywords=['Python', 'WHOIS', 'TLD', 'domain', 'expiration', 'registrar'],
    classifiers=[
        'License :: OSI Approved :: MIT License',
        'Intended Audience :: Developers',
        'Environment :: Console',
        'Programming Language :: Python',
        'Programming Language :: Python :: 2.5',
        'Programming Language :: Python :: 2.6',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.0',
        'Programming Language :: Python :: 3.1',
        'Programming Language :: Python :: 3.2',
        'Topic :: Internet',
        'Topic :: Software Development :: Libraries :: Python Modules',
    ],
)
+
+'''
+test_suite='testsuite',
+entry_points="""
+[console_scripts]
+cmd = package:main
+""",
+'''
@@ -0,0 +1,57 @@
"""Manual smoke test.

Calls whois.query() for each sample domain below and prints every attribute
of the object it returns (nothing is printed for a falsy result).
"""
import whois
from pprint import pprint


domains = '''
www.google.com
www.fsdfsdfsdfsd.google.com
digg.com
imdb.com
microsoft.com

www.google.org
ddarko.org

google.net
www.asp.net

google.pl
www.ddarko.pl

google.co.uk

google.jp
www.google.co.jp

google.co
google.de
yandex.ru
google.us
google.eu
google.me
google.be
google.biz
google.info
google.name

google.it
google.cz
google.fr

dfsdfsfsdf
test.ez.lv
'''

# Uncomment to skip every lookup:
#domains = ''

SEPARATOR = '-' * 80

for domain in domains.split('\n'):
    # Blank lines in the list above only group the samples visually.
    if not domain:
        continue

    print(SEPARATOR)
    print(domain)

    result = whois.query(domain, ignore_returncode=1)
    if not result:
        continue

    for field, value in result.__dict__.items():
        print('%20s\t"%s"' % (field, value))
+
@@ -0,0 +1,82 @@
+import subprocess
+import time
+import sys
+import os
+
# Major version of the running interpreter (2 or 3).
PYTHON_VERSION = sys.version_info[0]

# In-memory cache: 'domain.tld' -> (unix timestamp of the query, raw whois text).
CACHE = {}

# Entries older than this many seconds are queried again.
CACHE_MAX_AGE = 60 * 60 * 48  # 48h

try:
    import json
except ImportError:
    # Only fall back when the json module is really missing; any other
    # error raised while importing it should surface instead of being hidden.
    import simplejson as json
+
+
+
+
def cache_load(cf):
    """Replace the in-memory CACHE with the JSON object stored in file *cf*.

    Loading is best-effort: a missing file, unreadable/invalid JSON, or JSON
    whose top-level value is not an object leaves CACHE untouched.

    cf -- path of the cache file.
    """
    global CACHE
    if not os.path.isfile(cf):
        return

    with open(cf, 'r') as f:
        try:
            loaded = json.load(f)
        except (ValueError, IOError, OSError):
            # Corrupt or unreadable cache file: keep the current CACHE.
            # (JSON and text decoding errors are ValueError subclasses.)
            return

    # do_query() indexes and assigns CACHE by key, so anything other than a
    # dict (e.g. a file containing "[]") would break it later.
    if isinstance(loaded, dict):
        CACHE = loaded
+
+
def cache_save(cf):
    """Write the in-memory CACHE to file *cf* as JSON.

    cf -- path of the cache file; it is created or overwritten.

    Raises whatever json.dumps() raises for a non-serialisable CACHE, and
    the usual I/O errors if the file cannot be written.
    """
    # Serialise before opening the file: opening with 'w' truncates it, so
    # a serialisation failure must happen first or it would wipe the
    # previously saved cache.
    payload = json.dumps(CACHE)
    with open(cf, 'w') as f:
        f.write(payload)
+
+
+
+
+
def do_query(dl, force=0, cache_file=None, slow_down=0, ignore_returncode=0):
    """Return the raw whois text for a domain, consulting the cache first.

    dl                -- sequence of domain labels; joined with '.' to form
                         the cache key and the queried name.
    force             -- truthy: query even if a fresh cache entry exists.
    cache_file        -- optional path; the cache is loaded from it before
                         the lookup and saved to it after a new query.
    slow_down         -- seconds to sleep after a new query (0 = no sleep).
    ignore_returncode -- passed through to _do_whois_query().
    """
    key = '.'.join(dl)

    if cache_file:
        cache_load(cache_file)

    # An entry is usable when one exists, the caller did not force a
    # refresh, and it is no older than CACHE_MAX_AGE seconds.
    fresh = (
        not force
        and key in CACHE
        and CACHE[key][0] >= time.time() - CACHE_MAX_AGE
    )

    if not fresh:
        queried_at = int(time.time())
        CACHE[key] = (queried_at, _do_whois_query(dl, ignore_returncode))

        if cache_file:
            cache_save(cache_file)
        if slow_down:
            time.sleep(slow_down)

    return CACHE[key][1]
+
+
+
+
def _do_whois_query(dl, ignore_returncode):
    """
    Linux 'whois' command wrapper

    dl                -- sequence of domain labels; joined with '.' and
                         passed as the single argument to `whois`.
    ignore_returncode -- truthy: return the output even when `whois` exits
                         with a non-zero status.

    Returns the combined stdout/stderr of the command as text.
    Raises Exception(output) on a non-zero exit status unless
    ignore_returncode is set.
    """
    p = subprocess.Popen(
        ['whois', '.'.join(dl)],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # fold error messages into the same stream
    )
    r = p.communicate()[0]

    if PYTHON_VERSION >= 3:
        # The output is not guaranteed to be valid UTF-8; substitute
        # undecodable bytes instead of failing the whole lookup with
        # UnicodeDecodeError.
        r = r.decode('utf-8', 'replace')

    if not ignore_returncode and p.returncode != 0:
        raise Exception(r)
    return r
+
+
+
+"""
+import socket
+
+def _do_whois_query(dl):
+ s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ s.connect((('%s.whois-servers.net' % dl[-1], 43)))
+ s.send(("%s\r\n" % '.'.join(dl)).encode())
+
+ response = []
+ while 1:
+ t = s.recv(4096)
+ response.append(t)
+ if t == b'': break
+
+ s.close()
+ return b''.join(response).decode()
+"""
@@ -0,0 +1,50 @@
+from . import tld_regexpr
+import re
+
+
# Compiled pattern tables, keyed by the attribute name used in tld_regexpr.
TLD_RE = {}


def get_tld_re(tld):
    """Return the compiled pattern table for *tld*, building it on first use.

    The definition is read from the attribute of tld_regexpr named *tld*.
    If it has a truthy 'extend' entry, the table of that parent is built
    first (recursively) and this definition's entries are laid over it.
    String values are compiled case-insensitively; any other value (e.g.
    None, or a pattern inherited already compiled) is kept as is. The
    'extend' entry itself never appears in the result.

    Results are memoised in TLD_RE. Raises AttributeError if tld_regexpr has
    no attribute called *tld*.
    """
    if tld in TLD_RE:
        return TLD_RE[tld]

    spec = getattr(tld_regexpr, tld)
    parent = spec.get('extend')

    # Always build on a fresh dict so that dropping 'extend' below never
    # mutates the definition stored in tld_regexpr.
    merged = get_tld_re(parent).copy() if parent else {}
    merged.update(spec)
    merged.pop('extend', None)

    TLD_RE[tld] = dict(
        (key, re.compile(value, re.IGNORECASE) if isinstance(value, str) else value)
        for key, value in merged.items()
    )
    return TLD_RE[tld]
+
+
# Build the pattern table for every public name in tld_regexpr at import
# time, so TLD_RE is fully populated before it is consulted. A plain loop is
# used because only the side effect matters; no result list is needed.
for _tld in dir(tld_regexpr):
    if _tld[0] != '_':
        get_tld_re(_tld)
+
+
+#from pprint import pprint
+
+
def do_parse(whois_str, tld):
    """Extract the raw field values from a whois response.

    whois_str -- full text returned by the whois query.
    tld       -- key into TLD_RE selecting the pattern table; unknown keys
                 fall back to the 'com' table.

    Returns a dict mapping each field name of the table to the list of
    matches found (``['']`` when there is no pattern or no match), or None
    when a short response (fewer than 5 newlines) is exactly "not found" or
    contains "error". Any other short response raises Exception carrying
    the response text.
    """
    if whois_str.count('\n') < 5:
        s = whois_str.strip().lower()
        if s == 'not found' or 'error' in s:
            return None
        raise Exception(whois_str)

    # Responses that list "Server Name:" entries are cut down to the part
    # starting at the first "Domain Name:" line. Only cut when that marker
    # is actually present: find() returns -1 otherwise, and slicing from -1
    # would throw away everything but the last character.
    if re.search(r'Server Name:\s?(.+)', whois_str, re.IGNORECASE):
        start = whois_str.find('Domain Name:')
        if start != -1:
            whois_str = whois_str[start:]

    patterns = TLD_RE[tld] if tld in TLD_RE else TLD_RE['com']

    r = {}
    for k, v in patterns.items():
        found = v.findall(whois_str) if v is not None else None
        r[k] = found or ['']
    return r
@@ -0,0 +1,101 @@
+import re
+import sys
+import datetime
+
+PYTHON_VERSION = sys.version_info[0]
+
+
+
class Domain:
    """Cleaned-up view of the parsed whois fields of one domain.

    Attributes set by the constructor:
      name            -- first 'domain_name' value, stripped and lower-cased
      registrar       -- first 'registrar' value, stripped
      creation_date   -- str_to_date() of the first 'creation_date' value
      expiration_date -- str_to_date() of the first 'expiration_date' value
      last_updated    -- str_to_date() of the first 'updated_date' value
      name_servers    -- set of lower-cased name server host names
    """

    def __init__(self, data):
        """Build the object from *data*, a mapping of field name to a list of raw values."""
        self.name = data['domain_name'][0].strip().lower()
        self.registrar = data['registrar'][0].strip()
        self.creation_date = str_to_date(data['creation_date'][0])
        self.expiration_date = str_to_date(data['expiration_date'][0])
        self.last_updated = str_to_date(data['updated_date'][0])

        # 'name_servers' entries are either plain strings or iterables of
        # strings; flatten them into a single list first.
        raw_servers = []
        for entry in data['name_servers']:
            if isinstance(entry, str):
                raw_servers.append(entry)
            else:
                raw_servers.extend(entry)

        self.name_servers = set()
        for server in raw_servers:
            server = server.strip(' .')
            if not server:
                continue
            if ' ' in server:
                # Keep only the text before the first space.
                server = server.split(' ', 1)[0].strip(' .')
            self.name_servers.add(server.lower())
+
+ #----------------------------------
+
+
+
+
+
+
+
+
+
+
+
+
+# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
# strptime() formats tried in order; the first one that parses the whole
# string is used. Each trailing comment shows a sample value.
DATE_FORMATS = [
    # Date only
    '%d-%b-%Y',                 # 02-jan-2000
    '%d.%m.%Y',                 # 02.02.2000
    '%d/%m/%Y',                 # 01/06/2011
    '%Y-%m-%d',                 # 2000-01-02
    '%Y.%m.%d',                 # 2000.01.02
    '%Y/%m/%d',                 # 2005/05/30

    # Date and time
    '%Y.%m.%d %H:%M:%S',        # 2002.09.19 13:00:00
    '%Y%m%d %H:%M:%S',          # 20110908 14:44:51
    '%Y-%m-%d %H:%M:%S',        # 2011-09-08 14:44:51
    '%d.%m.%Y %H:%M:%S',        # 19.09.2002 13:00:00
    '%d-%b-%Y %H:%M:%S %Z',     # 24-Jul-2009 13:20:03 UTC
    '%Y/%m/%d %H:%M:%S (%z)',   # 2011/06/01 01:05:01 (+0900)
    '%Y/%m/%d %H:%M:%S',        # 2011/06/01 01:05:01
    '%a %b %d %H:%M:%S %Z %Y',  # Tue Jun 21 23:59:59 GMT 2011
    '%a %b %d %Y',              # Tue Dec 12 2000
    '%Y-%m-%dT%H:%M:%S',        # 2007-01-26T19:10:31
    '%Y-%m-%dT%H:%M:%SZ',       # 2007-01-26T19:10:31Z
    '%Y-%m-%dT%H:%M:%S%z',      # 2011-03-30T19:36:27+0200
    '%Y-%m-%dT%H:%M:%S.%f%z',   # 2011-09-08T14:44:51.622265+03:00
    '%Y-%m-%dt%H:%M:%S.%f',     # 2011-09-08t14:44:51.622265

    # Appended last so that every string the formats above already accept
    # keeps parsing exactly as before.
    '%Y-%m-%dT%H:%M:%S.%fZ',    # 2011-09-08T14:44:51.622Z
]
+
+
def str_to_date(s):
    """Parse a whois date string into a datetime.

    s -- raw date text; surrounding whitespace and letter case are ignored.

    Returns None for an empty string or the literal "not defined",
    otherwise the result of the first entry of DATE_FORMATS that parses the
    normalised string. Under Python 2 the parsing is delegated to
    str_to_date_py2().

    Raises ValueError when no known format matches.
    """
    s = s.strip().lower()
    if not s or s == 'not defined':
        return None

    s = s.replace('(jst)', '(+0900)')
    # strptime's %z accepts a colon inside the UTC offset only from
    # Python 3.7 on, so rewrite "+03:00" / "-05:00" as "+0300" / "-0500".
    s = re.sub(r'([+-][0-9]{2}):([0-9]{2})', r'\1\2', s)

    if PYTHON_VERSION < 3:
        return str_to_date_py2(s)

    for fmt in DATE_FORMATS:
        try:
            return datetime.datetime.strptime(s, fmt)
        except ValueError:
            # Not this format; try the next one.
            pass

    raise ValueError("Unknown date format: '%s'" % s)
+
+
def str_to_date_py2(s):
    """Python 2 variant of the DATE_FORMATS parsing loop.

    s -- date text already normalised by str_to_date().

    If the string contains a "+HH00" offset, HH hours are added to the
    parsed value; otherwise nothing is added. Returns the result of the
    first entry of DATE_FORMATS that parses the string.

    Raises ValueError when no known format matches.
    """
    # NOTE(review): Python 2's strptime() is not documented to support %z,
    # so formats containing it presumably never match here - confirm.
    offsets = re.findall(r'\+([0-9]{2})00', s)
    tz = int(offsets[0]) if offsets else 0
    shift = datetime.timedelta(hours=tz)

    for fmt in DATE_FORMATS:
        try:
            return datetime.datetime.strptime(s, fmt) + shift
        except ValueError:
            # Not this format; try the next one.
            pass

    raise ValueError("Unknown date format: '%s'" % s)
+
Oops, something went wrong.

0 comments on commit 3d793a0

Please sign in to comment.