#### Summary

#### imports

In [1]:
import requests
import pandas as pd
from pprint import pprint
import pickle
from datetime import datetime
import time
import json
import whois
import itertools
import re
from colorama import init, Fore, Style
import xmltodict
import matplotlib.pyplot as plt
import networkx as nx
from tldextract import extract

#### Read in newly registered domains(NRD) from the last 10 days

In [2]:
newly_registered_domains=set()
nrd_url='https://cdn.jsdelivr.net/gh/hagezi/dns-blocklists@latest/adblock/nrds.10.txt'
response=requests.get(nrd_url)
response_text=response.text
lines=response_text.split('\n')
for line in lines:
    if line.startswith('||'):
        newly_registered_domains.add(line[2:-1])

In [3]:
print('Number of newly registered domains(in the past 10 days): {}'.format(len(newly_registered_domains)))

Number of newly registered domains(in the past 10 days): 997986


### Now we'll look for possible suspicious domains by looking for certain keywords

In [4]:
possible_suspicious_domains=set()
keywords_for_suspicious_domains_keywords=['login','microsoft','paypal']
for domain in newly_registered_domains:
    for keyword in keywords_for_suspicious_domains_keywords:
        if keyword in domain:
            print('Possible suspicious domain:{}'.format(domain))
            possible_suspicious_domains.add(domain)

Possible suspicious domain:deelogin.com
Possible suspicious domain:woodforest-online-login.cfd
Possible suspicious domain:king365casinologin.info
Possible suspicious domain:onabetlogin.com
Possible suspicious domain:r777login.com
Possible suspicious domain:kingslot777login.bet
Possible suspicious domain:paypalcreidt.com
Possible suspicious domain:tower88login.com
Possible suspicious domain:paypalvredit.com
Possible suspicious domain:analogintegrated.com
Possible suspicious domain:lineslot77login.net
Possible suspicious domain:b2alogin.com
Possible suspicious domain:fun88login.in
Possible suspicious domain:hollywoodlogin.web.za
Possible suspicious domain:employeexpress-loginp7.shop
Possible suspicious domain:ollo88login.net
Possible suspicious domain:elang88login.co
Possible suspicious domain:mediaslot77login.com
Possible suspicious domain:login-mp.com
Possible suspicious domain:paypalservices.net
Possible suspicious domain:paycore-logint9.shop
Possible suspicious domain:tiplogin.com
Po

In [5]:
print('Number of potentially suspicious domains: {}'.format(len(possible_suspicious_domains)))

Number of potentially suspicious domains: 506


#### Spot checking some of these domains in VT, some are flagged for phishing:

#### login-soport-icloud[.]us

#### paypalcreedit[.]com

#### Grab whois info for the potentially suspicious domains

In [6]:
creation_dates=[]
expiration_dates=[]
updated_dates=[]
name_servers=[]

domains=[]
for domain in possible_suspicious_domains:
    time.sleep(7)
    print('Looking up: {}'.format(domain))
    domains.append(domain)
    w = whois.whois(domain)
    pprint(w)
    creation_dates.append(w.get('creation_date',None))
    expiration_dates.append(w.get('expiration_date',None))
    updated_dates.append(w.get('updated_date',None))
    name_servers.append(w.get('name_servers',None))

Looking up: login-eu.pw
{'address': None,
 'city': None,
 'country': None,
 'creation_date': datetime.datetime(2024, 3, 28, 20, 16, 56),
 'dnssec': 'unsigned',
 'domain_name': 'LOGIN-EU.PW',
 'emails': 'abuse@beget.com',
 'expiration_date': datetime.datetime(2025, 3, 28, 23, 59, 59),
 'name': None,
 'name_servers': ['NS2.BEGET.PRO',
                  'NS2.BEGET.COM',
                  'NS1.BEGET.PRO',
                  'NS1.BEGET.COM'],
 'org': None,
 'referral_url': None,
 'registrant_postal_code': None,
 'registrar': 'Beget LLC',
 'state': None,
 'status': ['serverHold https://icann.org/epp#serverHold',
            'serverTransferProhibited '
            'https://icann.org/epp#serverTransferProhibited',
            'clientTransferProhibited '
            'https://icann.org/epp#clientTransferProhibited'],
 'updated_date': datetime.datetime(2024, 4, 2, 20, 21, 29),
 'whois_server': 'whois.beget.com'}
Looking up: microsoft365.buzz
{'address': ['REDACTED FOR PRIVACY', '1928 E. Highland A

PywhoisError: Domain not found.
>>> Last update of WHOIS database: 2024-04-09T16:36:37Z <<<

Terms of Use: Access to Public Interest Registry WHOIS information is provided to assist persons in determining the contents of a domain name registration record in the Public Interest Registry registry database. The data in this record is provided by Public Interest Registry for informational purposes only, and Public Interest Registry does not guarantee its accuracy. This service is intended only for query-based access. You agree that you will use this data only for lawful purposes and that, under no circumstances will you use this data to (a) allow, enable, or otherwise support the transmission by e-mail, telephone, or facsimile of mass unsolicited, commercial advertising or solicitations to entities other than the data recipient's own existing customers; or (b) enable high volume, automated, electronic processes that send queries or data to the systems of Registry Operator, a Registrar, or Identity Digital except as reasonably necessary to register domain names or modify existing registrations. All rights reserved. Public Interest Registry reserves the right to modify these terms at any time. By submitting this query, you agree to abide by this policy.  The Registrar of Record identified in this output may have an RDDS service that can be queried for additional information on how to contact the Registrant, Admin, or Tech contact of the queried domain name.


#### Convert domain info into a pandas data frame and fix some formatting

In [7]:
domain_info=(list(zip(domains, creation_dates,expiration_dates,updated_dates,name_servers)))
df = pd.DataFrame(domain_info,columns=['domain','Creation_Date','Expiration_Date','Updated_Date','Name_Servers'])
df['creation_date']=df["Creation_Date"].str[0]
df['expiration_date']=df["Expiration_Date"].str[0]
df['updated_date']=df["Updated_Date"].str[0]

df['first_name_server']=df["Name_Servers"].str[0]
df['second_name_server']=df["Name_Servers"].str[1]
print(df)

                                domain  \
0                          login-eu.pw   
1                    microsoft365.buzz   
2                  bidadari99login.net   
3                klikbet77newlogin.xyz   
4                 microsoftautogen.net   
5                hollywoodlogin.africa   
6          employeexpress-loginp5.shop   
7                     tango4dlogin.net   
8           bitmart-login-official.com   
9                  bet777slotlogin.com   
10  enviromaticsystems-onmicrosoft.com   
11                      hbo77login.com   
12              augovservice-login.com   
13                    logintolink.site   
14                   goceng99login.com   
15                   warkop77login.com   
16                  loginpersik4d.info   
17                    tower88login.org   
18                     awan88login.com   
19                    paypalcreidt.com   
20                     nix777login.net   
21             ssoidrajasthanlogin.com   
22                    ketua4dlogin

#### Extraxt the tlds from the domains

In [8]:
def extract_tld(domain):
    ext=extract(domain)
    return ext.suffix

In [9]:
df['domain'] = df['domain'].astype(str)
#try:
#    df['registered_domain']=df['domain'].apply(get_tld)
#except:
#    pass
df['domain_tld'] = df['domain'].apply(extract_tld)

#f['domain_tld']=extract_tld(df.domain)
print(df)

                                domain  \
0                          login-eu.pw   
1                    microsoft365.buzz   
2                  bidadari99login.net   
3                klikbet77newlogin.xyz   
4                 microsoftautogen.net   
5                hollywoodlogin.africa   
6          employeexpress-loginp5.shop   
7                     tango4dlogin.net   
8           bitmart-login-official.com   
9                  bet777slotlogin.com   
10  enviromaticsystems-onmicrosoft.com   
11                      hbo77login.com   
12              augovservice-login.com   
13                    logintolink.site   
14                   goceng99login.com   
15                   warkop77login.com   
16                  loginpersik4d.info   
17                    tower88login.org   
18                     awan88login.com   
19                    paypalcreidt.com   
20                     nix777login.net   
21             ssoidrajasthanlogin.com   
22                    ketua4dlogin

#### Must've overloaded the whois server, only got info for 34 domains, bummer

#### Convert dates to datetime objects

In [10]:
df['expiration_date_parsed'] = pd.to_datetime(df['creation_date'], format='mixed')
df['expiration_date_parsed'] = pd.to_datetime(df['creation_date'], format='mixed')
df['updated_date_parsed'] = pd.to_datetime(df['updated_date'], format='mixed')

#### What are the most common tlds?

In [11]:
df['domain_tld'].value_counts()

domain_tld
com       16
net        5
org        3
shop       2
pw         1
buzz       1
xyz        1
africa     1
site       1
info       1
co         1
bet        1
Name: count, dtype: int64

#### The next step in automating this analysis could be to pivot on information for the domains flagged for phishing.

#### For example, the ip hosting login-soport-icloud[.]us, 162.240.148[.]126, appears to host several other suspicious domains.