### Imports and Constants

In [1]:
import pandas as pd
from passivetotal import analyzer
# Initialize the analyzer module; this runs before any Hostname/IPAddress lookup in the notebook.
# NOTE(review): presumably this loads PassiveTotal API credentials from local configuration - confirm against the docs linked below.
analyzer.init()
# Set the date range the passivetotal analyzer looks at for the queries in this notebook; see
# https://passivetotal.readthedocs.io/en/latest/analyzer.html#initialization for more.
analyzer.set_date_range(start='2022-02-01', end='2022-03-01')

#### Example %%bash magic

In [2]:
%%bash
# Write one example hostname to output.txt (overwriting any existing file); the following cell reads this file back.
echo "physiciansofficenews.com" > output.txt

### Read hostnames from the text file, query the PassiveTotal API for each one, and collect the results in a DataFrame

In [3]:
# Read the hostnames to investigate, one per line. Blank lines are skipped and
# repeated hostnames are collapsed (dict.fromkeys keeps first-seen order), so an
# empty string is never sent to the API and each hostname is queried only once.
with open('output.txt', encoding='utf-8') as hostname_file:
    hostnames = list(dict.fromkeys(
        line.strip() for line in hostname_file if line.strip()
    ))

# Fetch the A-record resolutions of every hostname as a DataFrame. The frames are
# gathered in a list and concatenated once, rather than re-copying a growing
# DataFrame on every loop iteration.
a_record_frames = [
    analyzer.Hostname(hostname).resolutions.only_a_records.as_df
    for hostname in hostnames
]

# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# when the file contained no hostnames.
output = pd.concat(a_record_frames) if a_record_frames else pd.DataFrame()

### Example Data Analysis

In [4]:
# Show the collected A records with the most recent `firstseen` values at the top.
output.sort_values("firstseen", ascending=False)

Unnamed: 0,query,recordtype,resolve,resolvetype,collected,firstseen,lastseen,duration,sources
0,physiciansofficenews.com,A,45.61.185.234,ip,2022-05-26 23:24:38,2022-02-02 04:18:26,2022-02-26 17:26:23,24,"[riskiq, mnemonic, emerging_threats]"


In [5]:
# Reverse lookup: for each IP address in the `resolve` column of the dataframe above,
# collect the A records of all domains resolving to that IP address.
# Each distinct IP is queried only once; an IP that appears on several rows of
# `output` would otherwise cause redundant API calls and duplicated result rows.
reverse_frames = [
    analyzer.IPAddress(ip).resolutions.only_a_records.as_df
    for ip in output["resolve"].drop_duplicates()
]

# Concatenate once at the end instead of growing the DataFrame inside the loop.
# pd.concat raises ValueError on an empty list, hence the empty-frame fallback.
reverse_df = pd.concat(reverse_frames) if reverse_frames else pd.DataFrame()

In [6]:
# Display the A records gathered by the reverse lookup on the resolved IP addresses.
reverse_df

Unnamed: 0,query,recordtype,resolve,resolvetype,collected,firstseen,lastseen,duration,sources
0,45.61.185.234,A,physiciansofficenews.com,domain,2022-05-26 23:24:38,2022-02-02 04:22:23,2022-02-26 17:26:23,24,"[riskiq, emerging_threats]"
1,45.61.185.234,A,ns2.physiciansofficenews.com,domain,2022-05-26 23:24:38,2022-02-02 04:22:23,2022-04-03 09:24:39,60,[riskiq]
2,45.61.185.234,A,mail.physiciansofficenews.com,domain,2022-05-26 23:24:38,2022-02-04 07:45:15,2022-02-21 06:21:39,16,[riskiq]
3,45.61.185.234,A,www.physiciansofficenews.com,domain,2022-05-26 23:24:38,2022-02-10 12:45:46,2022-02-10 12:45:55,0,[riskiq]
4,45.61.185.234,A,s1.xuanyimoli.com,domain,2022-05-26 23:24:38,2021-09-04 12:13:29,2022-05-14 03:24:01,251,[riskiq]
5,45.61.185.234,A,ns1.physiciansofficenews.com,domain,2022-05-26 23:24:38,2022-02-02 04:22:23,2022-04-03 09:24:39,60,[riskiq]


In [19]:
# Keep only the reverse-lookup rows whose `duration` value is greater than zero.
has_duration = reverse_df['duration'] > 0
reverse_df[has_duration]

Unnamed: 0,query,recordtype,resolve,resolvetype,collected,firstseen,lastseen,duration,sources
0,45.61.185.234,A,physiciansofficenews.com,domain,2022-05-26 23:24:38,2022-02-02 04:22:23,2022-02-26 17:26:23,24,"[riskiq, emerging_threats]"
1,45.61.185.234,A,ns2.physiciansofficenews.com,domain,2022-05-26 23:24:38,2022-02-02 04:22:23,2022-04-03 09:24:39,60,[riskiq]
2,45.61.185.234,A,mail.physiciansofficenews.com,domain,2022-05-26 23:24:38,2022-02-04 07:45:15,2022-02-21 06:21:39,16,[riskiq]
4,45.61.185.234,A,s1.xuanyimoli.com,domain,2022-05-26 23:24:38,2021-09-04 12:13:29,2022-05-14 03:24:01,251,[riskiq]
5,45.61.185.234,A,ns1.physiciansofficenews.com,domain,2022-05-26 23:24:38,2022-02-02 04:22:23,2022-04-03 09:24:39,60,[riskiq]


In [9]:
# analyzer.Hostname(<domain>).whois (e.g. analyzer.Hostname('crowdstrike.com').whois)
# returns the current WHOIS results for a domain. Its as_df form is collected for
# every queried domain so the WHOIS data can be compared in one dataframe.
#
# `output` holds one row per A record, so a domain with several resolutions appears
# several times in output["query"]. Each distinct domain is looked up only once;
# otherwise its WHOIS row would be repeated and inflate the value_counts() tallies
# computed from whois_df in later cells.
whois_frames = [
    analyzer.Hostname(domain).whois.as_df
    for domain in output["query"].drop_duplicates()
]

# Concatenate once at the end instead of growing the DataFrame inside the loop.
# pd.concat raises ValueError on an empty list, hence the empty-frame fallback.
whois_df = pd.concat(whois_frames) if whois_frames else pd.DataFrame()

In [10]:
# Display the WHOIS records gathered for the queried domains.
whois_df

Unnamed: 0,query,organization,name,telephone,email,registrant_org,registrant_name,registrant_phone,registrant_email,registrar,server,age,date_registered,date_updated,date_loaded,nameservers,date_expires
0,physiciansofficenews.com,Domain Privacy Service FBO Registrant,Domain Privacy Service FBO Registrant,1.8014948462,whois@hostmonster.com,,Domain Privacy Service FBO Registrant,1.8014948462,whois@hostmonster.com,FastDomain Inc.,rdap.fastdomain.com,2613,2015-03-31 14:43:26-07:00,2022-05-02 17:14:52-07:00,2022-05-05 13:51:14.238000-07:00,"[ns1.physiciansofficenews.com, ns2.physicianso...",2023-03-31 07:43:26-07:00


In [11]:
# Series.value_counts returns the frequency of each unique value, in descending order:
# https://pandas.pydata.org/docs/reference/api/pandas.Series.value_counts.html
# This cell and the ones after it are a few examples of surfacing the most prevalent values.

organizations = whois_df["organization"]
organizations.value_counts()

Domain Privacy Service FBO Registrant    1
Name: organization, dtype: int64

In [12]:
# Most prevalent registrars across the collected WHOIS records.
registrars = whois_df["registrar"]
registrars.value_counts()

FastDomain Inc.    1
Name: registrar, dtype: int64

In [13]:
# Most prevalent nameserver sets; judging by the displayed output, each cell holds the
# full list of nameservers for a domain, so the tally is per complete list.
nameserver_sets = whois_df["nameservers"]
nameserver_sets.value_counts()

[ns1.physiciansofficenews.com, ns2.physiciansofficenews.com]    1
Name: nameservers, dtype: int64

In [14]:
# Collect just the SOA records of every queried domain.
#
# `output` holds one row per A record, so a domain with several resolutions appears
# several times in output["query"]. Each distinct domain is looked up only once;
# otherwise its SOA rows would be repeated and skew the counts derived from soa_df.
soa_frames = []
for domain in output["query"].drop_duplicates():
    res_df = analyzer.Hostname(domain).resolutions.as_df
    # there is no method akin to only_a_records, hence the filtering by recordtype
    soa_frames.append(res_df[res_df["recordtype"] == 'SOA'])

# Concatenate once at the end instead of growing the DataFrame inside the loop.
# pd.concat raises ValueError on an empty list, hence the empty-frame fallback.
soa_df = pd.concat(soa_frames) if soa_frames else pd.DataFrame()

In [15]:
# Display the SOA records gathered for the queried domains.
soa_df

Unnamed: 0,query,recordtype,resolve,resolvetype,collected,firstseen,lastseen,duration,sources
2,physiciansofficenews.com,SOA,ns1.domain.tld,domain,2022-05-26 23:24:38,2022-02-04 07:45:15,2022-02-16 01:58:56,11,[riskiq]
6,physiciansofficenews.com,SOA,root@physiciansofficenews.com,email,2022-05-26 23:24:38,2022-02-04 07:45:15,2022-02-16 01:58:56,11,[riskiq]


In [16]:
# Tally the SOA email addresses, most common first: select the `resolve` values of
# the rows whose resolvetype is 'email' in a single .loc lookup.
is_email = soa_df['resolvetype'] == 'email'
soa_df.loc[is_email, 'resolve'].value_counts()

root@physiciansofficenews.com    1
Name: resolve, dtype: int64

In [17]:
# Collect just the MX records of every queried domain.
#
# `output` holds one row per A record, so a domain with several resolutions appears
# several times in output["query"]. Each distinct domain is looked up only once;
# otherwise its MX rows would be repeated in mx_df.
mx_frames = []
for domain in output["query"].drop_duplicates():
    res_df = analyzer.Hostname(domain).resolutions.as_df
    # there is no method akin to only_a_records, hence the filtering by recordtype
    mx_frames.append(res_df[res_df["recordtype"] == 'MX'])

# Concatenate once at the end instead of growing the DataFrame inside the loop.
# pd.concat raises ValueError on an empty list, hence the empty-frame fallback.
mx_df = pd.concat(mx_frames) if mx_frames else pd.DataFrame()

In [18]:
# Display the MX records gathered for the queried domains.
mx_df

Unnamed: 0,query,recordtype,resolve,resolvetype,collected,firstseen,lastseen,duration,sources
4,physiciansofficenews.com,MX,mail.physiciansofficenews.com,domain,2022-05-26 23:24:38,2018-01-16 02:36:33,2022-02-21 06:21:39,1497,[riskiq]
