# Imports and Setup

In [1]:
import requests
import sys
import re
import json
import pandas as pd
import datetime

never save your api key to a script you push to a public repo :) 

In [2]:
with open('/path/to/file/containing/vt/api/key') as f:
    key = f.read()

In [3]:
request_headers = {
    'x-apikey': key.strip(),
}

In [4]:
ipregex = re.compile(r'(?:(?:\d|[01]?\d\d|2[0-4]\d|25[0-5])\.){3}(?:25[0-5]|2[0-4]\d|[01]?\d\d|\d)(?:\/\d{1,2})?')
domainregex = re.compile(r'\b((?=[a-z0-9-]{1,63}\.)(xn--)?[a-z0-9]+(-[a-z0-9]+)*\.)+[a-z]{2,63}\b')
iplist = []
domainlist = []

# Input

In [5]:
with open('domains.txt') as f:
    infr_list = f.readlines()
    
for line in infr_list:
    line = line.strip()
    
    match_ipregex = ipregex.match(line)
    if match_ipregex:
        iplist.append(line)
    
    match_domainregex = domainregex.match(line)

    if match_domainregex:
        domainlist.append(line)

    elif not match_ipregex and not match_domainregex:
        print(line + " is probably not a domain or an IP")
    

# Domain Queries

In [6]:
# set up blank dataframes to hold all of our responses
main_domain_df = pd.DataFrame()
main_communicating_df = pd.DataFrame()
main_downloaded_df = pd.DataFrame()

### Query /domains endpoint

In [7]:
for domain in domainlist:
    try:
        url = "https://www.virustotal.com/api/v3/domains/" + domain
        r = requests.get(url, headers=request_headers)
        domain_data = r.json()
        
        df = pd.json_normalize(domain_data)
        df['domain'] = domain
        
        df = r.json()
        df = pd.json_normalize(df)
        df['domain'] = domain
        df['last_dns_records_human'] = pd.to_datetime(df['data.attributes.last_dns_records_date'], unit='s')
        df['last_update_date_human'] = pd.to_datetime(df['data.attributes.last_update_date'], unit='s')
        df['data.attributes.creation_date_human'] = pd.to_datetime(df['data.attributes.creation_date'], unit='s')
        main_domain_df  = pd.concat([main_domain_df, df])
        trimmed_df = main_domain_df[['domain', 'data.attributes.last_analysis_stats.malicious', 'last_dns_records_human','last_update_date_human', 'data.attributes.creation_date_human'] ]
    except:
        print("Error creating main domain df for " + domain)

In [8]:
main_domain_df[['domain', 'data.attributes.last_analysis_stats.malicious', 'last_dns_records_human','last_update_date_human', 'data.attributes.creation_date_human'] ]

Unnamed: 0,domain,data.attributes.last_analysis_stats.malicious,last_dns_records_human,last_update_date_human,data.attributes.creation_date_human
0,bigdaddy.ddns.net,4,2022-04-21 17:05:31,2022-01-13 00:36:17,2001-06-28 16:04:59
0,1biggie.publicvm.com,11,2022-07-22 10:54:37,2022-07-20 07:03:58,2007-07-19 05:57:17
0,n0fuzga.publicvm.com,10,2022-07-20 06:35:20,2022-07-20 07:03:58,2007-07-19 05:57:17
0,daveblack.publicvm.com,11,2022-05-27 12:17:17,2022-07-20 07:03:58,2007-07-19 05:57:17
0,sky01.publicvm.com,12,2022-07-14 06:41:47,2022-07-20 07:03:58,2007-07-19 05:57:17
0,franc01.publicvm.com,11,2022-06-17 02:07:43,2022-07-20 07:03:58,2007-07-19 05:57:17
0,bbrainx01.publicvm.com,9,2022-07-22 22:46:24,2022-07-20 07:03:58,2007-07-19 05:57:17


### Query /communicating_files Endpoint

In [9]:
for domain in domainlist:
    try:
        url = "https://www.virustotal.com/api/v3/domains/" + domain + "/communicating_files?limit=50" 
        # if you do not specify a limit parameter, the default value is 10
        r = requests.get(url, headers=request_headers)
        communicating_data = r.json()
    except:
        print("Couldn't reach VT for " + domain)  
    
    try:
        df = pd.json_normalize(communicating_data)
        df['domain'] = domain
        df = df.explode('data')
        df = df.reset_index()
        data = [x for x in df['data']]
        df2 = pd.concat([df.drop('data', axis=1), pd.json_normalize(data)], axis=1)
      
        df3 = df2[['domain', 'attributes.sha256', 'attributes.last_analysis_stats.malicious', 'attributes.first_submission_date', 'attributes.size'
           , 'attributes.names', 'attributes.type_description', 'attributes.type_extension']]

    except:
        print("couldn't create df for " + domain)
        # this will happen if there are no communicating files
    try:
        main_communicating_df = pd.concat([main_communicating_df, df3], axis=0)
    except:
        print("couldn't concatenate for " + domain)
    


couldn't create df for bbrainx01.publicvm.com


In [10]:
main_communicating_df['first_submission_date'] = pd.to_datetime(main_communicating_df['attributes.first_submission_date'], unit='s')
main_communicating_df = main_communicating_df.drop(columns=['attributes.first_submission_date'])

#### Display results

In [11]:
main_communicating_df.sort_values(by=['first_submission_date'], ascending=False)

Unnamed: 0,domain,attributes.sha256,attributes.last_analysis_stats.malicious,attributes.size,attributes.names,attributes.type_description,attributes.type_extension,first_submission_date
1,franc01.publicvm.com,df9ff33718bab1bc71c2736165731d3ef59d8164fa2637...,19,216068,"[CrpFRaNCVOp0.vbs, crpfrancvop0.vbs]",unknown,,2022-06-13 17:46:22
1,franc01.publicvm.com,df9ff33718bab1bc71c2736165731d3ef59d8164fa2637...,19,216068,"[CrpFRaNCVOp0.vbs, crpfrancvop0.vbs]",unknown,,2022-06-13 17:46:22
4,sky01.publicvm.com,5968c3e2838932293e0bef478f3b150fb767dbf26ab13d...,19,216094,"[c:\windows\system32\afd1ndpmx.dll, c:\windows...",unknown,,2022-06-13 16:42:18
0,franc01.publicvm.com,4530c2681887c0748cc2ecddb1976d15ad813a4a01e581...,53,48640,"[Client.exe, stage3_InstallUtil.exe]",Win32 EXE,exe,2022-05-18 15:39:56
0,franc01.publicvm.com,4530c2681887c0748cc2ecddb1976d15ad813a4a01e581...,53,48640,"[Client.exe, stage3_InstallUtil.exe]",Win32 EXE,exe,2022-05-18 15:39:56
8,1biggie.publicvm.com,d4ae6077f8161e5f7e1ba8c6728bd3ef0d27953d932f48...,24,72295,[d4ae6077f8161e5f7e1ba8c6728bd3ef0d27953d932f4...,Powershell,ps1,2022-05-11 21:50:32
4,daveblack.publicvm.com,ddfca42d6eb175f5a48463f09f165fa2f5e8aa8060327a...,55,48640,"[Client.exe, malware.exe]",Win32 EXE,exe,2022-05-05 21:24:29
7,sky01.publicvm.com,778975d324970c8ac14b1257fa51ac86c24b825e0e0c19...,46,48672,"[virus\c622227b1b8e8b564b113d902593c498, Clien...",Win32 EXE,exe,2022-04-28 03:28:13
2,1biggie.publicvm.com,2bb4bb6851ef659b9bb891e22718c5b76fde4a9041d6de...,18,13288,[Aircraft A320 Slide PN# (1).vbs],VBA,vbs,2022-04-27 18:34:46
6,sky01.publicvm.com,648b3c64a26754d82cb20eaa5bc42b96b0d5874f61bb71...,23,72378,"[C:\Users\Public\jqdr1ev4ly.PS1, 648b3c64a2675...",Powershell,ps1,2022-04-27 17:04:26


### Query /downloaded_files Endpoint

In [12]:
for domain in domainlist:
    try:
        url = "https://www.virustotal.com/api/v3/domains/" + domain + "/downloaded_files?limit=39"
        # the API returns an error for limit values above 40
        r = requests.get(url, headers=request_headers)
        downloaded_data = r.json()
    except:
        print("Couldn't reach VT for " + domain)  
    
    try:
        df = pd.json_normalize(downloaded_data)
        df['domain'] = domain
        df = df.explode('data')
        df = df.reset_index()
        data = [x for x in df['data']]
        df2 = pd.concat([df.drop('data', axis=1), pd.json_normalize(data)], axis=1)
      
        df3 = df2[['domain', 'attributes.sha256', 'attributes.last_analysis_stats.malicious', 'attributes.first_submission_date', 'attributes.size'
           , 'attributes.names', 'attributes.type_description', 'attributes.type_extension']]
    except:
        print("couldn't create df for " + domain)
        # this will happen if there are no downloaded files
    try:
        main_downloaded_df = pd.concat([main_downloaded_df, df3], axis=0)
    except:
        print("couldn't concatenate for " + domain)

couldn't create df for 1biggie.publicvm.com
couldn't create df for n0fuzga.publicvm.com
couldn't create df for daveblack.publicvm.com
couldn't create df for sky01.publicvm.com
couldn't create df for franc01.publicvm.com
couldn't create df for bbrainx01.publicvm.com


In [13]:
main_downloaded_df['first_submission_date'] = pd.to_datetime(main_downloaded_df['attributes.first_submission_date'], unit='s')
main_downloaded_df = main_downloaded_df.drop(columns=['attributes.first_submission_date'])

#### Display Results

In [14]:
main_downloaded_df.sort_values(by=['first_submission_date'], ascending=False)

Unnamed: 0,domain,attributes.sha256,attributes.last_analysis_stats.malicious,attributes.size,attributes.names,attributes.type_description,attributes.type_extension,first_submission_date
0,bigdaddy.ddns.net,1c0ec2d553a1a5d8f81a038316f82e5e4d77d7fd377f56...,0,7577,[/var/www/clean-mx/virusesevidence/output.1760...,HTML,html,2021-07-17 04:25:07
0,bigdaddy.ddns.net,1c0ec2d553a1a5d8f81a038316f82e5e4d77d7fd377f56...,0,7577,[/var/www/clean-mx/virusesevidence/output.1760...,HTML,html,2021-07-17 04:25:07
0,bigdaddy.ddns.net,1c0ec2d553a1a5d8f81a038316f82e5e4d77d7fd377f56...,0,7577,[/var/www/clean-mx/virusesevidence/output.1760...,HTML,html,2021-07-17 04:25:07
0,bigdaddy.ddns.net,1c0ec2d553a1a5d8f81a038316f82e5e4d77d7fd377f56...,0,7577,[/var/www/clean-mx/virusesevidence/output.1760...,HTML,html,2021-07-17 04:25:07
0,bigdaddy.ddns.net,1c0ec2d553a1a5d8f81a038316f82e5e4d77d7fd377f56...,0,7577,[/var/www/clean-mx/virusesevidence/output.1760...,HTML,html,2021-07-17 04:25:07
0,bigdaddy.ddns.net,1c0ec2d553a1a5d8f81a038316f82e5e4d77d7fd377f56...,0,7577,[/var/www/clean-mx/virusesevidence/output.1760...,HTML,html,2021-07-17 04:25:07
0,bigdaddy.ddns.net,1c0ec2d553a1a5d8f81a038316f82e5e4d77d7fd377f56...,0,7577,[/var/www/clean-mx/virusesevidence/output.1760...,HTML,html,2021-07-17 04:25:07


# IP Queries

In [15]:
# create blank DataFrames to hold our responses
main_ip_df = pd.DataFrame()
main_ip_communicating_df = pd.DataFrame()
main_ip_downloaded_df = pd.DataFrame()

### Query /ip_addresses/ Endpoint

In [16]:
for ip in iplist:
    try:
        
        url = "https://www.virustotal.com/api/v3/ip_addresses/" + ip
        r = requests.get(url, headers=request_headers)
    except:
        print("Couldn't reach VT for " + ip)  
    
    try:
        ip_data = r.json()
        
        df = pd.json_normalize(ip_data)
        df2 = df[['data.id', 'data.attributes.last_analysis_stats.malicious', 'data.attributes.whois_date', 'data.attributes.last_modification_date']]

    
    except:
        print("couldn't create df for " + ip)
    try:
        main_ip_df = pd.concat([main_ip_df, df2])
    except:
        print("couldn't concatenate for " + ip)

In [17]:
main_ip_df['whois_date_human'] = pd.to_datetime(main_ip_df['data.attributes.whois_date'], unit='s')
main_ip_df['last_modification_date_human'] = pd.to_datetime(main_ip_df['data.attributes.last_modification_date'], unit='s')
main_ip_df = main_ip_df.drop(columns=['data.attributes.last_modification_date', 'data.attributes.whois_date'])

#### Display Results

In [18]:
main_ip_df

Unnamed: 0,data.id,data.attributes.last_analysis_stats.malicious,whois_date_human,last_modification_date_human
0,104.21.29.171,0,2022-08-18 13:14:18,2022-08-19 18:16:40


### Query /communicating_files Endpoint

In [19]:
for ip in iplist:
    try:
        url = "https://www.virustotal.com/api/v3/ip_addresses/" + ip + "/communicating_files?limit=50"
        # as noted above, limit parameter defaults to 10 if you do not specify it
        r = requests.get(url, headers=request_headers)
    except:
        print("Couldn't reach VT for " + ip)  
    
    try:
        communicating_data = r.json()

        df = pd.json_normalize(communicating_data)
        df['ip'] = ip
        df = df.explode('data')
        df = df.reset_index()
        data = [x for x in df['data']]
        df2 = pd.concat([df.drop('data', axis=1), pd.json_normalize(data)], axis=1)
      
        df3 = df2[['ip', 'attributes.sha256', 'attributes.last_analysis_stats.malicious', 'attributes.first_submission_date', 'attributes.size'
           , 'attributes.names', 'attributes.type_description', 'attributes.type_extension']]

    except:
        print("couldn't create df for " + ip)
        # as noted above, this will happen if there are no communicating files
    try:
        main_ip_communicating_df = pd.concat([main_ip_communicating_df, df3], axis=0)
    except:
        print("couldn't concatenate for " + ip)
 

In [20]:
main_ip_communicating_df['first_submission_date'] = pd.to_datetime(main_ip_communicating_df['attributes.first_submission_date'], unit='s')
main_ip_communicating_df = main_ip_communicating_df.drop(columns=['attributes.first_submission_date'])

#### Display Results

In [21]:
main_ip_communicating_df.sort_values(by=['first_submission_date'], ascending=False)

Unnamed: 0,ip,attributes.sha256,attributes.last_analysis_stats.malicious,attributes.size,attributes.names,attributes.type_description,attributes.type_extension,first_submission_date
11,104.21.29.171,f8996298d0902b91d8f09ce66ba73022501972f4830ee3...,0,2418109,[WLW #2 - by ROI Team.pdf],PDF,pdf,2022-06-03 08:02:31
2,104.21.29.171,26a333685c8cd8223048006f279d1b4de589da4d0e42f0...,1,6594045,[],Android,apk,2022-05-31 06:39:33
8,104.21.29.171,bc6bd1def1fc60aa1221a6a4c6cfee3dcbb3422ecd12b4...,0,4116954,[],Android,apk,2022-04-03 07:23:10
9,104.21.29.171,d0a2cbef067e9c7e2100032a06f042bd668ff08f2d114b...,21,28565,[],HTML,html,2022-01-22 12:09:25
4,104.21.29.171,6e9857ba9a3a3427f69a80e71feec70a27c543f47f3a1b...,53,460880,[08e85f10ab7bb7832bd8724c72ade987.virus],Win32 EXE,exe,2021-12-15 05:45:12
5,104.21.29.171,772e3824d39e6c3c995e83addc88af6ae9956a229bc65b...,3,1126400,[MasterSeeker.exe],Win32 EXE,exe,2021-11-14 16:19:04
0,104.21.29.171,0ab5c3556818b25c3d59b895b72d0ce3da5e6564b935d5...,21,1081858,[],Win32 EXE,exe,2021-11-04 17:29:23
7,104.21.29.171,99c2076b437eccdd3f5503b1af6f3bb518fc5f65d094ff...,4,1130496,[d:\my downloads\masterseeker1.5.1\masterseeke...,Win32 EXE,exe,2014-09-28 05:20:13
6,104.21.29.171,7a468a9e7c3e9d7f9435fb47bf92bf1e608736830e201e...,5,1125888,"[MasterSeeker.exe, masterseeker.exe, %temp%\ra...",Win32 EXE,exe,2014-06-27 23:01:54
3,104.21.29.171,575ebd0f99c90abc92e4cf10c7c253ed46430312cf3fd4...,3,222425,[pkg.apk],Android,apk,2013-05-06 20:36:50


### Query /downloaded_files Endpoint

In [22]:
for ip in iplist:
    try:
        url = "https://www.virustotal.com/api/v3/ip_addresses/" + ip + "/downloaded_files?limit=39"
        # as noted above, API returns error for values above 40
        r = requests.get(url, headers=request_headers)
    except:
        print("Couldn't reach VT for " + ip)  
    
    try:
        downloaded_data = r.json()

        df = pd.json_normalize(downloaded_data)
        df['ip'] = ip
        df = df.explode('data')
        df = df.reset_index()
        data = [x for x in df['data']]
        df2 = pd.concat([df.drop('data', axis=1), pd.json_normalize(data)], axis=1)
      
        df3 = df2[['ip', 'attributes.sha256', 'attributes.last_analysis_stats.malicious', 'attributes.first_submission_date', 'attributes.size'
           , 'attributes.names', 'attributes.type_description', 'attributes.type_extension']]

    except:
        print("couldn't create df for " + ip)
        # as noted aboce, this will happen if there are no downloaded files
    try:
        main_ip_downloaded_df = pd.concat([main_ip_downloaded_df, df3], axis=0)
    except:
        print("couldn't concatenate for " + ip)


In [23]:
main_ip_downloaded_df['first_submission_date'] = pd.to_datetime(main_ip_downloaded_df['attributes.first_submission_date'], unit='s')
main_ip_downloaded_df = main_ip_downloaded_df.drop(columns=['attributes.first_submission_date'])

#### Display Results

In [24]:
main_ip_downloaded_df.sort_values(by=['first_submission_date'], ascending=False)

Unnamed: 0,ip,attributes.sha256,attributes.last_analysis_stats.malicious,attributes.size,attributes.names,attributes.type_description,attributes.type_extension,first_submission_date
7,104.21.29.171,ab65e678e0ee57b9ef8c13e5734a7613001e1f1a9c9fa4...,0,7109,"[762910965691187210, 706860923255783454, 97841...",HTML,html,2022-06-17 00:23:38
11,104.21.29.171,daff297fba433d9c199953fb523ceb3e9853f0551b7ff2...,0,2443,"[sliders, /api/sliders]",Text,txt,2022-05-31 07:00:19
6,104.21.29.171,4d5692811dd085633084b6215bee8662fe5e7526b64b1c...,0,45659,[/var/www/clean-mx/virusesevidence/output.1965...,HTML,html,2022-05-06 06:46:36
14,104.21.29.171,1d50d1dfef532616fba62aa28d0395adb48ea2a6c2309d...,0,79469,[/var/www/clean-mx/virusesevidence/output.1956...,HTML,html,2022-05-03 16:06:23
15,104.21.29.171,f4b23099c59175ebd416ec39a63db9d293def0c61514eb...,0,7109,"[905834303391424512, 807915967031541762, share...",HTML,html,2022-04-26 12:01:39
9,104.21.29.171,76bc83158e2d5946f5b6199c1805c8ffecd1757227a87e...,0,932,[goto.php],HTML,html,2022-04-23 02:02:32
18,104.21.29.171,da01d82c3cd416b4663aff1698a4077d3d54e7c6f4dc21...,0,7109,"[920558096424771594, 951452297072873522, 87410...",HTML,html,2022-04-21 13:42:23
19,104.21.29.171,86edb184f2347fde21ca9914b8a8b46403c46a5908eba1...,0,7109,"[858716014715469864, 821829564962832424, 68879...",HTML,html,2022-04-12 05:10:42
13,104.21.29.171,432280ce32b853953f5b84301b20a0d6708c91912c89fc...,0,81138,[],HTML,html,2022-04-06 22:31:23
16,104.21.29.171,2b89a16e76690b72d3b5d4ff10b7e722734962971886bb...,0,2714,[],JavaScript,js,2022-03-29 01:40:20


Of course, this is just the beginning of our analysis! You could filter out files by number malicious ratings, filetype, or the submisison date