## Imports and API access

In [77]:
import dotenv
import os
import requests
import pandas as pd
import pygal_maps_world.maps

# Load API credentials from the local .env file so no secret is hard-coded here.
dotenv.load_dotenv('.env')

alien_vault_api_key = os.getenv('AlienVault_API_Key')
virus_total_api_key = os.getenv('VirusTotal_API_Key')
abuse_ipdb_api_key = os.getenv('AbuseIPDB_API_Key')
urlhaus_api_key = os.getenv('URLHaus_API_Key')

#Check API Access
# os.getenv returns None for a variable that is not set, so a missing key would
# otherwise only show up later as an authentication failure. Report missing
# keys by variable name only; the key values themselves are never printed.
api_keys_by_env_name = {
    'AlienVault_API_Key': alien_vault_api_key,
    'VirusTotal_API_Key': virus_total_api_key,
    'AbuseIPDB_API_Key': abuse_ipdb_api_key,
    'URLHaus_API_Key': urlhaus_api_key,
}
missing_api_keys = [env_name for env_name, value in api_keys_by_env_name.items() if not value]
all_keys_retrieved = not missing_api_keys
if missing_api_keys:
    print(f"Warning: API key(s) not found in environment/.env: {', '.join(missing_api_keys)}")

## Collect OTX Data

In [17]:
# Query the OTX pulses/activity endpoint, sending the AlienVault key as the
# X-OTX-API-KEY header.
otx_endpoint = 'https://otx.alienvault.com/api/v1/pulses/activity'
headers_otx = {
    'X-OTX-API-KEY': alien_vault_api_key
}

# requests applies no timeout by default; without one a stalled connection
# would hang this cell indefinitely.
response_otx = requests.get(otx_endpoint, headers=headers_otx, timeout=30)
# Fail here with the HTTP status (e.g. a rejected key) rather than with a
# KeyError on 'results' further down.
response_otx.raise_for_status()
data_otx = response_otx.json()

# Preview only the first rows of the returned records.
pd.DataFrame(data_otx['results']).head()

Unnamed: 0,id,name,description,author_name,modified,created,tags,references,public,adversary,...,malware_families,attack_ids,industries,TLP,indicators,revision,groups,in_group,author,is_subscribing
0,6537c2b2571bf1a8c6ad5b86,"Stealer for PIX payment system, new Lumar stea...","In this article, we share excerpts from our re...",AlienVault,2023-10-24T13:14:21.338000,2023-10-24T13:12:18.036000,[],[],1,,...,[],[],[],white,"[{'id': 3684250565, 'indicator': '0c8e88877383...",3,[],False,"{'username': 'AlienVault', 'id': '2', 'avatar_...",
1,65330195bd87e4d1ebf51211,Organizations under attack from cryptominer-ke...,"In April of this year, the FBI published an ad...",AlienVault,2023-10-20T22:39:17.349000,2023-10-20T22:39:17.349000,[],[],1,,...,[],[],[],white,"[{'id': 3259723071, 'indicator': '0a50081a6cd3...",1,[],False,"{'username': 'AlienVault', 'id': '2', 'avatar_...",
2,6532ff7bdf2fef73982ab50d,Updated MATA attacks industrial companies in E...,"Expanding Kaspersky's research scope, Kaspersk...",AlienVault,2023-10-20T22:30:18.563000,2023-10-20T22:30:18.563000,[],[],1,Lazarus,...,[],[],[],white,"[{'id': 3773906240, 'indicator': '01b3c7b2ff7e...",1,[],False,"{'username': 'AlienVault', 'id': '2', 'avatar_...",
3,6532fd39df844d91503a94c7,The forgotten malvertising campaign,"In recent weeks, Malwarebytes has noted an inc...",AlienVault,2023-10-20T22:20:40.550000,2023-10-20T22:20:40.550000,[],[],1,,...,[],[],[],white,"[{'id': 3541137110, 'indicator': 'jquerywins.c...",1,[],False,"{'username': 'AlienVault', 'id': '2', 'avatar_...",
4,6532fc649fb565be22515704,Crambus: New Campaign Targets Middle Eastern G...,A long-running Iranian espionage group staged ...,AlienVault,2023-10-20T22:17:06.993000,2023-10-20T22:17:06.993000,[],[],1,Crambus,...,[],[],[],white,"[{'id': 3774180839, 'indicator': '159b07668073...",1,[],False,"{'username': 'AlienVault', 'id': '2', 'avatar_...",


## Collect VirusTotal Data

In [18]:
# Look up a single IP address through the VirusTotal v3 ip_addresses endpoint,
# sending the VirusTotal key as the x-apikey header.
vt_endpoint = 'https://www.virustotal.com/api/v3/ip_addresses/'
headers_vt = {
    'x-apikey': virus_total_api_key
}

#TODO: Replace the sample IP address with the IP address to query
sample_ip = '8.8.8.8'
# requests applies no timeout by default; without one a stalled connection
# would hang this cell indefinitely.
response_vt = requests.get(vt_endpoint + sample_ip, headers=headers_vt, timeout=30)
# Stop on an HTTP error so an error payload is never mistaken for a report.
response_vt.raise_for_status()
data_vt = response_vt.json()

data_vt

{'data': {'attributes': {'jarm': '29d3fd00029d29d00042d43d00041d598ac0c1012db967bb1ad0ff2491b3ae',
   'network': '8.8.8.0/24',
   'last_https_certificate_date': 1698107574,
   'tags': [],
   'whois': 'Level 3 Parent, LLC LVLT-ORG-8-8 (NET-8-0-0-0-1) 8.0.0.0 - 8.127.255.255\nGoogle LLC LVLT-GOGL-8-8-8 (NET-8-8-8-0-1) 8.8.8.0 - 8.8.8.255\n',
   'last_analysis_date': 1698107272,
   'as_owner': 'GOOGLE',
   'last_analysis_stats': {'harmless': 65,
    'malicious': 3,
    'suspicious': 0,
    'undetected': 21,
    'timeout': 0},
   'asn': 15169,
   'whois_date': 1697259388,
   'reputation': 565,
   'last_analysis_results': {'Bkav': {'category': 'undetected',
     'result': 'unrated',
     'method': 'blacklist',
     'engine_name': 'Bkav'},
    'CMC Threat Intelligence': {'category': 'harmless',
     'result': 'clean',
     'method': 'blacklist',
     'engine_name': 'CMC Threat Intelligence'},
    'Snort IP sample list': {'category': 'harmless',
     'result': 'clean',
     'method': 'blackli

## Collect AbuseIPDB Data

In [22]:
# Check one IP address against the AbuseIPDB v2 check endpoint. The endpoint
# and headers defined here are reused by the URLHaus cell further down.
abuseipdb_endpoint = 'https://api.abuseipdb.com/api/v2/check'
headers_abuseipdb = {
    'Accept': 'application/json',
    'Key': abuse_ipdb_api_key
}

params_abuseipdb = {
    'ipAddress': sample_ip,
    'maxAgeInDays': '90'
}

# requests applies no timeout by default; without one a stalled connection
# would hang this cell indefinitely.
response_abuseipdb = requests.get(
    abuseipdb_endpoint,
    headers=headers_abuseipdb,
    params=params_abuseipdb,
    timeout=30,
)
# Stop on an HTTP error so an error payload is never mistaken for a report.
response_abuseipdb.raise_for_status()
data_abuseipdb = response_abuseipdb.json()

data_abuseipdb

{'data': {'ipAddress': '8.8.8.8',
  'isPublic': True,
  'ipVersion': 4,
  'isWhitelisted': True,
  'abuseConfidenceScore': 0,
  'countryCode': 'US',
  'usageType': 'Data Center/Web Hosting/Transit',
  'isp': 'Google LLC',
  'domain': 'google.com',
  'hostnames': ['dns.google'],
  'isTor': False,
  'totalReports': 116,
  'numDistinctUsers': 31,
  'lastReportedAt': '2023-10-21T08:52:38+00:00'}}

## Collect Recently Added URLHaus Data

In [86]:
import ipaddress
from collections import Counter

urlhaus_endpoint = 'https://urlhaus-api.abuse.ch/v1/urls/recent/'

# An IP is reported/blocked only when its AbuseIPDB abuseConfidenceScore is
# strictly greater than this value.
ABUSE_CONFIDENCE_THRESHOLD = 15


def is_ipv4_address(host):
    """Return True when ``host`` is a string holding a valid IPv4 address.

    Counting dots and digits would accept hosts such as '999.1.1.1' or
    '1..23.4'; ipaddress.IPv4Address validates each octet properly.
    """
    if not isinstance(host, str):
        return False
    try:
        ipaddress.IPv4Address(host)
    except ValueError:  # AddressValueError is a ValueError subclass
        return False
    return True


# NOTE(review): abuse.ch documents an Auth-Key header for URLhaus API requests;
# confirm against the current API docs. The header is only sent when the key is set.
headers_urlhaus = {'Auth-Key': urlhaus_api_key} if urlhaus_api_key else {}

# requests applies no timeout by default; without one a stalled connection
# would hang this cell indefinitely.
response_urlhaus = requests.get(urlhaus_endpoint, headers=headers_urlhaus, timeout=30)
response_urlhaus.raise_for_status()
data_urlhaus = response_urlhaus.json()

urlhaus_df = pd.DataFrame(data_urlhaus['urls'])

#Remove offline malicious URLS
cleaned_urlhaus_df = urlhaus_df[urlhaus_df.url_status != 'offline']

#Keep only the hosts that are IPv4 addresses (drops domain names and anything else)
ip_list = [host for host in cleaned_urlhaus_df.host.unique().tolist() if is_ipv4_address(host)]

#Determine countries of origin for IP addresses
ip_country_dict = {}
ip_block_list = []
for ip in ip_list:
    response_abuseipdb = requests.get(
        abuseipdb_endpoint,
        headers=headers_abuseipdb,
        params={'ipAddress': ip},
        timeout=30,
    )
    # Surface HTTP errors (e.g. an exhausted rate limit) with their status code
    # instead of failing below with a KeyError on the missing 'data' key.
    response_abuseipdb.raise_for_status()
    data_abuseipdb = response_abuseipdb.json()
    print(data_abuseipdb)
    #Only IPs scoring above the threshold are counted and added to the block list
    if data_abuseipdb['data']['abuseConfidenceScore'] > ABUSE_CONFIDENCE_THRESHOLD:
        ip_country_dict[ip] = data_abuseipdb['data']['countryCode']
        ip_block_list.append(data_abuseipdb['data']['ipAddress'])

#Now get top 10 countries from the list
top_10_countries = Counter(ip_country_dict.values()).most_common(10)
print(top_10_countries)
print("Add these IPs to firewall blacklist: ")
print(ip_block_list)




{'data': {'ipAddress': '201.150.179.211', 'isPublic': True, 'ipVersion': 4, 'isWhitelisted': False, 'abuseConfidenceScore': 4, 'countryCode': 'BO', 'usageType': 'Fixed Line ISP', 'isp': 'Comteco Ltda', 'domain': 'supernet.com.bo', 'hostnames': ['host-201-150-179-211.supernet.com.bo'], 'isTor': False, 'totalReports': 1, 'numDistinctUsers': 1, 'lastReportedAt': '2023-10-25T10:45:00+00:00'}}
{'data': {'ipAddress': '216.244.203.64', 'isPublic': True, 'ipVersion': 4, 'isWhitelisted': False, 'abuseConfidenceScore': 68, 'countryCode': 'AR', 'usageType': 'Fixed Line ISP', 'isp': 'Sinectis S.A.', 'domain': 'uolsinectis.com.ar', 'hostnames': ['ont-200-216-244-203-64.alvear.sinectis.com.ar.203.244.216.in-addr.arpa'], 'isTor': False, 'totalReports': 20, 'numDistinctUsers': 14, 'lastReportedAt': '2023-10-25T13:31:07+00:00'}}
{'data': {'ipAddress': '77.91.68.249', 'isPublic': True, 'ipVersion': 4, 'isWhitelisted': False, 'abuseConfidenceScore': 4, 'countryCode': 'FI', 'usageType': 'Data Center/Web H

### Report top 10 countries where malware sites are recently hosted

In [87]:
#translate Country Codes to Country Names
from iso3166 import countries


def country_name(country_code):
    """Return the iso3166 country name for ``country_code``.

    Falls back to 'Unknown' for an empty/None code and to the raw code when
    iso3166 does not recognise it, so one odd entry cannot abort the report.
    """
    # presumably AbuseIPDB can return a null countryCode for some IPs - verify
    if not country_code:
        return 'Unknown'
    try:
        return countries.get(country_code).name
    except KeyError:  # countries.get raises KeyError for an unknown code
        return str(country_code)


# top_10_countries holds (country_code, count) pairs; only the code is needed here.
top_10_countries_names = [country_name(country_code) for country_code, _count in top_10_countries]
for rank, country in enumerate(top_10_countries_names, start=1):
    print(f'{rank}. {country}')


1. Argentina
2. Korea, Republic of
3. China
4. United States of America
5. Germany
6. Russian Federation
7. Hong Kong
8. Brazil
9. Netherlands
10. Cyprus


## Collect Active URLHaus Data (much larger dataset)