## 3. Python Script - Answers

In [None]:
# These are the required libraries for the script
%pip install shodan
%pip install matplotlib
%pip install pandas
%pip install plotly
%pip install folium
%pip install cartopy
%pip install nbformat
%pip install pycountry

### SHODAN AND CENSYS QUERY (ALSO SAVING THE RESULTS IN A JSON FILE)

In [None]:
import json
import os

import shodan

# Read the API key from the SHODAN_API_KEY environment variable so it does not
# have to be stored in the notebook. The placeholder is kept as a fallback and
# must be replaced (or the variable set) before running this cell.
api = shodan.Shodan(os.environ.get('SHODAN_API_KEY', 'YOUR_API_KEY'))

# NOTE(review): these dictionaries are initialised here but not used by this cell.
diccionariVulns = {}
diccionariCities = {}
diccionariVulnsYears = {}

# Query Shodan for port-80 hosts whose HTTP title matches "Login",
# asking for at most 300 results.
results = api.search('http.title:"Login" port:80', limit=300)

# Persist the raw response so the other cells can work from the file
# without querying the API again.
with open('shodan_results.json', 'w', encoding='utf-8') as f:
    json.dump(results, f, ensure_ascii=False, indent=4)

### DOTS MAP  

In [None]:
import json
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from collections import defaultdict

# Initialize the figure and the world map (plain lat/lon projection)
plt.figure(figsize=(16, 9))
ax = plt.axes(projection=ccrs.PlateCarree())
ax.set_global()
ax.coastlines()
ax.add_feature(cfeature.BORDERS)
ax.add_feature(cfeature.LAND, facecolor='lightgray')
ax.add_feature(cfeature.OCEAN, facecolor='lightblue')

# Recover the data from the JSON file
with open('shodan_results.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Map each (latitude, longitude) pair to the number of results located there
coordinate_counts = defaultdict(int)

# `.get('matches', [])` matches how the other cells read the file and avoids a
# KeyError when the saved response has no "matches" entry.
for result in data.get('matches', []):
    # `or {}` also covers a "location" key that is present but null
    location = result.get('location') or {}
    lat = location.get('latitude')
    lon = location.get('longitude')

    # Skip results that do not carry a complete coordinate pair
    if lat is not None and lon is not None:
        coords = (lat, lon)
        coordinate_counts[coords] += 1

# Map a device count to the marker color used on the map
def get_color_for_count(count):
    """Return the marker color name for a location hosting `count` devices.

    Exactly 1 maps to 'lightgreen' and exactly 2 to 'yellow'. Any other value
    up to 5 maps to 'orange', up to 10 to 'red', and anything larger to 'black'.
    """
    # Exact-match buckets are checked first, in the same order as the thresholds
    for exact_value, color_name in ((1, 'lightgreen'), (2, 'yellow')):
        if count == exact_value:
            return color_name

    # Upper-bound buckets, smallest bound first
    for upper_bound, color_name in ((5, 'orange'), (10, 'red')):
        if count <= upper_bound:
            return color_name

    return 'black'

# Legend entries as (label, marker color), one per device-count bucket
legend_entries = [
    ('1 device (Lightgreen)', 'lightgreen'),
    ('2 devices (Yellow)', 'yellow'),
    ('3-5 devices (Orange)', 'orange'),
    ('6-10 devices (Red)', 'red'),
    ('More than 10 devices (Black)', 'black'),
]
legend_labels = [entry_label for entry_label, _ in legend_entries]
# Proxy artists: a single round marker per bucket, drawn on a white line
legend_handles = [
    plt.Line2D([0], [0], marker='o', color='w', markerfacecolor=face_color, markersize=10)
    for _, face_color in legend_entries
]

# Draw one marker per distinct coordinate, colored by how many devices share it
for (lat, lon), count in coordinate_counts.items():
    plt.plot(
        lon,
        lat,
        'o',
        markersize=5,
        color=get_color_for_count(count),
        transform=ccrs.Geodetic(),
    )

# Attach the legend and the title, then render the figure
ax.legend(legend_handles, legend_labels, title="Number of devices", loc='lower left', fontsize=10)
plt.title('Devices connected based on coordinates  (Cartopy)', fontsize=14)
plt.show()

### PIE CHART WITH THE DISTRIBUTION OF DEVICES BY COUNTRY

In [None]:
import matplotlib.pyplot as plt
import json
from collections import Counter

# Recover the data from the JSON file
with open('shodan_results.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Count the results per country
country_counts = Counter()

for result in data.get('matches', []):
    # The `or` fallbacks also cover keys that are present but null in the JSON,
    # which would otherwise produce a `None` label in the chart.
    country = (result.get('location') or {}).get('country_name') or 'Unknown'
    country_counts[country] += 1

# Sort the counts in descending order
# and prepare the data for the pie chart
sorted_counts = country_counts.most_common()

# Split the labels and sizes
labels = [item[0] for item in sorted_counts]
sizes = [item[1] for item in sorted_counts]

# Keep the top N countries and group the remainder under "Others"
top_n = 9
labels_top = labels[:top_n]
sizes_top = sizes[:top_n]
others_total = sum(sizes[top_n:])
# Only add the "Others" slice when something is left over; otherwise the chart
# would carry an empty "Others 0.0%" entry.
if others_total:
    labels_top.append('Others')
    sizes_top.append(others_total)

# Create the pie chart (skipped when there is nothing to draw)
if sizes_top:
    plt.figure(figsize=(8, 8))
    plt.pie(sizes_top, labels=labels_top, autopct='%1.1f%%', startangle=140)
    plt.title('Distribution of devices by country')
    plt.show()
else:
    print('No results found; nothing to plot.')

### IPs DISTRIBUTION WITH/WITHOUT VULNERABILITIES AND CVSS RATINGS

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import json

# Load the data
with open('shodan_results.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

vulnIPs = set()
noVulnIPs = set()
# cvssRatings[i] counts the vulnerabilities whose CVSS score truncates to i;
# scores of 9 and above (including 10) share the last bucket.
cvssRatings = [0] * 10

for result in data.get('matches', []):
    ip = result.get('ip_str', 'Unknown')
    # `or {}` also covers a "vulns" key that is present but null
    vulns = result.get('vulns') or {}

    if vulns:
        vulnIPs.add(ip)
        for vuln in vulns.values():
            # `or 0` keeps the "missing score counts as 0" behaviour and also
            # covers an explicit null, which int() would reject.
            score = vuln.get('cvss') or 0
            # Clamp to the valid bucket range so an out-of-range score cannot
            # index past either end of the list.
            index = max(0, min(int(score), 9))
            cvssRatings[index] += 1
    else:
        noVulnIPs.add(ip)

# An IP seen both with and without vulnerabilities is counted as vulnerable
noVulnIPs.difference_update(vulnIPs)

# --- Pie chart: share of IPs with and without vulnerabilities ---
labels = ['With vulnerabilities', 'Without vulnerabilities']
sizes = [len(vulnIPs), len(noVulnIPs)]
colors = ['red', 'green']

pie_fig, pie_ax = plt.subplots(figsize=(7, 7))
pie_ax.pie(sizes, labels=labels, colors=colors, autopct='%1.1f%%', startangle=140)
pie_ax.set_title('IPs distribution with/without vulnerabilities')
# Equal aspect ratio so the pie is drawn as a circle
pie_ax.axis('equal')
plt.show()

# --- Bar chart: number of vulnerabilities per CVSS bucket ---
# One label per bucket: '0-1', '1-2', ..., '9-10'
ratings = [f'{low}-{high}' for low, high in zip(range(10), range(1, 11))]

bar_fig, bar_ax = plt.subplots(figsize=(10, 6))
bar_ax.bar(ratings, cvssRatings, color='skyblue')
bar_ax.set_title('Vulnerabilities quantity by CVSS ratings')
bar_ax.set_xlabel('CVSS Ratings')
bar_ax.set_ylabel('Number of vulnerabilities')
bar_ax.grid(axis='y', linestyle='--', alpha=0.7)
bar_fig.tight_layout()
plt.show()


### BAR CHART WITH THE NUMBER OF VULNERABLE DEVICES PER COUNTRY

In [None]:
from collections import Counter
import pandas as pd
import matplotlib.pyplot as plt

# Tally, per country, the results that report at least one vulnerability
country_counts = Counter(
    match.get('location', {}).get('country_name', 'Unknown')
    for match in data['matches']
    if match.get('vulns')
)

# Build the top-10 table: one row per country, highest count first
df_ips = (
    pd.DataFrame.from_dict(country_counts, orient='index', columns=["Num_IPs"])
    .sort_values(by="Num_IPs", ascending=False)
    .head(10)
    .reset_index()
    .rename(columns={"index": "Country"})
)

# Horizontal bar chart of the top 10 countries
top_fig, top_ax = plt.subplots(figsize=(10, 6))
top_ax.barh(df_ips["Country"], df_ips["Num_IPs"], color="skyblue")
top_ax.set_xlabel("Number of vulnerable IPs")
top_ax.set_title("Top 10 countries with more vulnerable IPs")
# Flip the axis so the country with the highest count is drawn at the top
top_ax.invert_yaxis()
top_fig.tight_layout()
plt.show()

### STACKED BAR CHART COMPARING VULNERABLE DEVICES BASED ON THE ORG/ISP/ASN

In [None]:
from collections import Counter
import pandas as pd
import matplotlib.pyplot as plt

# NOTE(review): this resets `country_counts` to an empty Counter, but nothing in
# this cell reads or fills it — it looks like a leftover; confirm before removing.
country_counts = Counter()
from collections import defaultdict
import pandas as pd
import matplotlib.pyplot as plt

# Function to generate stacked bar plots for the top entries of a result field
def generate_top10_barplot(topic, title, top_n=10, matches=None):
    """Plot vulnerable vs. non-vulnerable device counts for the top values of a field.

    Args:
        topic: Key read from each result (e.g. "org", "isp", "asn"); results
            where it is missing or empty are grouped under "Unknown".
        title: Title shown on the chart.
        top_n: Number of field values to display (default 10).
        matches: Iterable of result dicts. When omitted, the "matches" list of
            the notebook-level `data` object is used.

    Returns:
        None. The chart is shown with matplotlib; if there are no results a
        message is printed and nothing is drawn.
    """
    if matches is None:
        matches = data.get('matches', [])

    stats = defaultdict(lambda: {"With Vuln": 0, "Without Vuln": 0})

    for result in matches:
        # `or` also maps an explicit null/empty value to "Unknown", so it does
        # not end up as a `None` label on the axis.
        key = result.get(topic) or "Unknown"
        has_vuln = "With Vuln" if result.get("vulns") else "Without Vuln"
        stats[key][has_vuln] += 1

    # Without any rows the DataFrame has no "With Vuln" column to sort on
    if not stats:
        print(f"No results available for '{topic}'; skipping chart.")
        return

    # Rows are field values, columns are the two counters. Entries are ranked by
    # vulnerable devices (not total devices).
    df = pd.DataFrame(stats).T
    df = df.sort_values(by="With Vuln", ascending=False).head(top_n)

    df.plot(kind="bar", stacked=True, figsize=(10, 6), colormap="Set2")
    plt.xlabel(topic.capitalize())
    plt.ylabel("Number of devices")
    plt.title(f"{title}")
    plt.xticks(rotation=45, ha="right")
    plt.tight_layout()
    plt.show()

# Draw one chart per field of interest (organization, ISP, autonomous system)
for field, chart_title in (
    ("org", "Top 10 organizations with vulnerabilities"),
    ("isp", "Top 10 ISPs with vulnerabilities"),
    ("asn", "Top 10 Autonomous Systems with vulnerabilities"),
):
    generate_top10_barplot(field, chart_title)


### BAR CHART WITH THE TOP 10 MOST VULNERABLE IPs

In [None]:
from collections import Counter

# Number of IPs shown in the chart; kept in line with the "Top 10" in the title
top_ip_limit = 10

# Total number of reported vulnerabilities per IP
ip_vuln_count = Counter()

for result in data.get('matches', []):
    ip = result.get('ip_str', 'Unknown')
    # `or {}` also covers a "vulns" key that is present but null
    vulns = result.get('vulns') or {}
    # Only IPs that actually report vulnerabilities belong in this ranking
    if vulns:
        ip_vuln_count[ip] += len(vulns)

top_ips = ip_vuln_count.most_common(top_ip_limit)

# zip(*[]) cannot be unpacked into two names, so only plot when there is data
if top_ips:
    ips, counts = zip(*top_ips)

    plt.figure(figsize=(10,5))
    plt.barh(ips, counts, color='tomato')
    plt.xlabel('Number of vulnerabilities')
    plt.title('Top 10 IPs more vulnereables')
    # Flip the axis so the IP with the most vulnerabilities is drawn at the top
    plt.gca().invert_yaxis()
    plt.tight_layout()
    plt.show()
else:
    print('No vulnerable IPs found; nothing to plot.')

### RELATION BETWEEN CVSS AND EPSS DOT CHART

In [None]:
import matplotlib.pyplot as plt

# Parallel lists with one entry per vulnerability found in the results
cvss_vals = []
epss_vals = []
# CVE identifiers are collected alongside the scores; the plot below does not use them
labels = []

for result in data.get('matches', []):
    # `or {}` also covers a "vulns" key that is present but null
    for cve, details in (result.get('vulns') or {}).items():
        # `or 0` keeps the "missing score is plotted as 0" behaviour and also
        # covers an explicit null, which `.get(key, 0)` would pass through as
        # None and which cannot be plotted as a number.
        cvss_vals.append(details.get('cvss') or 0)
        epss_vals.append(details.get('epss') or 0)
        labels.append(cve)

# One dot per vulnerability: x = CVSS, y = EPSS, colored by its EPSS value
plt.figure(figsize=(8,6))
scatter = plt.scatter(cvss_vals, epss_vals, alpha=0.7, c=epss_vals, cmap='viridis')
plt.colorbar(scatter, label='EPSS')
plt.xlabel('CVSS')
plt.ylabel('EPSS')
plt.title('Relation between CVSS and EPSS')
plt.grid(True)
plt.tight_layout()
plt.show()

### Table including all vulns and their characteristics

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import json

# Load the JSON file
with open('shodan_results.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Parallel lists that store the data, one entry per vulnerability
ips = []
cve_ids = []
cvss_scores = []
epss_scores = []
verified_flags = []
summaries = []
reference_counts = []

# Maximum number of summary characters shown in the table
summary_max_chars = 80

# Walk through every result and extract its vulnerabilities, if any
for result in data.get('matches', []):
    # `or {}` also covers a "vulns" key that is present but null
    vulns = result.get('vulns') or {}
    ip = result.get('ip_str', 'Unknown')  # or 'ip' if the field is named that way
    for cve, details in vulns.items():
        ips.append(ip)
        cve_ids.append(cve)
        cvss_scores.append(details.get("cvss", 0))
        epss_scores.append(details.get("epss", 0))
        verified_flags.append(details.get("verified", False))

        # Append the ellipsis only when the text was actually cut, so short or
        # missing summaries are not shown with a trailing "...".
        summary = details.get("summary") or ""
        if len(summary) > summary_max_chars:
            summary = summary[:summary_max_chars] + "..."
        summaries.append(summary)

        # Count distinct references; `or []` covers a null "references" value
        reference_counts.append(len(set(details.get("references") or [])))

# Build the DataFrame
df = pd.DataFrame({
    'IP': ips,
    'CVE': cve_ids,
    'CVSS': cvss_scores,
    'EPSS': epss_scores,
    'Verified': verified_flags,
    'Summary': summaries,
    'Unique references': reference_counts
})

# Show it as a Markdown table (DataFrame.to_markdown needs the optional
# `tabulate` package to be installed)
print(df.to_markdown(index=False))