In [5]:
import pandas as pd
from bs4 import BeautifulSoup
import requests
from datetime import datetime
import random
import time
import os

url_openphish = 'https://openphish.com/'
alive_sites = []
start_time = datetime.now().strftime("%m/%d/%Y %H:%M:%S")

def parse_data():
    page = requests.get(url_openphish, stream=True, allow_redirects=True, timeout=10, verify=False)
    soup = BeautifulSoup(page.text, "html.parser")
    table = soup.find('table', class_ = 'pure-table pure-table-striped')
    internal_table = table.find('tbody')
    
    now = datetime.now()
    current_time = now.strftime("%m/%d/%Y %H:%M:%S")
    date = current_time.split(" ")[0]

    for tr in internal_table.find_all('tr'):
        url = ""
        target = ""
        attack_time = ""
        row = []
        for td in tr.find_all('td'):
            row.append(td.text.strip())        
        if row:
            url = row[0]
            target = row[1]        
            attack_time = date + " " + row[2]

            datetime_object = datetime.strptime(attack_time,"%m/%d/%Y %H:%M:%S")
            if ((now - datetime_object).total_seconds()/60-180)<16:
                alive_sites.append([url, target, attack_time])

In [6]:
# Function to store the data
def store_data(start_time, end_time, alive_sites):
    df = pd.DataFrame(alive_sites, columns=["URL", "Target", "Attack Time"])
    filename = "attack_data.csv"
    if os.path.exists(filename):
        with open(filename, 'a') as f:
            df.to_csv(f, header=False, index=False)
    else:
        df.to_csv(filename, index=False)

    print("Parsing start time:", start_time)
    print("Parsing end time:", end_time)
    print("Number of unique URLs for the given period:", len(set(df['URL'])))
    print("Top 3 most frequently attacked brands:", df['Target'].value_counts().head(3))

In [7]:
# Loop to run the script every 5 minutes
while True:
    parse_data()
    end_time = datetime.now().strftime("%m/%d/%Y %H:%M:%S")
    store_data(start_time, end_time, alive_sites)
    alive_sites = []
    time.sleep(300) # Sleep for 5 minutes



Parsing start time: 02/04/2023 23:02:56
Parsing end time: 02/04/2023 23:02:59
Number of unique URLs for the given period: 4
Top 3 most frequently attacked brands: Generic/Spear Phishing    1
Facebook, Inc.            1
Crypto/Wallet             1
Name: Target, dtype: int64




Parsing start time: 02/04/2023 23:02:56
Parsing end time: 02/04/2023 23:08:00
Number of unique URLs for the given period: 3
Top 3 most frequently attacked brands: Crypto/Wallet             1
Generic/Spear Phishing    1
Facebook, Inc.            1
Name: Target, dtype: int64




Parsing start time: 02/04/2023 23:02:56
Parsing end time: 02/04/2023 23:13:00
Number of unique URLs for the given period: 5
Top 3 most frequently attacked brands: Yahoo! Inc            1
Microsoft OneDrive    1
Crypto/Wallet         1
Name: Target, dtype: int64




Parsing start time: 02/04/2023 23:02:56
Parsing end time: 02/04/2023 23:18:01
Number of unique URLs for the given period: 6
Top 3 most frequently attacked brands: AT&T Inc.                 1
Generic/Spear Phishing    1
Webmail Providers         1
Name: Target, dtype: int64




Parsing start time: 02/04/2023 23:02:56
Parsing end time: 02/04/2023 23:23:01
Number of unique URLs for the given period: 6
Top 3 most frequently attacked brands: Outlook                   1
AT&T Inc.                 1
Generic/Spear Phishing    1
Name: Target, dtype: int64




Parsing start time: 02/04/2023 23:02:56
Parsing end time: 02/04/2023 23:28:02
Number of unique URLs for the given period: 7
Top 3 most frequently attacked brands: Office365                 1
DHL Airways, Inc.         1
Generic/Spear Phishing    1
Name: Target, dtype: int64




Parsing start time: 02/04/2023 23:02:56
Parsing end time: 02/04/2023 23:33:02
Number of unique URLs for the given period: 5
Top 3 most frequently attacked brands: Office365                 1
Generic/Spear Phishing    1
Deutsche Kreditbank       1
Name: Target, dtype: int64




Parsing start time: 02/04/2023 23:02:56
Parsing end time: 02/04/2023 23:38:03
Number of unique URLs for the given period: 5
Top 3 most frequently attacked brands: Outlook                   1
Office365                 1
Generic/Spear Phishing    1
Name: Target, dtype: int64




Parsing start time: 02/04/2023 23:02:56
Parsing end time: 02/04/2023 23:43:04
Number of unique URLs for the given period: 6
Top 3 most frequently attacked brands: Credit Agricole S.A.      1
Generic/Spear Phishing    1
Microsoft OneDrive        1
Name: Target, dtype: int64




Parsing start time: 02/04/2023 23:02:56
Parsing end time: 02/04/2023 23:48:04
Number of unique URLs for the given period: 9
Top 3 most frequently attacked brands: Bank of America           1
Lojas Renner              1
Generic/Spear Phishing    1
Name: Target, dtype: int64




Parsing start time: 02/04/2023 23:02:56
Parsing end time: 02/04/2023 23:53:05
Number of unique URLs for the given period: 9
Top 3 most frequently attacked brands: Discover                  1
Generic/Spear Phishing    1
Bank of America           1
Name: Target, dtype: int64




Parsing start time: 02/04/2023 23:02:56
Parsing end time: 02/04/2023 23:58:06
Number of unique URLs for the given period: 8
Top 3 most frequently attacked brands: Crypto/Wallet             1
Generic/Spear Phishing    1
Discover                  1
Name: Target, dtype: int64




Parsing start time: 02/04/2023 23:02:56
Parsing end time: 02/05/2023 00:03:06
Number of unique URLs for the given period: 30
Top 3 most frequently attacked brands: Bank of America    1
Tencent            1
Grupo Santander    1
Name: Target, dtype: int64


KeyboardInterrupt: 

In [8]:
gained_data = pd.read_csv('attack_data.csv')

In [13]:
print("Number of unique URLs for the given period:", len(set(gained_data['URL'])))
print("Top 3 most frequently attacked brands:", gained_data['Target'].value_counts().head(3))

Number of unique URLs for the given period: 47
Top 3 most frequently attacked brands: Generic/Spear Phishing    13
Outlook                    8
Microsoft OneDrive         7
Name: Target, dtype: int64
