# Introduction

This notebook provides an example of how to set up filters for phishing entries using `FeedManager`.
Filters can be applied globally to all retrieved entries, or only to the entries coming from a specific `FeedSource`.

# Installation


In [None]:
!pip install phishing-web-collector>=0.3.0

# Import libraries  

In [None]:
import csv

import phishing_web_collector as pwc

# Configure phishing feeds


In [None]:
# Feeds this example collects from.
feed_sources = [pwc.FeedSource.CERT_PL, pwc.FeedSource.TWEET_FEED]

# NOTE(review): storage_path is presumably the directory where the manager keeps
# downloaded feed data — confirm against the FeedManager documentation.
manager = pwc.FeedManager(sources=feed_sources, storage_path="feeds_data")

In [None]:
# Configure filters

In [None]:
from phishing_web_collector.models import PhishingEntry, EntryFilter


def url_contains_facebook() -> EntryFilter:
    """Create a filter that matches entries whose URL mentions "facebook".

    Returns:
        A predicate taking a ``PhishingEntry`` and returning ``True`` when the
        lower-cased ``url`` of the entry contains the substring ``"facebook"``.
    """
    needle = "facebook"

    def _matches(entry: PhishingEntry) -> bool:
        # Lower-casing first makes the substring test case-insensitive.
        lowered_url = entry.url.lower()
        return needle in lowered_url

    return _matches

def url_contains_com() -> EntryFilter:
    """Create a filter that matches entries whose URL host is under ``.com``.

    The check is made on the parsed hostname rather than on the raw URL string,
    so a port, query string or fragment after the host does not hide a ``.com``
    domain, and a ``.com/`` fragment inside the path of another domain does not
    count as a match.

    Returns:
        A predicate taking a ``PhishingEntry`` and returning ``True`` when the
        hostname of its ``url`` ends with ``.com``; ``False`` otherwise,
        including when the URL cannot be parsed.
    """
    from urllib.parse import urlsplit

    def _f(e: PhishingEntry) -> bool:
        url = e.url.strip()
        try:
            parts = urlsplit(url)
            if not parts.netloc:
                # Without a leading "scheme://" urlsplit puts the host into the
                # path; re-parse as a network-path reference to recover it.
                parts = urlsplit("//" + url)
            host = parts.hostname or ""
        except ValueError:
            # Raised for malformed authorities (e.g. unbalanced IPv6 brackets).
            return False
        # hostname is already lower-cased; drop a trailing root dot ("evil.com.").
        return host.rstrip(".").endswith(".com")

    return _f

# Global filter: registered on the manager itself, not tied to one feed.
com_filter = url_contains_com()
manager.add_filter(com_filter)

# Per-feed filter: registered only for the TWEET_FEED source.
facebook_filter = url_contains_facebook()
manager.add_filter_for_feed(pwc.FeedSource.TWEET_FEED, facebook_filter)

# Retrieve feeds

In [None]:
# Top-level `await` is valid in a notebook cell because IPython runs cells with
# autoawait; in a plain script this call would need a wrapper such as
# asyncio.run(). The result is iterated in the next section, where each item's
# `.url` attribute is read.
entries = await manager.retrieve_all()

# Transform entries to CSV

In [None]:

# Reduce every retrieved entry to the domain of its URL.
phishing_domains = [pwc.get_domain_from_url(entry.url) for entry in entries]

# newline="" leaves line-ending handling to the csv module.
with open("phishing_domains.csv", mode="w", newline="", encoding="utf-8") as csv_file:
    domain_writer = csv.writer(csv_file)
    domain_writer.writerow(["Domain"])
    # One single-column row per domain, in the same order as the list.
    domain_writer.writerows([domain] for domain in phishing_domains)
print("First 10 phishing domains:")
print(phishing_domains[:10])