In [None]:
import requests
from bs4 import BeautifulSoup

def fetch_page(url, timeout=10):
    """Download ``url`` and parse the response body as HTML.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely on an
            unresponsive host, which would hang the notebook cell.

    Returns:
        A ``BeautifulSoup`` tree built with the ``html.parser`` backend when
        the server answers with status 200, otherwise ``None``.

    Raises:
        requests.RequestException: Propagated from ``requests.get`` on
            connection errors or when ``timeout`` elapses.
    """
    response = requests.get(url, timeout=timeout)
    # Any non-200 answer is reported to the caller as "no page".
    if response.status_code != 200:
        return None
    return BeautifulSoup(response.text, 'html.parser')

In [None]:
import re

def index_words(soup):
    """Count how often each word occurs in the text of a parsed page.

    Args:
        soup: Object exposing ``get_text()``, such as the value returned by
            ``fetch_page``.

    Returns:
        dict mapping each lower-cased word token (a run of word characters)
        to the number of times it appears in the page text.
    """
    counts = {}
    for token in re.findall(r'\w+', soup.get_text()):
        # Lower-case per token so that "Word" and "word" share one entry.
        key = token.lower()
        counts[key] = counts.get(key, 0) + 1
    return counts

In [None]:
def remove_stop_words(index):
    """Drop a fixed list of common English stop words from ``index``.

    The dictionary is modified in place; the same object is returned so the
    call can be used in an assignment.

    Args:
        index: dict keyed by word.

    Returns:
        ``index`` itself, without the stop-word keys.
    """
    for stop_word in ('a', 'an', 'the', 'and', 'or', 'in', 'on', 'at', 'to'):
        # pop with a default is a no-op when the key is absent.
        index.pop(stop_word, None)
    return index

In [None]:
from nltk.stem import PorterStemmer

def apply_stemming(index):
    """Merge the entries of ``index`` whose words share a Porter stem.

    Args:
        index: dict mapping word -> count.

    Returns:
        A new dict mapping each stem to the combined count of all words that
        reduce to it. ``index`` is not modified.
    """
    stem = PorterStemmer().stem
    merged = {}
    for word, count in index.items():
        root = stem(word)
        # First sighting stores the count as-is; later ones accumulate.
        merged[root] = merged[root] + count if root in merged else count
    return merged

In [None]:
def search(query, index):
    """Look up every word of ``query`` in a stemmed word index.

    The query is lower-cased, split into word tokens, and each token is
    Porter-stemmed before the lookup. The raw query is echoed to stdout.

    Args:
        query: Search string; may contain several words.
        index: dict keyed by stemmed word.

    Returns:
        dict mapping each stemmed query term that is present in ``index`` to
        the value stored for it. Terms not in ``index`` are omitted.
    """
    stemmer = PorterStemmer()
    tokens = re.findall(r'\w+', query.lower())
    print(query)
    stems = (stemmer.stem(token) for token in tokens)
    return {stem: index[stem] for stem in stems if stem in index}

In [None]:
def search_engine(url, query):
    """Fetch a page, index its words and look up ``query`` in that index.

    Pipeline: ``fetch_page`` -> ``index_words`` -> ``remove_stop_words`` ->
    ``apply_stemming`` -> ``search``. The page is downloaded on every call.

    Args:
        url: Page to fetch and index.
        query: Search string passed on to ``search``.

    Returns:
        The dict produced by ``search``, or ``None`` when ``fetch_page``
        returned ``None``.
    """
    soup = fetch_page(url)
    if soup is None:
        return None
    stemmed_index = apply_stemming(remove_stop_words(index_words(soup)))
    return search(query, stemmed_index)

In [None]:
# Demo: run a single-word query against the page at `url`.
# `url` stays bound at notebook level and is reused by the query cells below.
url = 'https://w3.braude.ac.il/?lang=en'
query = 'Industry'
# search_engine downloads and indexes the page, then returns {stem: count}
# (or None if the page could not be fetched).
results = search_engine(url, query)
print(results)

Industry
{'industri': 8}


In [None]:
# Two-word query; relies on `url` assigned in an earlier cell.
# Each search_engine call fetches and indexes the page again.
query = 'Braude college'
results = search_engine(url, query)
print(results)

Braude college
{'braud': 13, 'colleg': 8}


In [None]:
# Another two-word query against the same `url` assigned in an earlier cell.
query = 'Galilee center'
results = search_engine(url, query)
print(results)

Galilee center
{'galile': 15, 'center': 4}


In [None]:
# Keywords whose frequency on the page at `url` should be recorded.
important = {"Dynamic", "Deployment", "Edge", "Functions", "Rendering", "Regeneration", "API", "Network", "Domain", "Management"}
url = 'https://vercel.com/home'

# None marks "not measured"; it is kept when the page could not be fetched.
results_dict = {word: None for word in important}

for word in important:
    results = search_engine(url, word)
    if results is None:
        # fetch_page failed for this request; leave the placeholder in place.
        continue
    # search returns {stemmed_word: count}; there is no "count" key, so the
    # occurrence total is the sum of the returned values (0 when the keyword
    # does not appear on the page).
    results_dict[word] = sum(results.values())

print(results_dict)


{'Edge': 4, 'Functions': 9, 'Deployment': 10, 'Regeneration': 12, 'Rendering': 9, 'Dynamic': 7, 'Management': 10, 'API': 3, 'Domain': 6, 'Network': 7}


In [None]:
# %pip installs into the running kernel's environment; the version is pinned for reproducible re-runs.
%pip install firebase==4.0.1


Collecting firebase
  Downloading firebase-4.0.1-py3-none-any.whl.metadata (6.5 kB)
Downloading firebase-4.0.1-py3-none-any.whl (12 kB)
Installing collected packages: firebase
Successfully installed firebase-4.0.1


In [None]:
from firebase import firebase
import json

# Open a client for the Firebase database at this URL.
# NOTE(review): the second argument is None, presumably meaning "no
# authentication" - confirm against the firebase package documentation.
FBconn = firebase.FirebaseApplication('https://anantirgul-default-rtdb.europe-west1.firebasedatabase.app/',None)

# Upload `results_dict` (built in an earlier cell) and echo what the call returns.
# NOTE(review): looks like this stores the dict under /index/keywords - confirm.
result = FBconn.put('/index', 'keywords', results_dict)
print("Saved to Firebase:", result)



Saved to Firebase: {'API': 3, 'Deployment': 10, 'Domain': 6, 'Dynamic': 7, 'Edge': 4, 'Functions': 9, 'Management': 10, 'Network': 7, 'Regeneration': 12, 'Rendering': 9}
