# Web Scraper

In [1]:
import os
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

def scrape_annapolis(filename='Annapolis_PD_raw_data_new.csv', max_iterations=None, timeout=30):
    """Scrape the Annapolis message listing and the text of every linked message.

    The listing page is fetched first; each table row (after the header row)
    that has at least two cells and a link in its first cell becomes one
    record. Every record's link is then fetched and its visible text is stored
    with whitespace collapsed. The result is written to ``data/<filename>``
    (relative to the current working directory) and returned.

    Args:
        filename: Name of the CSV file created inside the ``data`` directory.
        max_iterations: Maximum number of table rows to examine, or ``None``
            for no limit. Rows without a usable link still count toward the
            limit. ``0`` (or a negative value) examines no rows.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        pandas.DataFrame with columns ``title``, ``date_sent``, ``link`` and
        ``content``. ``content`` is ``None`` for pages that could not be
        fetched.

    Raises:
        ValueError: If the listing page does not return HTTP 200 or contains
            no ``<table>`` element.
        requests.RequestException: If the listing page request itself fails
            (connection error, timeout, ...).
    """
    base_url = 'https://www.annapolis.gov/list.aspx?PRVMSG=253'
    columns = ['title', 'date_sent', 'link', 'content']

    response = requests.get(base_url, timeout=timeout)
    if response.status_code != 200:
        # Fail with the real cause instead of a misleading "no table" error.
        raise ValueError(
            f"Could not load the listing page (HTTP {response.status_code})."
        )
    soup = BeautifulSoup(response.text, 'html.parser')

    table = soup.find('table')
    if not table:
        raise ValueError("Could not find the table on the page.")

    # The first <tr> is the header row.
    rows = table.find_all('tr')[1:]
    print(f'Total rows found in table: {len(rows)}')

    records = []
    for i, row in enumerate(rows):
        # Compare against None explicitly so that max_iterations=0 means
        # "examine nothing" rather than "no limit".
        if max_iterations is not None and i >= max_iterations:
            break

        cells = row.find_all('td')
        if len(cells) < 2:
            continue

        link_tag = cells[0].find('a')
        href = link_tag.get('href') if link_tag else None
        if not href:
            continue

        records.append({
            'title': cells[0].get_text(strip=True),
            'date_sent': cells[1].get_text(strip=True),
            # urljoin copes with absolute hrefs and with relative hrefs that
            # lack a leading slash; plain concatenation would corrupt both.
            'link': urljoin(base_url, href),
        })

    print(f'Processing {len(records)} records...')

    for record in records:
        link = record['link']
        print(f'Scraping: {link}')
        try:
            response = requests.get(link, timeout=timeout)
        except requests.RequestException as exc:
            # Best effort: one unreachable page must not discard the rest.
            print(f'Failed to fetch {link}: {exc}')
            record['content'] = None
            continue

        if response.status_code != 200:
            print(f'Failed to fetch {link}')
            record['content'] = None
            continue

        soup = BeautifulSoup(response.text, 'html.parser')
        # Drop script/style elements so only visible text remains.
        for script in soup(["script", "style"]):
            script.extract()

        text = soup.get_text()
        record['content'] = ' '.join(text.split())

    # Explicit columns keep the schema stable even when nothing was scraped.
    df = pd.DataFrame(records, columns=columns)

    os.makedirs('data', exist_ok=True)
    output_path = os.path.join('data', filename)
    df.to_csv(output_path, index=False)
    print(f'Saved {len(records)} records to {output_path}')
    return df

In [3]:
# Trial run: cap the scrape at the first 15 table rows so only a handful of
# message pages are fetched instead of the full listing.
df = scrape_annapolis(filename='Annapolis_PD_raw_data_new.csv', max_iterations=15)
# Bare last expression so the notebook renders a preview of the first rows.
df.head()

Total rows found in table: 2135
Processing 15 records...
Scraping: https://www.annapolis.gov/CivicSend/ViewMessage/message/247966
Scraping: https://www.annapolis.gov/CivicSend/ViewMessage/message/247842
Scraping: https://www.annapolis.gov/CivicSend/ViewMessage/message/247727
Scraping: https://www.annapolis.gov/CivicSend/ViewMessage/message/247641
Scraping: https://www.annapolis.gov/CivicSend/ViewMessage/message/247473
Scraping: https://www.annapolis.gov/CivicSend/ViewMessage/message/247344
Scraping: https://www.annapolis.gov/CivicSend/ViewMessage/message/247093
Scraping: https://www.annapolis.gov/CivicSend/ViewMessage/message/247086
Scraping: https://www.annapolis.gov/CivicSend/ViewMessage/message/247083
Scraping: https://www.annapolis.gov/CivicSend/ViewMessage/message/246948
Scraping: https://www.annapolis.gov/CivicSend/ViewMessage/message/246523
Scraping: https://www.annapolis.gov/CivicSend/ViewMessage/message/246401
Scraping: https://www.annapolis.gov/CivicSend/ViewMessage/message/2

Unnamed: 0,title,date_sent,link,content
0,Annapolis Police Department Daily Report - 12/...,12/12/2024 11:06 AM,https://www.annapolis.gov/CivicSend/ViewMessag...,Annapolis Police Department Daily Report - 12/...
1,Annapolis Police Department Daily Report - 12/...,12/11/2024 9:38 AM,https://www.annapolis.gov/CivicSend/ViewMessag...,Annapolis Police Department Daily Report - 12/...
2,Annapolis Police Department Daily Report - 12/...,12/10/2024 8:15 AM,https://www.annapolis.gov/CivicSend/ViewMessag...,Annapolis Police Department Daily Report - 12/...
3,Annapolis Police Department Daily Report - 12/...,12/9/2024 10:50 AM,https://www.annapolis.gov/CivicSend/ViewMessag...,Annapolis Police Department Daily Report - 12/...
4,Annapolis Police Department Daily Report - 12/...,12/6/2024 8:42 AM,https://www.annapolis.gov/CivicSend/ViewMessag...,Annapolis Police Department Daily Report - 12/...
