# SNDAQ Alert History

This notebook scrapes the sn-wg mailing list archive for SNDAQ alert messages about supernova candidates and writes them to one text file per year. Ideally this data would be retrieved from i3live instead.

In [80]:
from bs4 import BeautifulSoup

from datetime import datetime
from requests import get
from requests.auth import HTTPBasicAuth

In [81]:
# English month names in calendar order; the archive URL for a month is built
# from the entry at index ``month - 1``.
months = [
    'January', 'February', 'March',
    'April', 'May', 'June',
    'July', 'August', 'September',
    'October', 'November', 'December',
]

## Web Scraper

Use `requests` and `BeautifulSoup` to read the sn-wg Mailman (pipermail) archive and extract the candidate information from the subject lines of emails sent to the list.

In [87]:
def get_alert_data(year=2019, month=1):
    """Get all alert information for a given year and month from the sn-wg mailing list.

    Downloads the by-date index page of the pipermail archive for the requested
    month and parses every link whose text contains 'significance SN candidate'.

    Parameters
    ==========
    year : int
        The year [20xx].
    month : int
        The month [1..12].

    Returns
    =======
    alerts : list
        One ``[sig_cor, sig_raw, run_id, alert_date]`` list per alert, in the
        order the links appear on the page. The two significances are floats,
        ``run_id`` is an int and ``alert_date`` is a ``datetime`` truncated to
        whole seconds. Alerts dated before 2017-02-21 are omitted.
    """
    # Grab the index page for the requested month.
    url = r'http://lists.icecube.wisc.edu/pipermail/sn-wg/{}-{}/date.html'.format(year, months[month-1])
    # NOTE(review): credentials are hard-coded here; consider supplying them
    # from the environment or a prompt instead of keeping them in the notebook.
    r = get(url, auth=HTTPBasicAuth('icecube', 'skua'))
    # Name the parser explicitly so the result does not depend on which optional
    # parsers happen to be installed (and to avoid bs4's GuessedAtParserWarning).
    soup = BeautifulSoup(r.text, 'html.parser')

    # Alerts older than this are skipped.
    # NOTE(review): the reason for this cutoff is not recorded in the notebook.
    earliest = datetime(2017, 2, 21)

    # Parse the data for candidate alert emails.
    alerts = []

    for anchor in soup.find_all('a'):
        if 'significance SN candidate' not in anchor.text:
            continue

        # Subjects containing either marker are read with every token position
        # shifted right by one (presumably an extra leading word - TODO confirm).
        i = 0
        if 'ATTENTION' in anchor.text or 'LocalTestSystem' in anchor.text:
            i = 1
        tokens = anchor.text.strip().split()

        sig_cor, sig_raw = [float(t) for t in (tokens[1+i], tokens[3+i])]
        # The run token carries one leading non-digit character that is dropped.
        run_id = int(tokens[15+i][1:])

        date_str = ' '.join(tokens[17+i:19+i])
        # Drop fractional seconds if present. partition() leaves the string
        # untouched when there is no '.', whereas slicing at find('.') == -1
        # would silently chop the last digit off the seconds field.
        whole_sec = date_str.partition('.')[0]
        alert_date = datetime.strptime(whole_sec, '%Y-%m-%d %H:%M:%S')
        if alert_date < earliest:
            continue

        alerts.append([sig_cor, sig_raw, run_id, alert_date])

    return alerts

## Access Data

Grab data from 2017, 2018, and 2019 (January through September only for 2019) and write each year to its own text file.

In [88]:
# Write every 2017 alert to a text file: one header line, then one row per alert.
with open('sndaq_alerts_2017.txt', 'w') as outfile:
    outfile.write('# sig_cor sig_raw run_id date\n')
    for month_num in range(1, 13):
        for sig_cor, sig_raw, run_id, alert_date in get_alert_data(2017, month_num):
            row = '{:8.3g} {:8.3g} {:12d}   {}\n'.format(sig_cor, sig_raw, run_id, alert_date)
            outfile.write(row)

In [78]:
# Write every 2018 alert to a text file: one header line, then one row per alert.
with open('sndaq_alerts_2018.txt', 'w') as outfile:
    outfile.write('# sig_cor sig_raw run_id date\n')
    for month_num in range(1, 13):
        for sig_cor, sig_raw, run_id, alert_date in get_alert_data(2018, month_num):
            row = '{:8.3g} {:8.3g} {:12d}   {}\n'.format(sig_cor, sig_raw, run_id, alert_date)
            outfile.write(row)

In [79]:
# Write the 2019 alerts to a text file: one header line, then one row per alert.
# Only months 1-9 (January through September) are requested for this year.
with open('sndaq_alerts_2019.txt', 'w') as outfile:
    outfile.write('# sig_cor sig_raw run_id date\n')
    for month_num in range(1, 10):
        for sig_cor, sig_raw, run_id, alert_date in get_alert_data(2019, month_num):
            row = '{:8.3g} {:8.3g} {:12d}   {}\n'.format(sig_cor, sig_raw, run_id, alert_date)
            outfile.write(row)