In [8]:
import ftplib
from sqlalchemy import create_engine, text
from sqlalchemy.sql import select
from bs4 import BeautifulSoup
import numpy as np 
from io import BytesIO
import pandas as pd
import time 
from urllib.request import urlopen
import numexpr as ne
import requests

This notebook outlines how to dump the lightcurves and alerts into databases. Let's use SQLAlchemy Core.

First, let's put the lightcurves into the database.

In [9]:
engine = create_engine('sqlite:///foo.db')
conn = engine.connect()

# MOA

In [99]:
# So we don't have to deal with the log10 complaining.
import warnings
warnings.filterwarnings("ignore")

url = "http://www.massey.ac.nz/~iabond/moa/alert2022/alert.php"
response = urlopen(url)
html = response.read()
response.close()
soup = BeautifulSoup(html,"html.parser")
links = soup.find_all('a', href=True)
alert_dirs = []
# Get a list of all the bulge microlensing alerts
for ii, link in enumerate(links):
    if 'BLG' in link.text:
        alert_dirs.append(links[ii]['href'])
        
counter = 0

t0 = time.time()

for nn, alert_dir in enumerate(alert_dirs[0:10]):
    url = "http://www.massey.ac.nz/~iabond/moa/alert2022/" + alert_dir
    response = urlopen(url)
    html = response.read()
    response.close()
    soup = BeautifulSoup(html,"html.parser")

    # Get the magnitude and flux offsets.
    foo = soup.find('b').next_sibling
    moff = foo.split('=')[1].split('-')[0].strip(' ')
    bah = soup.find('sub').next_sibling
    foff = bah.split('+')[1].split(')')[0].strip(' ')

    # Now convert these into floats
    m = ne.evaluate(moff)
    f = ne.evaluate(foff)

    # Now scrape the .dat file into a pandas dataframe.
    url = "https://www.massey.ac.nz/~iabond/moa/alert2022/fetchtxt.php?path=moa/ephot/phot-" + \
            alert_dir.strip('display.php?id=') + ".dat"
    bytes_data = requests.get(url).content
    df = pd.read_csv(BytesIO(bytes_data), 
                     delim_whitespace=True, skiprows=11, skipfooter=1, header=None, engine='python', 
                     names=['hjd', 'delta_flux', 'flux_err', 'foo1', 'foo2', 'foo3', 'foo4', 'foo5'])

    df['mag'] = m - 2.5*np.log10(df['delta_flux'] + f)
    df['mag_err'] = 1.09 * df['flux_err']/(df['delta_flux'] + f)
    df['alert_name'] = 'MB22' + str(nn + 1).zfill(3)  # need to make sure this always works.

    df.dropna(axis='index', how='any', inplace=True)

    cols = ['hjd', 'mag', 'mag_err', 'alert_name']
    df[cols].to_sql(con=engine, schema=None, name="moa", if_exists="append", index=False)
    counter += 1
t1 = time.time()

print('Time to write table: {0:.0f} sec for {1} alerts'.format(t1 - t0, counter))

Time to write table: 31 sec for 10 alerts


# OGLE

In [100]:
ftp = ftplib.FTP("ftp.astrouw.edu.pl")
ftp.login()
ftp.cwd("ogle/ogle4/ews/2019/")

counter = 0

t0 = time.time()

for nn in np.arange(start=1, stop=10, step=1):
    ftp.cwd("blg-" + str(nn).zfill(4))
    
    flo = BytesIO()
    ftp.retrbinary('RETR phot.dat', flo.write)
    flo.seek(0)
    df = pd.read_fwf(flo, header=0, names=['hjd', 'mag', 'mag_err', 'see', 'sky'])
    
    df['alert_name'] = 'OB19' + str(nn + 1).zfill(4) 

    cols = ['hjd', 'mag', 'mag_err', 'alert_name']
    df[cols].to_sql(con=engine, schema=None, name="ogle", if_exists="append", index=False)
    
    ftp.cwd("../")
    
    counter += 1
t1 = time.time()

print('Time to write table: {0:.0f} sec for {1} alerts'.format(t1 - t0, counter))

Time to write table: 14 sec for 9 alerts


# KMTNet

In [10]:
counter = 0

t0 = time.time()

for nn in np.arange(start=1, stop=11, step=1):
    # For KMTNet, get data from all the telescopes?
    url = "https://kmtnet.kasi.re.kr/~ulens/event/2022/view.php?event=KMT-2022-BLG-" + str(nn).zfill(4)
    response = urlopen(url)
    html = response.read()
    response.close()
    soup = BeautifulSoup(html,"html.parser")
    
    links = soup.find_all('a', href=True)

    # Only keep I-band lightcurves. 
    pysis_names = links[3].get_text(separator=',').split(',')[:-2]
    
    for pysis_name in pysis_names:
        url = "https://kmtnet.kasi.re.kr/~ulens/event/2022/data/KB22" + str(nn).zfill(4) + "/pysis/" + pysis_name
        bytes_data = requests.get(url).content
        try:
            df = pd.read_csv(BytesIO(bytes_data), 
                             delim_whitespace=True, skiprows=1, header=None, 
                             names=['hjd', 'Delta_flux', 'flux_err', 'mag', 'mag_err', 'fwhm', 'sky', 'secz'])

            df['alert_name'] = 'KB22' + str(nn + 1).zfill(4) 
            df['lightcurve'] = pysis_name

            cols = ['hjd', 'mag', 'mag_err', 'lightcurve', 'alert_name']
            df[cols].to_sql(con=engine, schema=None, name="kmtnet", if_exists="append", index=False)
            counter += 1
        except:
            print('This doesn\'t exist, skipping.'.format(nn))
            continue
t1 = time.time()

print('Time to write table: {0:.0f} sec for {1} files ({2} alerts)'.format(t1 - t0, counter, nn))

KeyboardInterrupt: 

In [11]:
counter = 0

t0 = time.time()

for nn in np.arange(start=1, stop=11, step=1):
    # For KMTNet, get data from all the telescopes?
    url = "https://kmtnet.kasi.re.kr/~ulens/event/2022/view.php?event=KMT-2022-BLG-" + str(nn).zfill(4)
    response = urlopen(url)
    html = response.read()
    response.close()
    soup = BeautifulSoup(html,"html.parser")
    
    links = soup.find_all('a', href=True)

    # Only keep I-band lightcurves. 
    # FIXME: Could probably trim this further down by only taking the one with the most data?
    # But not sure if checking is more expensive than just writing it.
    pysis_names = links[3].get_text(separator=',').split(',')[:-2]
    
    for pysis_name in pysis_names:
        if '_I.pysis' in pysis_name:
            url = "https://kmtnet.kasi.re.kr/~ulens/event/2022/data/KB22" + str(nn+1).zfill(4) + "/pysis/" + pysis_name
            bytes_data = requests.get(url).content
            try:
                df = pd.read_csv(BytesIO(bytes_data), 
                                 delim_whitespace=True, skiprows=1, header=None, 
                                 names=['hjd', 'Delta_flux', 'flux_err', 'mag', 'mag_err', 'fwhm', 'sky', 'secz'])

                df['alert_name'] = 'KB22' + str(nn + 1).zfill(4) 
                df['lightcurve'] = pysis_name
                
                cols = ['hjd', 'mag', 'mag_err', 'lightcurve', 'alert_name']
                df[cols].to_sql(con=engine, schema=None, name="kmtnet", if_exists="append", index=False)
                counter += 1
            except:
                print('This doesn\'t exist, skipping.'.format(nn))
                continue
t1 = time.time()

print('Time to write table: {0:.0f} sec for {1} files ({2} alerts)'.format(t1 - t0, counter, nn))

KeyboardInterrupt: 

In [102]:
# s = text('DROP TABLE kmtnet')
# result = conn.execute(s)

# s = text('DROP TABLE ogle')
# result = conn.execute(s)

# s = text('DROP TABLE moa')
# result = conn.execute(s)

In [None]:
# First way to query the table.
# result = engine.execute("SELECT HJD FROM kmtnet").fetchall()

# Second way to query the table.
# s = text('SELECT * FROM kmtnet')
# result = conn.execute(s)
# result.fetchall()

Next, we'll put the alerts into the database. 

Do we want to post all alert values, or just the subset I have here?

# MOA

In [37]:
url = "http://www.massey.ac.nz/~iabond/moa/alert2022/alert.php"
response = urlopen(url)
html = response.read()
response.close()
soup = BeautifulSoup(html,"html.parser")

# Grab columns for tE and Ibase.
tE = soup.find_all('td')[4::8]
Ibase = soup.find_all('td')[6::8]

# Convert them from strings to floats.
tE_list = [float(ne.evaluate(item.get_text())) for item in tE]
Ibase_list = [float(ne.evaluate(item.get_text())) for item in Ibase]

# Now, grab the classification column.
cat = soup.find_all('td')[7::8]
cat_list = [item.get_text() for item in cat]

# Link to the alert page.
alert_url = soup.find_all('td')[0::8]
moa_alert_url = 'http://www.massey.ac.nz/~iabond/moa/alert2022/'
alert_url_list = [moa_alert_url + item.find_all('a', href=True)[0]['href'] for item in alert_url]

# Alert name
nn = len(tE_list)
alert_name = []
for ii in np.arange(nn):
    alert_name.append('MB22' + str(ii+1).zfill(3))

# Put it all into a dataframe.
df = pd.DataFrame(list(zip(alert_name, cat_list, tE_list, Ibase_list, alert_url_list)),
                 columns =['alert_name', 'class', 'tE', 'Ibase', 'alert_url'])

df.to_sql(con=engine, schema=None, name="moa_alerts", if_exists="replace", index=False)

211

# OGLE 

In [52]:
def ogle_str_to_float(item):
    try:
        return float(ne.evaluate(item.contents[0].replace(u'\n', '')))
    except:
        return
    
# Get alerts using beautiful soup.
url = "https://ogle.astrouw.edu.pl/ogle4/ews/2019/ews.html"
response = urlopen(url)
html = response.read()
response.close()
soup = BeautifulSoup(html,"html.parser")

# Grab columns for tE and Ibase.
tE = soup.find_all('td')[8::15] 
Ibase = soup.find_all('td')[13::15]

# Convert them from strings to floats.
tE_list = [ogle_str_to_float(item) for item in tE]
Ibase_list = [ogle_str_to_float(item) for item in Ibase]
    
# Alert name and page link.
nn = len(tE_list)
alert_name = []
alert_url_list = []
ogle_alert_url = 'https://ogle.astrouw.edu.pl/ogle4/ews/'

for ii in np.arange(nn):
    alert_name.append('OB19' + str(ii+1).zfill(4))
    alert_url_list.append(ogle_alert_url + str(ii+1).zfill(4) + '.html')

# Put it all into a dataframe.
df = pd.DataFrame(list(zip(alert_name, tE_list, Ibase_list, alert_url_list)),
                 columns =['alert_name', 'tE', 'Ibase', 'alert_url'])

df.to_sql(con=engine, schema=None, name="ogle_alerts", if_exists="replace", index=False)

1526

# KMTNet

In [20]:
def kmtnet_str_to_float(item):
    try:
        return float(ne.evaluate(item.get_text().replace(u'\xa0', u'')))
    except:
        return

year = '2021'
url = "https://kmtnet.kasi.re.kr/~ulens/event/" + year + "/"
response = urlopen(url)
html = response.read()
response.close()
soup = BeautifulSoup(html,"html.parser")

if year in ['2022', '2020', '2017', '2016']:
    tE = soup.find_all('td')[7::15][1:]
    Ibase = soup.find_all('td')[10::15][1:]
    cat = soup.find_all('td')[3::15][1:]
elif year in ['2021', '2019', '2018']:
    tE = soup.find_all('td')[8::16][1:]
    Ibase = soup.find_all('td')[11::16][1:]
    cat = soup.find_all('td')[4::15][1:]
else:
    raise Exception('Not a valid year')

tE_list = [kmtnet_str_to_float(item) for item in tE]
Ibase_list = [kmtnet_str_to_float(item) for item in Ibase]
cat_list = [item.get_text().replace(u'\xa0', u'') for item in cat]

# Link to the alert page.
alert_url = soup.find_all('td')[0::15][1:]
kmt_alert_url = 'https://kmtnet.kasi.re.kr/~ulens/event/' + year + '/'
alert_url_list = [kmt_alert_url + item.find_all('a', href=True)[0]['href'] for item in alert_url]

# Alert name
nn = len(tE_list)
alert_name = []
for ii in np.arange(nn):
    alert_name.append('KB22' + str(ii+1).zfill(4))

# Put it all into a dataframe.
df = pd.DataFrame(list(zip(alert_name, cat_list, tE_list, Ibase_list, alert_url_list)),
                 columns =['alert_name', 'class', 'tE', 'Ibase', 'alert_url'])

df.to_sql(con=engine, schema=None, name="kmt_alerts", if_exists="replace", index=False)

IndexError: list index out of range

In [21]:
import pdb
pdb.pm()

> [0;32m<ipython-input-20-ce2013813073>[0m(32)[0;36m<listcomp>[0;34m()[0m
[0;32m     30 [0;31m[0malert_url[0m [0;34m=[0m [0msoup[0m[0;34m.[0m[0mfind_all[0m[0;34m([0m[0;34m'td'[0m[0;34m)[0m[0;34m[[0m[0;36m0[0m[0;34m:[0m[0;34m:[0m[0;36m15[0m[0;34m][0m[0;34m[[0m[0;36m1[0m[0;34m:[0m[0;34m][0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     31 [0;31m[0mkmt_alert_url[0m [0;34m=[0m [0;34m'https://kmtnet.kasi.re.kr/~ulens/event/'[0m [0;34m+[0m [0myear[0m [0;34m+[0m [0;34m'/'[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m---> 32 [0;31m[0malert_url_list[0m [0;34m=[0m [0;34m[[0m[0mkmt_alert_url[0m [0;34m+[0m [0mitem[0m[0;34m.[0m[0mfind_all[0m[0;34m([0m[0;34m'a'[0m[0;34m,[0m [0mhref[0m[0;34m=[0m[0;32mTrue[0m[0;34m)[0m[0;34m[[0m[0;36m0[0m[0;34m][0m[0;34m[[0m[0;34m'href'[0m[0;34m][0m [0;32mfor[0m [0mitem[0m [0;32min[0m [0malert_url[0m[0;34m][0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     33 [0;31m[0;34

ipdb> alert_url[0].find_all('a', href=True)
[]
ipdb> alert_url[1].find_all('a', href=True)
[]
ipdb> alert_url[0]
<td>Related event</td>
--KeyboardInterrupt--

KeyboardInterrupt: Interrupted by user


In [4]:
engine.table_names()

  engine.table_names()


['kmt_alerts', 'kmtnet', 'moa', 'moa_alerts', 'ogle', 'ogle_alerts']

In [106]:
engine.execute("SELECT * FROM kmtnet").fetchall()[0:10]

[(9641.22538, 15.9393, 0.0074, 'KMTA04_I.pysis', 'KB220002'),
 (9644.20631, 16.1599, 0.0122, 'KMTA04_I.pysis', 'KB220002'),
 (9644.26027, 15.9376, 0.0064, 'KMTA04_I.pysis', 'KB220002'),
 (9648.23209, 15.9066, 0.0055, 'KMTA04_I.pysis', 'KB220002'),
 (9649.18022, 15.9085, 0.0056, 'KMTA04_I.pysis', 'KB220002'),
 (9650.17749, 15.8952, 0.0062, 'KMTA04_I.pysis', 'KB220002'),
 (9650.23155, 15.9029, 0.0055, 'KMTA04_I.pysis', 'KB220002'),
 (9652.1816, 15.8897, 0.0058, 'KMTA04_I.pysis', 'KB220002'),
 (9652.23741, 15.8934, 0.0045, 'KMTA04_I.pysis', 'KB220002'),
 (9653.20286, 15.8563, 0.0055, 'KMTA04_I.pysis', 'KB220002')]

In [118]:
engine.execute("SELECT * FROM kmt_alerts").fetchall()[0:10]

[('KB220001', 'clear', 47.92, 16.03, 'https://kmtnet.kasi.re.kr/~ulens/event/2022/view.php?event=KMT-2022-BLG-0001'),
 ('KB220002', 'clear', 6.64, 16.58, 'https://kmtnet.kasi.re.kr/~ulens/event/2022/view.php?event=KMT-2022-BLG-0002'),
 ('KB220003', 'clear', 189.81, 19.14, 'https://kmtnet.kasi.re.kr/~ulens/event/2022/view.php?event=KMT-2022-BLG-0003'),
 ('KB220004', 'clear', 5.0, 17.62, 'https://kmtnet.kasi.re.kr/~ulens/event/2022/view.php?event=KMT-2022-BLG-0004'),
 ('KB220005', 'clear', 30.9, 17.41, 'https://kmtnet.kasi.re.kr/~ulens/event/2022/view.php?event=KMT-2022-BLG-0005'),
 ('KB220006', 'clear', 47.95, 18.86, 'https://kmtnet.kasi.re.kr/~ulens/event/2022/view.php?event=KMT-2022-BLG-0006'),
 ('KB220007', 'clear', 7.5, 16.75, 'https://kmtnet.kasi.re.kr/~ulens/event/2022/view.php?event=KMT-2022-BLG-0007'),
 ('KB220008', 'clear', 99.57, 18.43, 'https://kmtnet.kasi.re.kr/~ulens/event/2022/view.php?event=KMT-2022-BLG-0008'),
 ('KB220009', 'probable', 6.91, 17.73, 'https://kmtnet.kasi.r

In [119]:
engine.execute("SELECT * FROM ogle").fetchall()[0:10]

[(2457424.86801, 17.262, 0.013, 'OB220002'),
 (2457426.88843, 17.274, 0.025, 'OB220002'),
 (2457427.85677, 17.292, 0.018, 'OB220002'),
 (2457428.85249, 17.262, 0.013, 'OB220002'),
 (2457429.84937, 17.274, 0.015, 'OB220002'),
 (2457431.84908, 17.264, 0.014, 'OB220002'),
 (2457432.85138, 17.241, 0.013, 'OB220002'),
 (2457433.84953, 17.287, 0.014, 'OB220002'),
 (2457434.84167, 17.253, 0.017, 'OB220002'),
 (2457435.84744, 17.325, 0.018, 'OB220002')]

In [120]:
engine.execute("SELECT * FROM ogle_alerts").fetchall()[0:10]

[('OB190001', 54.554, 17.267, 'https://ogle.astrouw.edu.pl/ogle4/ews/0000.html'),
 ('OB190002', 2.061, 15.999, 'https://ogle.astrouw.edu.pl/ogle4/ews/0001.html'),
 ('OB190003', 51.13, 18.293, 'https://ogle.astrouw.edu.pl/ogle4/ews/0002.html'),
 ('OB190004', 2.673, 19.763, 'https://ogle.astrouw.edu.pl/ogle4/ews/0003.html'),
 ('OB190005', 17.447, 18.588, 'https://ogle.astrouw.edu.pl/ogle4/ews/0004.html'),
 ('OB190006', 11.543, 19.354, 'https://ogle.astrouw.edu.pl/ogle4/ews/0005.html'),
 ('OB190007', 33.064, 18.977, 'https://ogle.astrouw.edu.pl/ogle4/ews/0006.html'),
 ('OB190008', 103.082, 18.373, 'https://ogle.astrouw.edu.pl/ogle4/ews/0007.html'),
 ('OB190009', 83.851, 19.07, 'https://ogle.astrouw.edu.pl/ogle4/ews/0008.html'),
 ('OB190010', 36.882, 13.683, 'https://ogle.astrouw.edu.pl/ogle4/ews/0009.html')]

In [121]:
engine.execute("SELECT * FROM moa").fetchall()[0:10]

[(2453665.89736, 17.31345229775939, 0.30650568956881086, 'MB22001'),
 (2453666.87143, 17.297472140289724, 0.29902339121361526, 'MB22001'),
 (2453667.86726, 18.806602068065125, 1.428339356277067, 'MB22001'),
 (2453667.91423, 18.595125021713844, 0.5900105029368989, 'MB22001'),
 (2453824.13931, 21.09675360031579, 2.0866864926727384, 'MB22001'),
 (2453837.12457, 21.15913915153447, 1.0965901846786645, 'MB22001'),
 (2453837.17138, 20.641273090478833, 0.9590328599599511, 'MB22001'),
 (2453839.07034, 20.467020389216028, 0.5677064613487173, 'MB22001'),
 (2453839.11008, 21.536248087070604, 1.6210742184679157, 'MB22001'),
 (2453839.14989, 22.266006587346027, 2.9583624975803273, 'MB22001')]

In [122]:
engine.execute("SELECT * FROM moa_alerts").fetchall()[0:10]

[('MB22000', 'microlensing', 50.5, 19.63, 'http://www.massey.ac.nz/~iabond/moa/alert2022/display.php?id=gb10-R-6-69586'),
 ('MB22001', 'microlensing', 104.99, 15.47, 'http://www.massey.ac.nz/~iabond/moa/alert2022/display.php?id=gb12-R-9-23523'),
 ('MB22002', 'microlensing', 40.81, 16.57, 'http://www.massey.ac.nz/~iabond/moa/alert2022/display.php?id=gb13-R-9-75098'),
 ('MB22003', 'microlensing', 24.41, 16.33, 'http://www.massey.ac.nz/~iabond/moa/alert2022/display.php?id=gb17-R-9-23110'),
 ('MB22004', 'microlensing', 40.69, 16.96, 'http://www.massey.ac.nz/~iabond/moa/alert2022/display.php?id=gb5-R-8-125249'),
 ('MB22005', 'microlensing', 12.53, 15.73, 'http://www.massey.ac.nz/~iabond/moa/alert2022/display.php?id=gb8-R-2-48157'),
 ('MB22006', 'microlensing', 125.36, 17.36, 'http://www.massey.ac.nz/~iabond/moa/alert2022/display.php?id=gb9-R-2-225288'),
 ('MB22007', 'microlensing', 54.28, 16.82, 'http://www.massey.ac.nz/~iabond/moa/alert2022/display.php?id=gb9-R-8-73343'),
 ('MB22008', 'mic

In [30]:
%%writefile query_alerts.py

# FIXME: ONLY DOWNLOAD ALERT YEAR + PREVIOUS YEAR (otherwise tooons of data.)

import ftplib
from sqlalchemy import create_engine, text
from sqlalchemy.sql import select
from bs4 import BeautifulSoup
import numpy as np 
from io import BytesIO
import pandas as pd
import time 
from urllib.request import urlopen
import numexpr as ne
import requests
engine = create_engine('sqlite:///microlensing.db')
conn = engine.connect()

def get_moa_lightcurves(year):
    # So we don't have to deal with the log10 complaining.
    import warnings
    warnings.filterwarnings("ignore")

    year = str(year)

    url = "http://www.massey.ac.nz/~iabond/moa/alert" + year + "/alert.php"
    response = urlopen(url)
    html = response.read()
    response.close()
    soup = BeautifulSoup(html,"html.parser")
    links = soup.find_all('a', href=True)
    alert_dirs = []
    # Get a list of all the bulge microlensing alerts
    for ii, link in enumerate(links):
        if 'BLG' in link.text:
            alert_dirs.append(links[ii]['href'])

    for nn, alert_dir in enumerate(alert_dirs[0:10]):
        url = "http://www.massey.ac.nz/~iabond/moa/alert" + year + "/" + alert_dir
        response = urlopen(url)
        html = response.read()
        response.close()
        soup = BeautifulSoup(html,"html.parser")

        # Get the magnitude and flux offsets.
        foo = soup.find('b').next_sibling
        moff = foo.split('=')[1].split('-')[0].strip(' ')
        bah = soup.find('sub').next_sibling
        foff = bah.split('+')[1].split(')')[0].strip(' ')

        # Now convert these into floats
        m = ne.evaluate(moff)
        f = ne.evaluate(foff)

        # Now scrape the .dat file into a pandas dataframe.
        url = "https://www.massey.ac.nz/~iabond/moa/alert" + year + "/fetchtxt.php?path=moa/ephot/phot-" + \
                alert_dir.strip('display.php?id=') + ".dat"
        bytes_data = requests.get(url).content
        df = pd.read_csv(BytesIO(bytes_data), 
                         delim_whitespace=True, skiprows=11, skipfooter=1, header=None, engine='python', 
                         names=['hjd', 'delta_flux', 'flux_err', 'foo1', 'foo2', 'foo3', 'foo4', 'foo5'])

        df['mag'] = m - 2.5*np.log10(df['delta_flux'] + f)
        df['mag_err'] = 1.09 * df['flux_err']/(df['delta_flux'] + f)
        df['alert_name'] = 'MB' + year[2:] + str(nn + 1).zfill(3)  # need to make sure this always works.
        
        df['hjd'] -= 2450000

        df.dropna(axis='index', how='any', inplace=True)

        cols = ['hjd', 'mag', 'mag_err', 'alert_name']
        df[cols].to_sql(con=engine, schema=None, name="moa_" + year, if_exists="append", index=False)
        
def get_ogle_lightcurves(year):
    year = str(year)

    ftp = ftplib.FTP("ftp.astrouw.edu.pl")
    ftp.login()
    ftp.cwd("ogle/ogle4/ews/" + year + "/")

    for nn in np.arange(start=1, stop=10, step=1):
        ftp.cwd("blg-" + str(nn).zfill(4))

        flo = BytesIO()
        ftp.retrbinary('RETR phot.dat', flo.write)
        flo.seek(0)
        df = pd.read_fwf(flo, header=0, names=['hjd', 'mag', 'mag_err', 'see', 'sky'])

        df['alert_name'] = 'OB' + year[2:] + str(nn + 1).zfill(4) 

        cols = ['hjd', 'mag', 'mag_err', 'alert_name']
        df[cols].to_sql(con=engine, schema=None, name="ogle_" + year, if_exists="append", index=False)

        ftp.cwd("../")
        
def get_kmtnet_lightcurves(year):
    year = str(year)
    
    for nn in np.arange(start=1, stop=11, step=1):
        # For KMTNet, get data from all the telescopes?
        url = "https://kmtnet.kasi.re.kr/~ulens/event/" + year + "/view.php?event=KMT-" + year + \
                "-BLG-" + str(nn).zfill(4)
        response = urlopen(url)
        html = response.read()
        response.close()
        soup = BeautifulSoup(html,"html.parser")

        links = soup.find_all('a', href=True)

        # Only keep I-band lightcurves. 
        pysis_names = links[3].get_text(separator=',').split(',')[:-2]
        for pysis_name in pysis_names:
            if '_I.pysis' in pysis_name:
                url = "https://kmtnet.kasi.re.kr/~ulens/event/" + year + "/data/KB" + \
                        year[2:] + str(nn).zfill(4) + "/pysis/" + pysis_name
                print(url)
                bytes_data = requests.get(url).content
                try:
                    df = pd.read_csv(BytesIO(bytes_data), 
                                     delim_whitespace=True, skiprows=1, header=None, 
                                     names=['hjd', 'Delta_flux', 'flux_err', 'mag', 'mag_err', 'fwhm', 'sky', 'secz'])

                    df['alert_name'] = 'KB' + year[2:] + str(nn).zfill(4) 
                    df['lightcurve'] = pysis_name

                    cols = ['hjd', 'mag', 'mag_err', 'lightcurve', 'alert_name']
                    df[cols].to_sql(con=engine, schema=None, name="kmtnet_" + year, if_exists="append", index=False)
                except:
                    print('This doesn\'t exist, skipping.'.format(nn))
                    continue
                    
def get_moa_alerts(year):
    year = str(year)

    url = "http://www.massey.ac.nz/~iabond/moa/alert2022/alert.php"
    response = urlopen(url)
    html = response.read()
    response.close()
    soup = BeautifulSoup(html,"html.parser")

    
    # Grab columns for tE and Ibase.
    tE = soup.find_all('td')[4::8]
    Ibase = soup.find_all('td')[6::8]

    # Convert them from strings to floats.
    tE_list = [float(ne.evaluate(item.get_text())) for item in tE]
    Ibase_list = [float(ne.evaluate(item.get_text())) for item in Ibase]

    # Now, grab the classification column.
    cat = soup.find_all('td')[7::8]
    cat_list = [item.get_text() for item in cat]

    # Link to the alert page.
    alert_url = soup.find_all('td')[0::8]
    moa_alert_url = 'http://www.massey.ac.nz/~iabond/moa/alert' + year + '/'
    alert_url_list = [moa_alert_url + item.find_all('a', href=True)[0]['href'] for item in alert_url]

    # Alert name
    nn = len(tE_list)
    alert_name = []
    for ii in np.arange(nn):
        alert_name.append('MB' + year[2:] + str(ii+1).zfill(3))

    # Put it all into a dataframe.
    df = pd.DataFrame(list(zip(alert_name, cat_list, tE_list, Ibase_list, alert_url_list)),
                     columns =['alert_name', 'class', 'tE', 'Ibase', 'alert_url'])

    df.to_sql(con=engine, schema=None, name="moa_alerts_" + year, if_exists="replace", index=False)
    
def get_ogle_alerts(year):
    def ogle_str_to_float(item):
        try:
            return float(ne.evaluate(item.contents[0].replace(u'\n', '')))
        except:
            return

    year = str(year)
  
    # Get alerts using beautiful soup.
    url = "https://ogle.astrouw.edu.pl/ogle4/ews/" + year + "/ews.html"
    response = urlopen(url)
    html = response.read()
    response.close()
    soup = BeautifulSoup(html,"html.parser")

    # Grab columns for tE and Ibase.
    tE = soup.find_all('td')[8::15] 
    Ibase = soup.find_all('td')[13::15]

    # Convert them from strings to floats.
    tE_list = [ogle_str_to_float(item) for item in tE]
    Ibase_list = [ogle_str_to_float(item) for item in Ibase]

    # Alert name and page link.
    nn = len(tE_list)
    alert_name = []
    alert_url_list = []
    ogle_alert_url = 'https://ogle.astrouw.edu.pl/ogle4/ews/'

    for ii in np.arange(nn):
        alert_name.append('OB' + year[2:] + str(ii+1).zfill(4))
        alert_url_list.append(ogle_alert_url + str(ii+1).zfill(4) + '.html')

    # Put it all into a dataframe.
    df = pd.DataFrame(list(zip(alert_name, tE_list, Ibase_list, alert_url_list)),
                     columns =['alert_name', 'tE', 'Ibase', 'alert_url'])

    df.to_sql(con=engine, schema=None, name="ogle_alerts_" + year, if_exists="replace", index=False)
    
def get_kmtnet_alerts(year):
    """
    year is an integer.
    """
    def kmtnet_str_to_float(item):
        try:
            return float(ne.evaluate(item.get_text().replace(u'\xa0', u'')))
        except:
            return

    year = str(year)
    url = "https://kmtnet.kasi.re.kr/~ulens/event/" + year + "/"
    response = urlopen(url)
    html = response.read()
    response.close()
    soup = BeautifulSoup(html,"html.parser")

    if year in ['2022', '2020', '2017', '2016']:
        tE = soup.find_all('td')[7::15][1:]
        Ibase = soup.find_all('td')[10::15][1:]
        cat = soup.find_all('td')[3::15][1:]
    elif year in ['2021', '2019', '2018']:
        tE = soup.find_all('td')[8::16][1:]
        Ibase = soup.find_all('td')[11::16][1:]
        cat = soup.find_all('td')[4::16][1:]
    else:
        raise Exception('Not a valid year')

    tE_list = [kmtnet_str_to_float(item) for item in tE]
    Ibase_list = [kmtnet_str_to_float(item) for item in Ibase]
    cat_list = [item.get_text().replace(u'\xa0', u'') for item in cat]

    # Link to the alert page.
    if year in ['2022', '2020', '2017', '2016']:
        alert_url = soup.find_all('td')[0::15][1:]
    elif year in ['2021', '2019', '2018']:
        alert_url = soup.find_all('td')[0::16][1:]
    else:
        raise Exception('Not a valid year')
    kmt_alert_url = 'https://kmtnet.kasi.re.kr/~ulens/event/' + year + '/'
    alert_url_list = [kmt_alert_url + item.find_all('a', href=True)[0]['href'] for item in alert_url]

    # Alert name
    nn = len(tE_list)
    alert_name = []
    for ii in np.arange(nn):
        alert_name.append('KB' + year[2:] + str(ii+1).zfill(4))

    # Put it all into a dataframe.
    df = pd.DataFrame(list(zip(alert_name, cat_list, tE_list, Ibase_list, alert_url_list)),
                     columns =['alert_name', 'class', 'tE', 'Ibase', 'alert_url'])

    df.to_sql(con=engine, schema=None, name="kmtnet_alerts_" + year, if_exists="replace", index=False)

Overwriting query_alerts.py
