In [2]:
import os
from os import path
import sqlite3
global_sqlite_filename='apf.db'
import pandas
from datetime import datetime, timedelta
import numpy as np
from astropy.time import Time
from sqlalchemy import create_engine
import pytz
import numpy as np

In [3]:
class col_types:
    """Holder for the SQLite column type names used when building CREATE TABLE statements.

    Each instance exposes TEXT, INTEGER, REAL, BLOB and NULL as attributes
    whose value is the attribute's own name as a string.
    """

    def __init__(self):
        # Attribute order matches the original assignment order.
        for type_name in ('TEXT', 'INTEGER', 'REAL', 'BLOB', 'NULL'):
            setattr(self, type_name, type_name)
        
# Shared instance of the type-name holder; the column-definition helpers
# (get_telemetry_columns / get_velocity_columns) read their type names from it.
ct = col_types()
# Quick sanity check that the attribute resolves to the expected string.
print(ct.TEXT)

#print(os.getcwd())
#print(os.listdir())

TEXT


In [4]:
def create_database(sqlite_filename=global_sqlite_filename):
    """Connect to the SQLite file ``sqlite_filename`` and return a cursor on it.

    Only the cursor is returned; the underlying connection stays reachable
    through the cursor's ``connection`` attribute.
    """
    return sqlite3.connect(sqlite_filename).cursor()
       
def get_telemetry_columns():
    """Return the telemetry schema as an ordered dict of column name -> SQLite type name.

    The insertion order is significant: it is used both as the column order of
    the CREATE TABLE statement and as the positional column names when the
    telemetry files are read.
    """
    # NOTE(review): TARGET is declared with the type name NULL, and OUTFILE,
    # OBSNUM and MODE are declared REAL -- confirm these match the data.
    real_columns = (
        'MIDPTFIN',
        'AZ', 'EL',
        'AZENCPOS', 'ELENCPOS',
        'AZENCVEL', 'ELENCVEL',
        'AZFLWERR', 'ELFLWERR',
        'OUTFILE', 'OBSNUM', 'MODE',
        'AVG_FWHM',
        'M5WIND', 'M5WINDAZ',
        'TAVERAGE', 'TM1S210', 'TM2CAIR',
        'OFFSET_AZ', 'OFFSET_EL',
        'RMSOFFSET_AZ', 'RMSOFFSET_EL',
        'AVGOFFSET_AZ', 'AVGOFFSET_EL',
    )

    columns = {'DateTime': ct.TEXT, 'TARGET': ct.NULL}
    for column_name in real_columns:
        columns[column_name] = ct.REAL
    columns['HATCHPOS'] = ct.TEXT
    columns['EVENT'] = ct.TEXT
    return columns
    
def get_velocity_columns():
    """Return the velocity schema as an ordered list of (column name, SQLite type name) pairs.

    The seven REAL columns come first, followed by the three TEXT columns
    (DateTimeUTC, DateTimeLocal, starID).
    """
    real_columns = (
        'MdptJulian',
        'velocity',
        'velocity_error',
        'stellar_activity1',
        'stellar_activity2',
        'total_counts',
        'brad_doesnt_know',
    )
    text_columns = ('DateTimeUTC', 'DateTimeLocal', 'starID')

    columns = [(column_name, ct.REAL) for column_name in real_columns]
    columns.extend((column_name, ct.TEXT) for column_name in text_columns)
    return columns
    
def create_telemetry_table(cursor):
    """Create the ``telemetry`` table, with ``DateTime`` as primary key, via ``cursor``.

    Prints the number of columns and the generated CREATE TABLE statement
    before executing it. Returns None.
    """
    column_defs = []
    for column_name, sql_type in get_telemetry_columns().items():
        column_defs.append('{} {}'.format(column_name, sql_type))
    print(len(column_defs))

    sql_create_string = ''.join([
        'CREATE TABLE telemetry (',
        ', '.join(column_defs),
        ', PRIMARY KEY (DateTime))',
    ])
    print(sql_create_string)
    cursor.execute(sql_create_string)


def create_velocity_table(cursor):
    """Create the ``velocity`` table, with ``DateTimeLocal`` as primary key, via ``cursor``.

    Prints the generated CREATE TABLE statement before executing it.
    Returns None.
    """
    column_defs = ', '.join(
        '{} {}'.format(column_name, sql_type)
        for column_name, sql_type in get_velocity_columns()
    )
    sql_create_string = ''.join([
        'CREATE TABLE velocity (',
        column_defs,
        ', PRIMARY KEY (DateTimeLocal))',
    ])
    print(sql_create_string)
    cursor.execute(sql_create_string)

def open_connection(sqlite_filename=global_sqlite_filename):
    """Return a sqlite3 connection to ``sqlite_filename`` opened through a ``file:`` URI.

    The URI carries ``mode=rw``, i.e. the database is opened read-write.
    """
    return sqlite3.connect('file:{}?mode=rw'.format(sqlite_filename), uri=True)
    

In [5]:
# Open the default database file and keep a cursor around for the
# table-creation helpers, whose calls are currently disabled below.
conn = open_connection()
cursor = conn.cursor()
#create_telemetry_table(cursor)
#create_velocity_table(cursor)

In [6]:
def populate_telemetry_table(conn, filenames):
    """Append every telemetry file in ``filenames`` to the ``telemetry`` table.

    Each file is read as tab-separated text: the first row is skipped, lines
    starting with ``#`` are treated as comments, and columns are named
    positionally from ``get_telemetry_columns()``.

    Parameters
    ----------
    conn : connection object passed straight to ``DataFrame.to_sql``.
    filenames : iterable of paths to the telemetry files.

    A file that cannot be read or inserted is reported (its path, then the
    exception) and skipped, so the remaining files are still processed.
    """
    # read_csv documents `names` as a sequence of labels, so pass the column
    # names as an explicit list instead of the schema dict itself. The schema
    # does not change between files, so it is built once.
    column_names = list(get_telemetry_columns())

    for csv_file in filenames:
        try:
            df = pandas.read_csv(csv_file, sep='\t', skiprows=1, header=None,
                                 names=column_names, comment='#')
            df.to_sql('telemetry', conn, if_exists='append', index=False)
        except Exception as e:
            # Deliberate best-effort: report the failing file and carry on.
            print(csv_file)
            print(e)

def populate_telemetry(conn=None, dirname=None, batchsize=5):
    """Load every file found in ``dirname`` into the ``telemetry`` table.

    Directory entries are joined with ``dirname``, sorted by path, and handed
    to ``populate_telemetry_table`` in consecutive groups of at most
    ``batchsize`` paths.

    Parameters
    ----------
    conn : connection object forwarded to ``populate_telemetry_table``.
    dirname : directory containing the telemetry files (required).
    batchsize : maximum number of files per group; must be >= 1.

    Raises
    ------
    ValueError
        If ``dirname`` is None or ``batchsize`` is smaller than 1.
    """
    # The None default used to fall through to os.path.join(None, ...) and
    # fail with an unrelated-looking TypeError; fail early and clearly.
    if dirname is None:
        raise ValueError('dirname is required: pass the directory that holds the telemetry files')
    # A negative step made range() yield nothing, silently loading no files.
    if batchsize < 1:
        raise ValueError('batchsize must be >= 1, got {!r}'.format(batchsize))

    filenames = sorted(os.path.join(dirname, entry) for entry in os.listdir(dirname))

    for start in range(0, len(filenames), batchsize):
        # Slicing past the end is safe, so the last group may be shorter.
        populate_telemetry_table(conn, filenames[start:start + batchsize])
        
    


        

In [7]:
#populate_telemetry(conn, '../telemetry_data', batchsize=10)

In [8]:
def load_velocity_csv(dirname, filename):
    """Read one whitespace-separated velocity file into a DataFrame.

    Parameters
    ----------
    dirname : directory containing the file.
    filename : name of the file inside ``dirname``.

    Returns
    -------
    pandas.DataFrame whose columns are named after the velocity schema with
    its last two entries removed.
    """
    full_path = os.path.join(dirname, filename)
    # [:-2] removes DateTimeLocal and starID, which are calculated later.
    # NOTE(review): DateTimeUTC is also calculated later (add_calculated_columns
    # overwrites it) but is still included here as the 8th column name --
    # confirm how many columns the files really have before changing the slice.
    columns = get_velocity_columns()[:-2]
    # Raw string: '\s' in a normal string literal is an invalid escape sequence.
    df = pandas.read_csv(full_path, sep=r'\s+',
                         names=[name for name, _ in columns])

    return df
    
    
# Time zone that convert_julian_to_datetime_local uses to express times as local wall-clock time.
california_tzinfo = pytz.timezone('US/Pacific')

def convert_julian_to_datetime_local(juliantime):
    """Return a single Julian date as a local-time ISO string without the UTC offset.

    The time zone is ``california_tzinfo``. Only one value is handled per
    call; this is not vectorized.
    """
    local_dt = Time(juliantime, format='jd').to_datetime(timezone=california_tzinfo)
    iso_with_offset = local_dt.isoformat()
    # Cut the last six characters, i.e. the trailing "+HH:MM" / "-HH:MM" offset.
    return iso_with_offset[:-6]

def make_array_of_local_datetimes(julian_array):
    """Convert an iterable of Julian dates to an array of local ISO time strings.

    Parameters
    ----------
    julian_array : iterable of Julian dates (list, ndarray, pandas Series, ...).

    Returns
    -------
    numpy.ndarray of unicode strings with a fixed width of 26 characters, one
    entry per input value, in input order. An empty input gives an empty array.
    """
    # A plain fixed-width unicode array replaces the legacy np.chararray
    # (kept by NumPy only for Numarray backwards compatibility); the width of
    # 26 matches the itemsize the chararray was created with.
    return np.array(
        [convert_julian_to_datetime_local(julian_time) for julian_time in julian_array],
        dtype='U26',
    )
    

def convert_julian_to_datetime_utc(juliantimes):
    """Build a ``Time`` from Julian date(s) and return its ``isot`` representation."""
    return Time(juliantimes, format='jd').isot
    
def add_calculated_columns(df, filename):
    """Return a copy of ``df`` with the three derived columns filled in.

    ``DateTimeUTC`` and ``DateTimeLocal`` are computed from the ``MdptJulian``
    column; ``starID`` is set to ``filename`` on every row. The input frame
    is left unmodified.
    """
    augmented = df.copy()
    julian_dates = augmented.MdptJulian
    row_count = len(augmented.index)

    augmented['DateTimeUTC'] = convert_julian_to_datetime_utc(julian_dates)
    augmented['DateTimeLocal'] = make_array_of_local_datetimes(julian_dates)
    augmented['starID'] = [filename] * row_count

    return augmented
    

def populate_velocity(conn, dirname='../standard_star_velocity_measurements/', debug=False,
                      filenames=None):
    """Load velocity files from ``dirname`` and append them to the ``velocity`` table.

    Parameters
    ----------
    conn : connection object passed to ``DataFrame.to_sql``.
    dirname : directory containing the velocity files.
    debug : when True, print each completed DataFrame instead of writing it.
    filenames : optional iterable of file names inside ``dirname``. When
        omitted, the three standard-star files are loaded.

    For every file a 'started' line is printed before loading and a
    'tried to add' line with the row count afterwards (also in debug mode).
    """
    if filenames is None:
        filenames = ['HD10700_APF.vels', 'HD185144_APF.vels', 'HD9407_APF.vels']

    for velocity_file in filenames:
        print('started {}'.format(velocity_file))
        df_incomplete = load_velocity_csv(dirname, velocity_file)
        df_complete = add_calculated_columns(df_incomplete, velocity_file)
        if debug:
            print(df_complete)
        else:
            df_complete.to_sql('velocity', conn, if_exists='append', index=False)
        print('tried to add {} entries to \'velocity\''.format(len(df_complete)))

#velocity_df = load_velocity_csv('../standard_star_velocity_measurements/', 'HD10700_APF.vels')
#add_calculated_columns(velocity_df, 'HD10700_APF.vels' )


In [9]:
# Load the default standard-star velocity files into the 'velocity' table.
# NOTE(review): populate_velocity writes with if_exists='append', so running
# this cell again attempts to insert the same rows a second time.
populate_velocity(conn)

started HD10700_APF.vels
tried to add 755 entries to 'velocity'
started HD185144_APF.vels
tried to add 1655 entries to 'velocity'
started HD9407_APF.vels
tried to add 694 entries to 'velocity'


In [10]:
# SQLAlchemy engine for the queries below. The path is derived from
# global_sqlite_filename so it always points at the same database file that
# the sqlite3 helpers open.
engine = create_engine('sqlite:///{}'.format(global_sqlite_filename), echo=False)


In [41]:
#add new table telemetrytimelookup
def drop_telemetry_time_lookup(engine):
    """Drop the ``telemetry_time_lookup`` table if it exists, then print a confirmation.

    Parameters
    ----------
    engine : SQLAlchemy engine connected to the database.
    """
    # Local import: the file-level import only brings in create_engine.
    from sqlalchemy import text

    # Engine.execute() no longer exists in SQLAlchemy 2.0. engine.begin()
    # yields a connection inside a transaction that commits on success.
    with engine.begin() as connection:
        connection.execute(text("""DROP TABLE IF EXISTS telemetry_time_lookup;"""))
    print('dropped telemetry_time_lookup.')

def create_telemetry_time_lookup(engine):
    """Create the ``telemetry_time_lookup`` table, then print a confirmation.

    The table is keyed by ``DateTime``, which is also declared as a foreign
    key into ``telemetry(DateTime)``.

    Parameters
    ----------
    engine : SQLAlchemy engine connected to the database.
    """
    # Local import: the file-level import only brings in create_engine.
    from sqlalchemy import text

    # NOTE(review): YearMonthDayHour is declared REAL, but
    # populate_telemetry_time_lookup fills it with a strftime('%Y-%m-%d-%H')
    # string -- confirm the declared type is intended.
    create_sql = """
    CREATE TABLE telemetry_time_lookup (
    DateTime TEXT PRIMARY KEY,
    YearMonthDayHour REAL,
    EpocSeconds REAL,
    Event TEXT,
    ExposureID TEXT,
    FOREIGN KEY (DateTime) REFERENCES telemetry(DateTime) 
    )"""

    # Engine.execute() no longer exists in SQLAlchemy 2.0. engine.begin()
    # yields a connection inside a transaction that commits on success.
    with engine.begin() as connection:
        connection.execute(text(create_sql))
    print('created telemetry_time_lookup.')

def populate_telemetry_time_lookup(engine):
    """Fill ``telemetry_time_lookup`` with one row per ``telemetry`` row.

    For every telemetry row the statement copies DateTime and Event and
    derives YearMonthDayHour (``strftime('%Y-%m-%d-%H', ...)``) and
    EpocSeconds (``strftime('%s', ...)``). ExposureID is not populated.

    Parameters
    ----------
    engine : SQLAlchemy engine connected to the database.
    """
    # Local import: the file-level import only brings in create_engine.
    from sqlalchemy import text

    insert_sql = """INSERT INTO telemetry_time_lookup (DateTime,
        YearMonthDayHour, EpocSeconds, Event) 
    SELECT t.DateTime, strftime('%Y-%m-%d-%H', t.datetime), strftime('%s', t.DateTime), t.Event
    from telemetry t
    """

    # Engine.execute() no longer exists in SQLAlchemy 2.0. engine.begin()
    # yields a connection inside a transaction that commits on success.
    with engine.begin() as connection:
        connection.execute(text(insert_sql))
    print('populated created telemetry_time_lookup.')
    

def create_index_in_telemetry_time_lookup(engine):
    """Create index ``idx_yearmonthdayhour`` on (YearMonthDayHour, EpocSeconds).

    Parameters
    ----------
    engine : SQLAlchemy engine connected to the database.
    """
    # Local import: the file-level import only brings in create_engine.
    from sqlalchemy import text

    index_sql = """CREATE INDEX idx_yearmonthdayhour ON 
        telemetry_time_lookup(YearMonthDayHour, EpocSeconds)
    """

    # Engine.execute() no longer exists in SQLAlchemy 2.0. engine.begin()
    # yields a connection inside a transaction that commits on success.
    with engine.begin() as connection:
        connection.execute(text(index_sql))
    print('created index on YearMonthDayHour in TTC.')
    
# One-off setup of the lookup table; enable these when (re)building it.
#drop_telemetry_time_lookup(engine)
#create_telemetry_time_lookup(engine)
#populate_telemetry_time_lookup(engine)
#create_index_in_telemetry_time_lookup(engine)

# Telemetry events within 50 seconds of epoch second 1436589861, restricted to
# the hour bucket '2015-07-11-04' first so the indexed column narrows the scan.
df = pandas.read_sql_query("""select epocseconds - 1436589861.0, event from (select * from telemetry_time_lookup
    where YearMonthDayHour = '2015-07-11-04') where abs(epocseconds - 1436589861.0) < 50
    limit 100
    """, con=engine)
# Use the full option key: the bare pattern 'precision' matches more than one
# registered option in recent pandas versions and is rejected as ambiguous.
pandas.set_option('display.precision', 18)
print(df)



    epocseconds - 1436589861.0            Event
0                        -49.0  ControllerReady
1                        -48.0  ControllerReady
2                        -47.0  ControllerReady
3                        -46.0  ControllerReady
4                        -44.0  ControllerReady
5                        -43.0  ControllerReady
6                        -42.0  ControllerReady
7                        -41.0  ControllerReady
8                        -40.0       EraseBegin
9                        -38.0       EraseBegin
10                       -37.0    ExposureBegin
11                       -36.0    ExposureBegin
12                       -34.0    ExposureBegin
13                       -33.0    ExposureBegin
14                       -31.0    ExposureBegin
15                       -30.0    ExposureBegin
16                       -28.0    ExposureBegin
17                       -27.0    ExposureBegin
18                       -25.0    ExposureBegin
19                       -24.0    Exposu

In [None]:
# Preview the first rows of the lookup table.
# NOTE(review): the previous statement was not valid SQL (an unclosed
# sub-select and a WHERE with no condition). It is reduced here to a plain
# preview; add the intended filter back once it is known.
print(pandas.read_sql_query("""select * from telemetry_time_lookup
                            limit 10;
                        """, engine))

# Use the full option key: the bare pattern 'precision' matches more than one
# registered option in recent pandas versions and is rejected as ambiguous.
pandas.set_option('display.precision', 15)

#where tt.Datetime between "2015-04-02T01:30:25.832007"
  #                          and "2015-04-03T01:32:44.832007"


In [31]:
# First ten velocity rows after the given local timestamp, together with the
# value SQLite's strftime('%s', ...) returns for each DateTimeLocal.
df = pandas.read_sql_query("""select datetimelocal, strftime('%s', datetimelocal) from velocity where
                datetimelocal >  '2015-07-10T04:48:05.471991' limit 10
                        """, engine)
print(df)

                DateTimeLocal strftime('%s', datetimelocal)
0  2015-07-11T04:44:21.696017                    1436589861
1  2015-07-11T04:46:24.383985                    1436589984
2  2015-07-13T04:17:53.663995                    1436761073
3  2015-07-13T04:19:58.943983                    1436761198
4  2015-07-20T03:57:50.976010                    1437364670
5  2015-07-20T04:00:58.463989                    1437364858
6  2015-07-25T03:52:53.760000                    1437796373
7  2015-07-25T03:54:20.160014                    1437796460
8  2015-07-25T03:55:50.016014                    1437796550
9  2015-07-25T03:57:19.008008                    1437796639


In [None]:
# NOTE(review): the query below is commented out, so this cell only displays
# whatever `df` an earlier cell left behind.
#df = pandas.read_sql_query("""select distinct event from telemetry
                        
#                        limit 100
#                        """, engine)

df

In [None]:
# Distinct telemetry events recorded inside the given DateTime window.
# String literals use single quotes: in SQLite double quotes denote
# identifiers and are only accepted as strings through a legacy fallback.
df = pandas.read_sql_query("""select distinct event from
                        (select event from telemetry where
                        DateTime between '2015-01-01T17:00:00.0000' and
                        '2015-01-02T19:01:00.0000')
                        """, engine)

df

In [None]:
# Number of telemetry rows per event inside the given DateTime window.
# String literals use single quotes: in SQLite double quotes denote
# identifiers and are only accepted as strings through a legacy fallback.
df = pandas.read_sql_query(
"""SELECT
  event,
  COUNT(*) AS num
FROM
  (select * from telemetry where
                        DateTime between '2015-02-02T17:00:00.0000' and
                        '2015-02-03T05:01:00.0000')
GROUP BY
  event
                        """, engine)

df


In [None]:
# First 100 ExposureEnd / ReadoutEnd telemetry rows before the given DateTime,
# in DateTime order.
# String literals use single quotes: in SQLite double quotes denote
# identifiers and are only accepted as strings through a legacy fallback.
df = pandas.read_sql_query(
"""SELECT DateTime, event from (select * from telemetry where DateTime < '2015-01-02T19:01:00.0000'
    and (event = 'ExposureEnd' or event = 'ReadoutEnd')) order by datetime
limit 100
                        """, engine)

df


In [None]:
#df1 = pandas.read_sql_query("""
#    Select Datetimelocal, Mdptjulian from velocity 
#    where DateTimeLocal > "2015-04-01T17:01:00.0000"
#    and DateTimeLocal < "2015-04-10T17:01:00.0000"
#    order by DateTimeLocal limit 100
#
#    """, engine)
# Telemetry row whose DateTime is closest to the given timestamp.
# The query is restored because print(df2) below otherwise fails with a
# NameError on a fresh kernel: df2 was only defined in commented-out code.
# NOTE(review): the ORDER BY is computed for every telemetry row, so this
# scans the whole table -- confirm that cost is acceptable.
df2 = pandas.read_sql_query(
"""select datetime from telemetry
    order by (abs(strftime('%s',datetime) - abs(strftime('%s','2015-04-02T01:31:44.832007'))))  limit 1
                        """, engine)

#print(df1)

print(df2)

#print(pandas.read_sql_query("""
#    Select t.DateTime, v.Datetimelocal, v.Mdptjulian from (velocity v, telemetry t)
#    where v.DateTimeLocal > "2015-04-01T17:01:00.0000"
#    and v.DateTimeLocal < "2015-04-10T17:01:00.0000"
#    and (t.DateTime - v.DateTi 
#    order by DateTimeLocal limit 100
#
#    """, engine))