# This Notebook converts data to lcdata format

basically the way you should provide the data in input to generate the prediction is a table in lcdata  format, which is explained here https://lcdata.readthedocs.io/en/latest/usage.html (edited)
[2:16 PM] There is a metadata table in it which needs the following keys
- object_id: A unique identifer. Default: randomly assigned string
- ra: The right ascension. Default: nan
- dec: The declination. Default: nan
- type: A string representing the type of the light curve. Default: Unknown
- redshift: The redshift. Default: nan
(edited)

[2:16 PM] And a lightcurve table with
- time: times at which the light curve was sampled. Converted to a 64-bit float.
- flux: The flux at each point on the light curve. Converted to a 32-bit float.
- fluxerr: The uncertainty on the flux. Converted to a 32-bit float.
- band: A string representing bandpass that the light curve was observed in. We recommend using the sncosmo bandpass names here. Converted to a binary string.

In [36]:
import sys
import os
#print(sys.path)
from astropy.io import fits
from astropy.table import Table, Column, join, hstack, vstack, unique, setdiff
from astropy import units as u
from astropy.coordinates import SkyCoord, match_coordinates_sky, Angle
from astropy.time import Time
#import desitarget.io as io
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import glob
import healpy as hp
import glob
import json
import lcdata
import get_redshift
import time
import h5py
# import parsnip

import sqlite3
import os
import logging

In [37]:
path = '/global/cfs/cdirs/desi/science/td/DECam/imaging/LCData_Legacy'

In [10]:
class lc_data_container():
    list_without_duplicates = []
    
    def __init__(self, servey, container=[]):
        self.servey = servey
        self.container = container
        
    add_lc = lambda self, x: self.container.append(x)
    
    def rm_duplicates(self):
        self.list_without_duplicates = []
        obj_id_list = []
        for j,i in enumerate(self.container):
            if i.light_curve is not None and i.light_curve.meta['object_id'] not in obj_id_list:
                self.list_without_duplicates.append(i)
                obj_id_list.append(i.light_curve.meta['object_id'])
        return self.list_without_duplicates
  
    
class transients():
    filter_type = 'undeclared'
    light_curve = None
    meta = None
    
    def __init__(self, data):
        self.data = data
        
    def assign_meta(self, meta_data):
        self.meta = meta_data
        self.ra_pseudo_host = meta_data['RA-PSEUDO-HOST']
        self.dec_pseudo_host = meta_data['DEC-PSEUDO-HOST']
    
    def use_lsst_filters(self):
        if self.filter_type == 'undeclared':
            self.data['band'] = self.data['band'].replace(['r','g','z'],['lsstr','lsstg', 'lsstz'])
        else: self.data['band'] = self.data['band'].replace(['desr','desg', 'desz'],['lsstr','lsstg', 'lsstz'])
        self.filter_type = 'lsst'    
    
    def use_des_filters(self):
        if self.filter_type == 'undeclared':
            self.data['band'] = self.data['band'].replace(['r','g','z'],['desr','desg', 'desz'])
        else: self.data['band'] = self.data['band'].replace(['lsstr','lsstg', 'lsstz'],['desr','desg', 'desz'])
        self.filter_type = 'des'
        
    def create_lc_data(self):
        if self.meta['redshift'] != -99:
            flux = list(10**((22.5-self.data['mag'])/2.5))
            magerr = list(self.data['magerr'])
            fluxerr = np.array(flux) * np.log(10) / 2.5 * np.array(magerr)

            self.light_curve = Table({
            'time': list(self.data['time']),
            'flux': flux,
            'fluxerr': list(fluxerr),
            'band': list(self.data['band']),})

            self.light_curve.meta = {
            'object_id': self.meta['ObjectID'],
            'ra': self.meta['RA-OBJECT'],
            'dec': self.meta['DEC-OBJECT'],
            'type': 'Unknown',
            'redshift': self.meta['redshift'],
            'z_spec_exists': self.meta['z_spec'],
            'hostgal_photoz': self.meta['hostgal_photoz'],
            'hostgal_photoz_err': self.meta['hostgal_photoz_err']}
        else: pass
        

In [11]:
transient_dirs = []
legacy = lc_data_container('LCData_Legacy')
for dirname, dirnames, filenames in os.walk(path):
    for subdirname in dirnames:
        transient_dirs.append(os.path.join(dirname, subdirname))

for dir in transient_dirs:
    
    for file in glob.glob(dir+'/lc*.csv'):
        data = pd.read_csv(file)
        data.rename(columns = {'filter':'band'}, inplace = True)
        data.rename(columns = {'mjd':'time'}, inplace = True)
        alert_mask=data['alert']
        masked_data=data[alert_mask]
        masked_data.drop(columns='alert')
        transient = transients(masked_data)
    for file in glob.glob(dir+'/object-summary.json'):
        with open(file) as f:
            meta_data = json.load(f)
            
            transient.assign_meta(dict(meta_data))
            
    legacy.add_lc(transient)

       
      

In [13]:
#print(first.meta.keys())
start_tot = time.time()
for i, j in enumerate(legacy.container[:]):
    start = time.time()
    a = get_redshift.do_all(j.ra_pseudo_host, j.dec_pseudo_host)
    end = time.time()
    print('runtime:', end - start)
    if a['z_spec'] != -99:
        j.meta['redshift'] = a['z_spec']
        j.meta['z_spec'] = 1
    else:
        j.meta['redshift'] = a['mean']
        j.meta['z_spec'] = 0
    j.meta['hostgal_photoz'] = a['mean']
    j.meta['hostgal_photoz_err'] = a['err']
    print(a)
end_tot = time.time()
print('total runtime:', end_tot - start_tot)

runtime: 36.11046576499939
{'z_spec': 0.29418, 'mean': 0.33227527, 'err': 0.053414814}
runtime: 34.242786169052124
{'z_spec': -99.0, 'mean': 0.7428861, 'err': 0.09449511}
runtime: 32.618505001068115
{'z_spec': -99.0, 'mean': 0.8812608, 'err': 0.11874933}
runtime: 35.6358847618103
{'z_spec': -99.0, 'mean': 0.52417517, 'err': 0.16277674}
runtime: 33.554964542388916
{'z_spec': -99.0, 'mean': 0.54615086, 'err': 0.100585245}
runtime: 32.13948106765747
{'z_spec': -99.0, 'mean': 0.48838532, 'err': 0.21478653}
runtime: 35.49452877044678
{'z_spec': -99.0, 'mean': 0.6684589, 'err': 0.037222046}
runtime: 32.610225439071655
{'z_spec': -99.0, 'mean': 0.30342153, 'err': 0.080004215}
runtime: 33.134615421295166
{'z_spec': -99.0, 'mean': 0.14071555, 'err': 0.014162116}
runtime: 40.015995025634766
{'z_spec': -99.0, 'mean': 1.071399, 'err': 0.1839605}
runtime: 33.41290640830994
{'z_spec': -99.0, 'mean': 0.51206154, 'err': 0.112478554}
runtime: 34.34812545776367
{'z_spec': 0.09955253, 'mean': 0.10922255,

AttributeError: 'transients' object has no attribute 'ra_pseudo_host'

In [14]:
print(len(legacy.container))

for i in legacy.container:
    i.use_lsst_filters()
    
for i in legacy.container[::10]:
    print(i.filter_type)

591
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst
lsst


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.data['band'] = self.data['band'].replace(['r','g','z'],['lsstr','lsstg', 'lsstz'])


In [19]:
print(len(legacy.container))
print(legacy.container[0].meta)
for i in legacy.container:
    i.create_lc_data()
    print(i.light_curve.meta)

591
{'ObjectID': 'T202104061414596p021612', 'RA-OBJECT': 213.74844127, 'DEC-OBJECT': 2.27027605, 'NumberAlerts': 17, 'MaxSCORE': 0.936, 'RA-PSEUDO-HOST': 213.74890308, 'DEC-PSEUDO-HOST': 2.27030717, 'SEP-PSEUDO-HOST': 1.665, 'RA-NEIGHBOR-STAR': 213.74804695, 'DEC-NEIGHBOR-STAR': 2.26649061, 'SEP-NEIGHBOR-STAR': 13.7012, 'Discovery-Round': 5, 'Discovery-Time': '2021-04-06T04:43:23.365', 'Discovery-Filter': 'S', 'Discovery-Magnitude': 21.52, 'Discovery-SNR': 30.3, 'Latest-Round': 14, 'Latest-Time': '2021-05-06T03:41:23.252', 'Latest-Filter': 'S', 'Latest-Magnitude': 22.6, 'Latest-SNR': 11.4, 'redshift': 0.29418, 'z_spec': 1, 'hostgal_photoz': 0.33227527, 'hostgal_photoz_err': 0.053414814}


TypeError: 'NoneType' object is not subscriptable

In [20]:
for i in legacy.container:
    
        flux = list(10**((22.5-i.data['mag'])/2.5))
        magerr = list(i.data['magerr'])
        fluxerr = np.array(flux) * np.log(10) / 2.5 * np.array(magerr)
        
        i.light_curve = Table({
        'time': list(i.data['time']),
        'flux': flux,
        'fluxerr': list(fluxerr),
        'band': list(i.data['band']),})
        
        i.light_curve.meta = {
        'object_id': i.meta['ObjectID'],
        'ra': i.meta['RA-OBJECT'],
        'dec': i.meta['DEC-OBJECT'],
        'type': 'Unknown',
        'redshift': i.meta['redshift'],
        'z_spec_exists': i.meta['z_spec'],
        'hostgal_photoz': i.meta['hostgal_photoz'],
        'hostgal_photoz_err': i.meta['hostgal_photoz_err']}
        

TypeError: 'NoneType' object is not subscriptable

In [25]:
for i,j in enumerate(legacy.container):
    print(i)
    if j.light_curve == None:
        print(j.light_curve)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

  result = self.as_array() == other


In [31]:
lc_container = legacy.container[:560]
list_without_duplicates = []
#obj_id_list = [i.light_curve.meta['object_id'] for i in lc_container]
obj_id_list = []
for j,i in enumerate(lc_container):
    #print(i.light_curve.meta.keys())
    print(j)
    if i.light_curve.meta['object_id'] not in obj_id_list:
        list_without_duplicates.append(i)
        obj_id_list.append(i.light_curve.meta['object_id'])
        
print(len(list_without_duplicates))

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

In [33]:
dataset = lcdata.from_light_curves([i.light_curve for i in list_without_duplicates])
dataset.write_hdf5('./dataset_all.h5')


In [21]:
dataset_parsnip = parsnip.load_dataset('./dataset_all.h5')

NameError: name 'parsnip' is not defined

In [11]:
dataset.meta

object_id,ra,dec,type,redshift,hostgal_photoz,hostgal_photoz_err
str23,float64,float64,str7,float64,float32,float32
A202103221409059m023156,212.27475706,-2.53247783,Unknown,0.5241751670837402,0.52417517,0.16277674
A202104121444394p003321,221.16428014,0.55583529,Unknown,0.668458878993988,0.6684589,0.037222046
A202104181450313m033255,222.63077117,-3.54876824,Unknown,1.0713989734649658,1.071399,0.1839605
A202106021427485m033040,216.95229358,-3.51122702,Unknown,0.3034215271472931,0.30342153,0.080004215
T202104061414596p021612,213.74844127,2.27027605,Unknown,0.2941800057888031,0.33227527,0.053414814
T202104091407441p002208,211.93404463,0.36905789,Unknown,0.8812608122825623,0.8812608,0.11874933
T202104251422420m032332,215.67523987,-3.39237579,Unknown,0.140715554356575,0.14071555,0.014162116
T202105031422035p013900,215.51486732,1.65011899,Unknown,0.4883853197097778,0.48838532,0.21478653
T202105241209144p010706,182.31034601,1.11858508,Unknown,0.5461508631706238,0.54615086,0.100585245
T202105301405547p043407,211.47793891,4.56862559,Unknown,0.7428861260414124,0.7428861,0.09449511


In [12]:
dataset.light_curves

array([<Table length=21>
              time           flux      fluxerr    band
            float64        float32     float32   bytes5
       ------------------ ---------- ----------- ------
         59295.3430873505  1.0280482  0.15227889  lsstz
        59295.34549629514 0.96273476 0.078796975  lsstr
        59297.30023727491 0.94792324  0.09148465  lsstr
        59297.30644048888 0.78491366  0.07785233  lsstr
       59310.273680682025  0.7991057  0.10739042  lsstr
        59310.27483195777  1.1988628   0.1747958  lsstz
        59313.22657290902  0.6333319  0.08948127  lsstr
         59313.2277261672  1.4584287  0.16296299  lsstz
        59313.29096046869  0.6567647  0.09004591  lsstr
         59316.2122567303 0.84779525  0.09005973  lsstr
        59316.21339816067  1.3873651  0.18442632  lsstz
       59316.278509972646 0.60615456  0.09417943  lsstr
       59316.279678940096  1.5115135  0.20777583  lsstz
       59319.249057558125  0.8643286  0.08200936  lsstr
        59319.2501956592

In [7]:
a = legacy.container[561]
print(a)

<__main__.transients object at 0x1555123208e0>


In [8]:
print(a.meta)

None


In [20]:
with h5py.File('dataset.h5', 'r') as f:
    print(f.keys())
    obs = f['observations']
    
print(obs['time'][:])

<KeysViewHDF5 ['_i_observations', 'metadata', 'metadata.__table_column_meta__', 'observations']>


ValueError: Not a dataset (not a dataset)

In [1]:
start_tot = time.time()
for i, j in enumerate(legacy.container):
    start = time.time()
    a = get_redshift.do_all(j.ra_pseudo_host, j.dec_pseudo_host)
    end = time.time()
    print('runtime:', end - start)
    if a['z_spec'] != -99:
        j.meta['redshift'] = a['z_spec']
        j.meta['z_spec'] = 1
    else:
        j.meta['redshift'] = a['mean']
        j.meta['z_spec'] = 0
    j.meta['hostgal_photoz'] = a['mean']
    j.meta['hostgal_photoz_err'] = a['err']
    print(a)
end_tot = time.time()
print('total runtime:', end_tot - start_tot)

for i in legacy.container:
    i.use_lsst_filters()
    
for i in legacy.container:
    i.create_lc_data()

NameError: name 'time' is not defined