In [34]:
remote_timing_file = "http://portal.nersc.gov/project/astro250/xswap/iptf/timing.csv"
remote_image_file = "http://portal.nersc.gov/project/astro250/xswap/iptf/image.csv"
remote_readme_file = "http://portal.nersc.gov/project/astro250/xswap/iptf/README"

# Please set local storage for the files
target_dir = "~/tmp/iptf"
!mkdir -p $target_dir;

local_timing_file = target_dir + "/timing.csv"
local_image_file = target_dir + "/image.csv"
local_readme_file = target_dir + "/README"
sorted_image_file = target_dir + "/sorted_image.csv"
sorted_timing_file = target_dir + "/sorted_timing.csv"

import os.path
if not os.path.exists(local_readme_file):
    print("Downloading data. It may take a while depending on your network connection.")
    !cd $target_dir;wget $remote_timing_file
    !cd $target_dir;wget $remote_image_file
    !cd $target_dir;wget $remote_readme_file

!cd $target_dir;cat README

Downloading data. It may take a while depending on your network connection.
--2018-08-01 14:27:31--  http://portal.nersc.gov/project/astro250/xswap/iptf/timing.csv
Resolving portal.nersc.gov (portal.nersc.gov)... 128.55.201.128
Connecting to portal.nersc.gov (portal.nersc.gov)|128.55.201.128|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1120623419 (1.0G) [text/csv]
Saving to: ‘timing.csv’


2018-08-01 14:28:18 (22.6 MB/s) - ‘timing.csv’ saved [1120623419/1120623419]

--2018-08-01 14:28:18--  http://portal.nersc.gov/project/astro250/xswap/iptf/image.csv
Resolving portal.nersc.gov (portal.nersc.gov)... 128.55.201.128
Connecting to portal.nersc.gov (portal.nersc.gov)|128.55.201.128|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 96224615 (92M) [text/csv]
Saving to: ‘image.csv’


2018-08-01 14:28:22 (21.4 MB/s) - ‘image.csv’ saved [96224615/96224615]

--2018-08-01 14:28:23--  http://portal.nersc.gov/project/astro250/xswap/iptf/README
Reso

In [139]:
import pandas as pd
import datetime

def get_timing_features():
    time_features = []
    utime_features = []
    for i in range(0,40):
        time_features.append('time_' + str(i))
        utime_features.append('utime_' + str(i))
    return time_features, utime_features



def loadImageFile (image_file, timing_file):
    print ("reading " + image_file)
    images = pd.read_csv(image_file, index_col = 0)
    time_features, utime_features = get_timing_features()
    features = time_features + utime_features
    tf = {}
    
    for x in features:
        images.insert(loc=len(images.keys()), column = x, value = None)
        tf[x] = pd.DataFrame(columns=[x])
   
    print ("reading " + timing_file)
    timings = pd.read_csv(timing_file) 
    print ("re-indexing")
    timings.reset_index(inplace=True)
    print ("merging files")
    t_index = 0
    t_len = len(timings)
    if False:
        return images,timings, None
    
    while t_index < t_len:
        for cid in range(40):
            cp = timings.iloc[t_index + cid]
            cp_id = cp['id']
            cp_cid = cp['checkpointid']
            cp_time = cp['time']
            cp_utime = cp['utime']
            name = 'time_' + str(cp_cid)
            tf[name].append(pd.DataFrame([{name:cp_time}]))
            name = 'utime_' + str(cp_cid)
            tf[name].append(pd.DataFrame([{name:cp_utime}]))
            if (t_index + cid) % 1000 == 0:
                print(str(t_index) + " of " + str(t_len) +"\t\t"+ str(datetime.datetime.now()))
        t_index += 40
        if t_index >= 10000:
            break
    return images, timings, tf


def normalize_file(src_file, dst_file):
    print ('load ' + src_file)
    timings = pd.read_csv(src_file)
    print ('sort id' + src_file)
    sorted_timings = timings.sort_values(by='id')
    print ("write " + dst_file)
    sorted_timings.to_csv(path_or_buf=dst_file, index=False)
    print ("completed")


In [None]:
   z = timings[['checkpointid','time','utime']]
    nb = len(z)
    i = 0
    j = 0
    while j < nb:
        x= ((z[j:j+40]).sort_values(by='checkpointid'))[['time','utime']]
        rt = pd.DataFrame(x['time'])
        rt.index = t
        ru = pd.DataFrame(x['utime'])
        ru = ru.rename (columns= {"utime":"time"})
        ru.index = u
        rr = rt.append(ru, sort=False)
        rr = rr.T 
        #print (rr.iloc[0])
        images.iloc[i,18:] = rr.iloc[0]
        #print (images.iloc[i])
        j += 40
        i += 1
        if i % 1000:
            print(str + datetime.datetime.now())

In [39]:
normalize_file(local_image_file, sorted_image_file)
normalize_file(local_timing_file, sorted_timing_file)

load ~/tmp/iptf/image.csv
sort id~/tmp/iptf/image.csv
write ~/tmp/iptf/sorted_image.csv
load ~/tmp/iptf/timing.csv
sort id~/tmp/iptf/timing.csv
write ~/tmp/iptf/sorted_timing.csv


In [140]:
images=None
timings=None

images, timings, tf = loadImageFile(local_image_file, local_timing_file)

reading ~/tmp/iptf/image.csv
reading ~/tmp/iptf/timing.csv
re-indexing
merging files
0 of 24019080		2018-08-01 16:02:48.814610
1000 of 24019080		2018-08-01 16:02:50.766779
2000 of 24019080		2018-08-01 16:02:52.791660
3000 of 24019080		2018-08-01 16:02:54.700254
4000 of 24019080		2018-08-01 16:02:56.616242
5000 of 24019080		2018-08-01 16:02:58.518753
6000 of 24019080		2018-08-01 16:03:00.442865
7000 of 24019080		2018-08-01 16:03:02.394139
8000 of 24019080		2018-08-01 16:03:04.310839
9000 of 24019080		2018-08-01 16:03:06.212921


In [145]:
r = 75/10000
print (r*24019080/(60*60*24))

2.0849895833333334


transfer time vs time of night

utc_obs vs. time between step 0 and 1

Galactic latitude vs subtraction time:

gal_lat vs. time between 24 and 26

In [103]:
df = pd.DataFrame(data=[[1,2]],columns = ['a','b'])

In [255]:
z = timings[['checkpointid','time','utime']]

In [272]:
x= ((z[0:40]).sort_values(by='checkpointid'))[['time','utime']]

In [273]:
print(x)

                                time        utime
23423596     2015-03-20 03:04:37.702     0.000000
22818126  2015-03-20 04:17:12.423566  4354.721566
22217649  2015-03-20 04:17:12.506983     0.083417
21617172  2015-03-20 04:17:13.374036     0.867053
21016695  2015-03-20 04:17:13.745572     0.371536
20416218  2015-03-20 04:17:15.403216     1.657644
19815741  2015-03-20 04:17:15.542033     0.138817
19215264  2015-03-20 04:17:17.766029     2.223996
18614787  2015-03-20 04:17:18.524554     0.758525
18014310  2015-03-20 04:17:38.965292    20.440738
17413833  2015-03-20 04:17:39.001262     0.035970
16813356  2015-03-20 04:17:39.085677     0.084415
16212879  2015-03-20 04:17:41.256771     2.171094
15612402  2015-03-20 04:17:43.001172     1.744401
15011925  2015-03-20 04:17:47.205149     4.203977
14411448  2015-03-20 04:17:47.386064     0.180915
13810971  2015-03-20 04:17:47.773047     0.386983
13210494  2015-03-20 04:17:47.816621     0.043574
12610017  2015-03-20 04:18:08.843939    21.027318


In [393]:
t,u = get_timing_features()
rt = pd.DataFrame(x['time'])
rt.index = t
ru = pd.DataFrame(x['utime'])
ru = ru.rename (columns= {"utime":"time"})
ru.index = u
rr = rt.append(ru, sort=False)
rr = rr.T

In [394]:
print (rr)

                       time_0                      time_1  \
time  2015-03-20 03:04:37.702  2015-03-20 04:17:12.423566   

                          time_2                      time_3  \
time  2015-03-20 04:17:12.506983  2015-03-20 04:17:13.374036   

                          time_4                      time_5  \
time  2015-03-20 04:17:13.745572  2015-03-20 04:17:15.403216   

                          time_6                      time_7  \
time  2015-03-20 04:17:15.542033  2015-03-20 04:17:17.766029   

                          time_8                      time_9   ...     \
time  2015-03-20 04:17:18.524554  2015-03-20 04:17:38.965292   ...      

     utime_30 utime_31 utime_32 utime_33 utime_34 utime_35 utime_36 utime_37  \
time  13.7295   2.4327  35.4989   2.2319  8.63361  5.97557  2.77407  120.336   

      utime_38 utime_39  
time  0.241639  1.34092  

[1 rows x 80 columns]


In [388]:
print(len(rr))

41


In [107]:
d1 = pd.DataFrame([[1,2,3],[4,5,6]], index=[6,7],columns=['a','b','c'])

In [414]:
d2 = pd.DataFrame([[10]],columns=[1])

In [416]:
dx = pd.concat([d1,d2],axis=1)
dx

Unnamed: 0,0,1,1.1
0,1,2,10.0
1,3,4,


In [110]:
d1.append(pd.DataFrame([{'a':10,'b':11,'c':12}]))

Unnamed: 0,a,b,c
6,1,2,3
7,4,5,6
0,10,11,12


In [108]:
d1

Unnamed: 0,a,b,c
6,1,2,3
7,4,5,6


In [462]:
d1.loc[1,1:] = [10,11]

In [102]:
r = d1.iloc[0,0:]

In [104]:
r[1]

2

In [None]:
dx