# Dataframe creation using the Hsiao class

Example notebook showing how to obtain dataframes of generated data (fluxes and time samples) for type Ia supernovae by defining a few parameters.

In [1]:
from hsiao import Hsiao
import numpy as np
import pandas as pd
import random as rand
from random import *
import time

### Example of the dataframes returned by the Hsiao class for 1 system of 3 images.

In [2]:
# Positional signature used below:
#   Hsiao(nb of images, redshift, amplitude, type of plot,
#         magnifications, time delays, time origin, noise level)
#
# - nb of images: integer
# - redshift, amplitude, noise level: floats
# - type of plot: str, one of "Flux", "Total_Flux_Without_Noise", "Noise",
#   "Total_Flux_With_Noise"
# - magnifications, time delays: arrays of size (nb of images)
# - time origin: described as an integer by the original note, but passed
#   below as the float 55000.

# One system of 3 images.
H=Hsiao(3, 0.4, 1e-4, "Flux", np.array([1.2, 1.42, 1.52]), np.array([0, 10.34, 24.32]), 55000., 0.05)

The `dataframe` method of the Hsiao class returns 2 different dataframes:  
- the first contains all the information given by the user, i.e. all the above parameters
- the second is composed of the generated data computed inside the Hsiao class.

To match the first dataframe with the second one, an ID is defined and written in the first column of each dataframe. It is built from the number of images, the amplitude, the redshift and the noise level (e.g. `3-0.0001-0.4-0.05`) in order to avoid duplicates.

In [3]:
# f1: dataframe of the user-supplied parameters; f2: dataframe of the generated data.
f1, f2 = H.dataframe()

In [4]:
# Parameter dataframe: the inputs that were given to Hsiao, plus the ID.
f1

Unnamed: 0,ID,images,time origin,amplitude,time delays,magnifications,redshift,noise level
0,3-0.0001-0.4-0.05,3,55000.0,0.0001,[ 0. 10.34 24.32],[1.2 1.42 1.52],0.4,0.05


In [5]:
# Generated data: time samples and noisy total flux for the g, r and i bands.
f2

Unnamed: 0,ID,time sample band g,total flux + noise band g,time sample band r,total flux + noise band r,time sample band i,total flux + noise band i
0,3-0.0001-0.4-0.05,54942.48,16.122651,54941.48,14.789232,54945.88,13.618154
1,3-0.0001-0.4-0.05,54945.28,22.858027,54943.28,9.071923,54953.08,13.987194
2,3-0.0001-0.4-0.05,54948.08,26.104037,54945.08,9.026746,54958.28,19.280764
3,3-0.0001-0.4-0.05,54948.88,15.867746,54946.88,17.654394,54960.48,48.333877
4,3-0.0001-0.4-0.05,54951.68,-1.070113,54948.68,14.317853,54961.68,116.044908
...,...,...,...,...,...,...,...
86,3-0.0001-0.4-0.05,55113.28,30.231925,55109.28,42.743771,,
87,3-0.0001-0.4-0.05,55114.08,16.914728,55111.08,29.574207,,
88,3-0.0001-0.4-0.05,55116.88,17.113194,55114.88,37.082171,,
89,3-0.0001-0.4-0.05,55119.68,18.867746,55115.68,29.404348,,


### Example of dataframe with multiple systems. 

First, the time delays need to be defined such that the first one is zero and the following ones are increasing.

In [6]:
def delays(n, low=5, high=15):
    """Draw cumulative time delays for a system of ``n`` images.

    The first delay is always 0. Each following delay is the previous one
    plus a random increment drawn uniformly between ``low`` and ``high``,
    so the delays are increasing as long as ``low`` is positive.

    Parameters
    ----------
    n : int
        Number of images. For ``n <= 1`` the result is ``[0]``.
    low, high : float, optional
        Bounds of the uniform increment between two consecutive delays.
        The defaults (5 and 15) are the values that used to be hard-coded.

    Returns
    -------
    list
        ``n`` delays, each rounded to 4 decimals (rounding is applied once,
        after accumulation, so rounding errors do not add up).
    """
    cumulative = [0]
    for _ in range(1, n):
        # `rand` is the notebook's `import random as rand`; using it avoids
        # depending on names leaked by `from random import *`.
        cumulative.append(cumulative[-1] + rand.uniform(low, high))
    return [round(value, 4) for value in cumulative]

Secondly, to create dataframes containing many systems (10,000 in the cell below), the parameters of each system are drawn at random from uniform distributions.  
Then the dataframes produced for each system are concatenated, giving 2 dataframes: one containing all the parameters and one containing the generated data.

In [7]:
start = time.time()

n_systems = 10000      # number of accepted systems to generate
truth_frames = []      # one parameter dataframe per accepted system
data_frames = []       # one generated-data dataframe per accepted system

# `i` is both the ID given to the system and the number of systems accepted
# so far: it only advances when a system is kept, so a rejected draw is
# retried with the same ID.
i = 0
while i < n_systems:
    # number of images (original note: "to maximize non lensed supernova")
    nb_images = rand.choice([1, 2, 3, 4])
    redshift = np.around(rand.uniform(1, 2.5), 4)      # do not put more than 2.5
    amplitude = np.around(rand.uniform(1e-5, 1e-1), 4)
    mu = [1 + np.around(rand.random(), 4) for _ in range(nb_images)]
    time_delays = delays(nb_images)
    # The time origin is kept fixed. Note from the original author: drawing it
    # with np.random.randint(55000, 60000) made the fluxes constant; the cause
    # is not understood yet (maybe the definition of the Hsiao model).
    t0 = 55000.
    noise_level = np.around(rand.uniform(0, 0.2), 4)
    H = Hsiao(nb_images, redshift, amplitude, "Flux", np.array(mu), np.array(time_delays),
              t0, noise_level, nobs=np.array([91, 91, 91]), ID=i)

    f1, f2 = H.dataframe()

    # Reject any system whose generated data contains NaN. This check now also
    # applies to the very first system, which used to be accepted unchecked.
    if f2.isnull().values.any():
        continue

    truth_frames.append(f1)
    data_frames.append(f2)
    i += 1

# Concatenate once at the end: calling pd.concat inside the loop re-copies all
# previously accumulated rows at every iteration (quadratic cost).
f1 = pd.concat(truth_frames)
f2 = pd.concat(data_frames)

end = time.time()
print("The time of execution of above program is :", end-start)

  noises = np.full((len(self.bands), max(self.nobs)), self.pers*np.nanmax(self.total_flux_without_noise()))


The time of execution of above program is : 428.48359394073486


In [8]:

# Sanity check: True if the parameter dataframe holds at least one missing value.
check_for_nan = f1.isna().to_numpy().any()
print(check_for_nan)

False


In [9]:
# Sanity check: True if the generated-data dataframe holds at least one missing value.
check_for_nan = f2.isna().to_numpy().any()
print(check_for_nan)

False


In [10]:
# Parameters of all the generated systems (one row per system).
f1

Unnamed: 0,ID,images,time origin,amplitude,time delays,magnifications,redshift,noise level
0,0,1,55000.0,0.0795,[0],[1.96],1.6957,0.1676
0,1,4,55000.0,0.0019,[ 0. 14.73 23.36 36.26],[1.21 1.9 1.42 1.23],1.3082,0.0226
0,2,3,55000.0,0.0118,[ 0. 7.65 14.81],[1.75 1.41 1.74],1.0825,0.0083
0,3,3,55000.0,0.0154,[ 0. 6.63 14.93],[1.42 1.31 1.64],1.7082,0.1389
0,4,4,55000.0,0.0132,[ 0. 5.12 19.52 27.45],[1.04 1.74 1.53 1.75],1.3018,0.1011
...,...,...,...,...,...,...,...,...
0,9995,2,55000.0,0.07,[ 0. 13.76],[1.14 1.32],1.994,0.1515
0,9996,3,55000.0,0.0243,[ 0. 6.47 18.15],[1.71 1.13 1.43],2.134,0.1931
0,9997,3,55000.0,0.0466,[ 0. 10.09 18.89],[1.22 1.52 1.11],1.4547,0.0105
0,9998,4,55000.0,0.0827,[ 0. 9.51 19.54 27.8 ],[1.28 1.74 1.92 1. ],1.1192,0.1787


In [11]:
# First 91 rows by position; the systems above were built with nobs = 91 per
# band, so this is presumably the full data of the first system (ID 0).
f2.iloc[:91]

Unnamed: 0,ID,images,time sample band g,total flux + noise band g,time sample band r,total flux + noise band r,time sample band i,total flux + noise band i
0,0,1,54965.8,9857.548565,54966.8,12808.463139,54969.2,46451.732114
1,0,1,54966.6,10549.269195,54967.6,14277.550934,54974.4,70732.207850
2,0,1,54967.4,16872.628352,54968.4,11533.402431,54978.6,78989.423208
3,0,1,54970.2,12590.366603,54969.2,20060.419435,54983.8,78834.373466
4,0,1,54972.0,13184.015601,54972.0,12283.637781,54989.0,106359.738547
...,...,...,...,...,...,...,...,...
86,0,1,55125.6,3499.687284,55120.6,3493.996352,55424.4,15596.422526
87,0,1,55126.4,4783.304837,55122.4,12600.181485,55428.6,2834.043979
88,0,1,55127.2,9358.904012,55123.2,10744.485320,55430.8,9900.469006
89,0,1,55130.0,8613.177603,55126.0,3692.878634,55438.0,10380.532886


In [12]:
# Save the parameters ("truth") and the generated data to CSV files in the
# current working directory; index=False leaves the dataframe index out.
f1.to_csv("truth.csv", index=False)
f2.to_csv("data.csv", index=False)

### Example of dataframe with flux without noise. 

In [None]:
start = time.time()

n_systems = 10000      # number of accepted systems to generate
truth_frames = []      # one parameter dataframe per accepted system
data_frames = []       # one generated-data dataframe per accepted system

# `i` is both the ID given to the system and the number of systems accepted
# so far: it only advances when a system is kept, so a rejected draw is
# retried with the same ID.
i = 0
while i < n_systems:
    # number of images (original note: "to maximize non lensed supernova")
    nb_images = rand.choice([1, 2, 3, 4])
    redshift = np.around(rand.uniform(1, 2.5), 4)      # do not put more than 2.5
    amplitude = np.around(rand.uniform(1e-5, 1e-1), 4)
    mu = [1 + np.around(rand.random(), 4) for _ in range(nb_images)]
    time_delays = delays(nb_images)
    # The time origin is kept fixed. Note from the original author: drawing it
    # with np.random.randint(55000, 60000) made the fluxes constant; the cause
    # is not understood yet (maybe the definition of the Hsiao model).
    t0 = 55000.
    noise_level = np.around(rand.uniform(0, 0.2), 4)
    H = Hsiao(nb_images, redshift, amplitude, "Flux", np.array(mu), np.array(time_delays),
              t0, noise_level, nobs=np.array([91, 91, 91]), ID=i)

    # dataframeF() is used here instead of dataframe(); per the section
    # heading it is the variant giving the flux without noise.
    f1, f2 = H.dataframeF()

    # Reject any system whose generated data contains NaN. This check now also
    # applies to the very first system, which used to be accepted unchecked.
    if f2.isnull().values.any():
        continue

    truth_frames.append(f1)
    data_frames.append(f2)
    i += 1

# Concatenate once at the end: calling pd.concat inside the loop re-copies all
# previously accumulated rows at every iteration (quadratic cost).
f1 = pd.concat(truth_frames)
f2 = pd.concat(data_frames)

end = time.time()
print("The time of execution of above program is :", end-start)

In [None]:
# Save the parameters and the generated data of the dataframeF() run to CSV
# files in the current working directory; index=False leaves the index out.
f1.to_csv("truthF.csv", index=False)
f2.to_csv("dataF.csv", index=False)