# Dataframe creation using the Hsiao class

Example notebook showing how to obtain dataframes of simulated Type Ia supernova data (fluxes and time samples) from a set of user-defined parameters.  

In [1]:
from hsiao import Hsiao
import numpy as np
import pandas as pd
import random as rand
from random import *
import time

### Example of the dataframes returned by the Hsiao class for 1 system of 3 images.

In [2]:
# Positional signature:
#   Hsiao(nb of images, redshift, amplitude, type of plot,
#         magnifications, time delays, time origin, noise level)
#
# - nb of images, time origin: integers
# - redshift, amplitude, noise level: floats
# - type of plot: str, to choose between "Flux", "Total_Flux_Without_Noise",
#   "Noise" and "Total_Flux_With_Noise"
# - magnifications, time delays: arrays of size (nb of images)

H = Hsiao(
    3,                              # nb of images
    0.4,                            # redshift
    1e-4,                           # amplitude
    "Flux",                         # type of plot
    np.array([1.2, 1.42, 1.52]),    # magnifications, one per image
    np.array([0, 10.34, 24.32]),    # time delays, one per image
    55000.,                         # time origin
    0.05,                           # noise level
)

The `dataframe` method of the Hsiao class returns 2 different dataframes:  
- the first contains all the information given by the user, i.e. all the above parameters
- the second is composed of the generated data computed inside the Hsiao class. 

To match the first dataframe with the second one, an ID is defined and written in the first column of each dataframe. It is built from the number of images, the amplitude, the redshift and the noise level (e.g. `3-0.0001-0.4-0.05`) to make sure there are no duplicates.

In [3]:
# dataframe() returns two frames: f1 holds the user-supplied parameters,
# f2 holds the data generated by the Hsiao class.
f1, f2 = H.dataframe()

In [4]:
f1

Unnamed: 0,ID,images,time origin,amplitude,time delays,magnifications,redshift,noise level
0,3-0.0001-0.4-0.05,3,55000.0,0.0001,[ 0. 10.34 24.32],[1.2 1.42 1.52],0.4,0.05


In [5]:
f2

Unnamed: 0,ID,time sample band g,total flux + noise band g,time sample band r,total flux + noise band r,time sample band i,total flux + noise band i
0,3-0.0001-0.4-0.05,54941.48,-52.310354,54942.48,-32.149251,54945.88,13.559375
1,3-0.0001-0.4-0.05,54943.28,1.055241,54945.28,42.793326,54951.08,-42.129363
2,3-0.0001-0.4-0.05,54947.08,-43.235822,54949.08,4.010133,54954.28,35.197385
3,3-0.0001-0.4-0.05,54948.88,-7.379637,54951.88,34.113400,54960.48,40.413303
4,3-0.0001-0.4-0.05,54949.68,34.478072,54952.68,6.886342,54967.68,69.649973
...,...,...,...,...,...,...,...
86,3-0.0001-0.4-0.05,55082.28,38.426223,55101.28,11.546932,,
87,3-0.0001-0.4-0.05,55084.08,7.878134,55103.08,1.377723,,
88,3-0.0001-0.4-0.05,55087.88,-34.006218,55105.88,33.361834,,
89,3-0.0001-0.4-0.05,55088.68,47.185373,55107.68,21.173140,,


### Example of dataframe with multiple systems. 

First, the time delays need to be defined such that the first one is zero and the following ones are increasing.

In [6]:
def delays(n, low=5, high=15):
    """Draw cumulative time delays for a system of ``n`` images.

    The first delay is always 0. Each following delay is the previous one
    plus a gap drawn uniformly between ``low`` and ``high``, so the delays
    are increasing as long as ``low`` is positive.

    Parameters
    ----------
    n : int
        Number of images, i.e. number of delays to return.
    low, high : float, optional
        Bounds of the uniform distribution the gap between two consecutive
        delays is drawn from. Defaults: 5 and 15.

    Returns
    -------
    list
        ``n`` delays rounded to 4 decimals, starting with 0. Empty when
        ``n <= 0`` (there is no image to attach a delay to).
    """
    if n <= 0:
        return []

    DT = [0]
    for _ in range(n - 1):
        # Cumulative sum: each delay builds on the previous one.
        DT.append(DT[-1] + rand.uniform(low, high))
    return [round(num, 4) for num in DT]

Secondly, to create dataframes containing many systems (5000 in the cell below), the parameters are drawn randomly, most of them following a uniform distribution.  
Then, the produced dataframes are concatenated and we obtain 2 dataframes: one composed of all the parameters, and the second of the generated data.

In [None]:
# Generate many random systems and gather their parameter / data dataframes.
start = time.perf_counter()

# The per-system frames are collected in lists and concatenated once after
# the loop. Calling pd.concat inside the loop re-copies every accumulated row
# at each iteration (quadratic cost) and left f1/f2 unset when only one
# system was generated.
truth_frames = []
data_frames = []

for i in range(5000):
    nb_images = rand.choice([1, 1, 1, 2, 3, 4])  # 1 is repeated to maximize non lensed supernovae
    redshift = np.around(rand.uniform(1, 2.5), 4)  # do not put more than 2.5
    # NOTE(review): rounding to 4 decimals turns draws below 5e-5 into an
    # amplitude of exactly 0.0 — confirm this is acceptable.
    amplitude = np.around(rand.uniform(1e-5, 1e-1), 4)
    mu = [1 + np.around(rand.random(), 4) for _ in range(nb_images)]
    time_delays = delays(nb_images)
    # t0 is kept fixed: with a random t0 (np.random.randint(55000, 60000))
    # the fluxes came out constant. The reason is not understood; it may be
    # due to the definition of the Hsiao model.
    t0 = 55000.
    noise_level = np.around(rand.uniform(0, 0.2), 4)
    H = Hsiao(nb_images, redshift, amplitude, "Flux", np.array(mu), np.array(time_delays), t0, noise_level)

    truth, data = H.dataframe()
    truth_frames.append(truth)
    data_frames.append(data)

# f1: parameters of every system, f2: generated data of every system.
f1 = pd.concat(truth_frames)
f2 = pd.concat(data_frames)

end = time.perf_counter()
print("The time of execution of above program is :", end-start)

In [None]:
f1

In [None]:
f2

In [None]:
# Save the parameters (f1) and the generated data (f2) as CSV files.
# index=False keeps the dataframe row index out of the files.
f1.to_csv("truth.csv", index=False)
f2.to_csv("data.csv", index=False)

### Example of dataframes with multiple systems, with the number of images included in the data dataframe.

In [17]:
# Generate many random systems and gather their parameter / data dataframes,
# this time through dataframe2().
start = time.perf_counter()

# The per-system frames are collected in lists and concatenated once after
# the loop. Calling pd.concat inside the loop re-copies every accumulated row
# at each iteration (quadratic cost) and left f1/f2 unset when only one
# system was generated.
truth_frames = []
data_frames = []

for i in range(5000):
    nb_images = rand.choice([1, 2, 3, 4])  # we want the same number of systems for each number of images (classification)
    redshift = np.around(rand.uniform(1, 2.5), 4)  # do not put more than 2.5
    # NOTE(review): rounding to 4 decimals turns draws below 5e-5 into an
    # amplitude of exactly 0.0 — confirm this is acceptable.
    amplitude = np.around(rand.uniform(1e-5, 1e-1), 4)
    mu = [1 + np.around(rand.random(), 4) for _ in range(nb_images)]
    time_delays = delays(nb_images)
    t0 = 55000.
    noise_level = np.around(rand.uniform(0, 0.2), 4)
    # NOTE(review): nobs presumably sets the number of observations per band — confirm against the Hsiao class.
    H = Hsiao(nb_images, redshift, amplitude, "Flux", np.array(mu), np.array(time_delays), t0, noise_level, nobs=np.array([91, 91, 91]))

    truth, data = H.dataframe2()
    truth_frames.append(truth)
    data_frames.append(data)

# f1: parameters of every system, f2: generated data of every system.
f1 = pd.concat(truth_frames)
f2 = pd.concat(data_frames)

end = time.perf_counter()
print("The time of execution of above program is :", end-start)

  noises = np.full((len(self.bands), max(self.nobs)), self.pers*np.nanmax(self.total_flux_without_noise()))


The time of execution of above program is : 193.25567507743835


In [18]:
f1

Unnamed: 0,images,time origin,amplitude,time delays,magnifications,redshift,noise level
0,3,55000.0,0.0038,[ 0. 7.16 18.35],[1.16 1.25 1.61],2.3168,0.0108
0,4,55000.0,0.0466,[ 0. 9.36 19.75 27.11],[1.1 1.71 1.33 1.5 ],2.1203,0.1628
0,2,55000.0,0.0595,[ 0. 14.8],[1.62 1.34],1.8534,0.1762
0,2,55000.0,0.0258,[ 0. 14.15],[1.58 1.85],1.8829,0.0144
0,1,55000.0,0.0837,[0],[1.99],1.8445,0.0417
...,...,...,...,...,...,...,...
0,1,55000.0,0.0065,[0],[1.54],2.1326,0.1229
0,1,55000.0,0.0215,[0],[1.27],1.1306,0.131
0,1,55000.0,0.0752,[0],[1.76],1.1455,0.0572
0,2,55000.0,0.0066,[ 0. 14.86],[1.75 1.84],1.1272,0.1824


In [19]:
f2

Unnamed: 0,images,time sample band g,total flux + noise band g,time sample band r,total flux + noise band r,time sample band i,total flux + noise band i
0,3,54948.4531,94.922816,54949.4531,226.738475,54949.8531,433.174624
1,3,54950.2531,100.280537,54951.2531,266.087599,54957.0531,503.996277
2,3,54951.0531,143.333837,54952.0531,294.343885,54960.2531,709.746659
3,3,54953.8531,187.350336,54954.8531,373.450270,54967.4531,956.826204
4,3,54954.6531,208.332275,54956.6531,395.914928,54970.6531,981.441598
...,...,...,...,...,...,...,...
86,3,55116.3114,-460.647620,55105.3114,957.085278,55399.1114,501.197337
87,3,55117.1114,-337.068293,55107.1114,2574.629600,55403.3114,550.647606
88,3,55119.9114,-3324.193226,55110.9114,5041.509573,55406.5114,3017.204650
89,3,55121.7114,105.030892,55111.7114,681.028571,55411.7114,703.966201


In [24]:
# Save the parameters (f1) and the generated data (f2) as CSV files.
# index=False keeps the dataframe row index out of the files.
f1.to_csv("truth2.csv", index=False)
f2.to_csv("data2.csv", index=False)