In [13]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import json
from scipy.integrate import odeint
import Global as gl

In [14]:
# Show the experiment configuration exposed by the Global module (imported as gl)
print(gl.info)


seed_value = 41
eps=0.02
h=1
step=0.001
mu,sigma=0,0.1
macrostep=5
microstep=1
model_name = ToyModel6
x_0 = [1]
w=60


In [15]:
# Set a seed for reproducibility (seeds NumPy's global random state used by np.random.*)
np.random.seed(gl.seed_value)
# Model name from the shared configuration; used below to build the output file names
model_name = gl.model_name

# Reality ODE (Data Generation model)
We use a one-dimensional "reality" model to generate the test data:

$\frac{dX^\epsilon}{dt} = -X^\epsilon cos((2\pi + \epsilon cos(10t))\frac{t}{\epsilon}) $

$X^{\epsilon}(0) = x = 1$

# Assimilation  Model:
For the assimilation we use the following simplified model:

$\frac{dX^\epsilon}{dt} = -X^\epsilon cos(2\pi \frac{t}{\epsilon}) $

# Analytical solution :

$X^\epsilon(t) = e^{-\epsilon \frac{sin(2\pi \frac{t}{\epsilon})}{2\pi}}$

# Homogenization :


$\begin{cases}
X^\epsilon(t) = X^0(t)-\epsilon \frac{sin(2\pi \frac{t}{\epsilon})}{2\pi} = \Phi(t,\epsilon,A,v,X^0)\\
X^0(t) = 1
\end{cases}$

# Transition
$\begin{cases}
X^\epsilon(t) = wX^0(t)+(1-w)A-\epsilon \frac{sin((2\pi+v) \frac{t}{\epsilon})}{2\pi+v}\\
w = \frac{1}{2}
\end{cases}$







# Parameters
$\begin{cases}
\epsilon = 1/50\\
w = 60s\\
T = 10 hours\\
\Delta t=30minutes\\
\Delta t_\epsilon=10^{-3}seconds\\
h = \frac{1s}{\Delta t_\epsilon}\\
\mu_0,\sigma_0=0,0.1\\
\end{cases}$

## Reality 


# Data Generation

We begin by creating a dataframe that contains the generated values over a period of "h" hours, with one value generated every "step" seconds, i.e. $\Delta t=step$.

* Ue the microscopic analytical solution.
* U0 the macroscopic analytical solution.
* eps is the microstructure ratio $\epsilon$.
* h number of hours.
* step is the time step proportional to $\epsilon$.
* window is the rolling average time window in case we don't want to use U0.


In [16]:
def genData(Ue,U0,eps,h,step,x1,x2,x3,x4):
    """Tabulate the microscopic solution on a fine time grid next to the macroscopic one.

    Ue is called as Ue(t, eps, x1, x2, x3, x4) and its first four components
    are stored; U0 is called once as U0(x1, x2, x3, x4). The time grid starts
    at t = 1, covers h*3600 + 100 whole seconds, and splits each of them into
    int(1/step) sub-steps of length `step`.

    Returns a DataFrame with the columns t, Ve1, Ve2, Xe1, Xe2 (microscopic)
    followed by V01, V02, X01, X02 (filled from the four components of U0).
    """
    # Sample times: every whole second followed by its sub-steps
    sample_times = [second + step*k
                    for second in range(1, h*3600+101)
                    for k in range(int(1/step))]

    # One row per sample: the time, then the four microscopic components
    rows = []
    for t in sample_times:
        micro = Ue(t, eps, x1, x2, x3, x4)
        rows.append([t, micro[0], micro[1], micro[2], micro[3]])

    df = pd.DataFrame(np.array(rows))
    df.columns = ['t','Ve1','Ve2','Xe1','Xe2']

    # Macroscopic components are evaluated once and assigned column by column
    macro = U0(x1, x2, x3, x4)
    for position, column in enumerate(['V01','V02','X01','X02']):
        df[column] = macro[position]
    return df


## Rolling average generator

In [17]:
#Data generator using moving average with time window instead of U0 
def genDatawithMean(Ue,x1,x2,x3,x4,window,eps,h,step):
    """Tabulate the microscopic solution and derive macroscopic columns by rolling mean.

    Ue is called as Ue(t, eps, x1, x2, x3, x4) on a grid that starts at t = 1,
    covers h*3600 + 100 whole seconds and splits each into int(1/step)
    sub-steps. Each macroscopic column is the trailing rolling mean of the
    matching microscopic column over window*int(1/step) rows, so the first
    rows of those columns are NaN until the window is full.

    Returns a DataFrame with columns t, Ve1, Ve2, Xe1, Xe2, V01, V02, X01, X02.
    """
    sample_times = [second + step*k
                    for second in range(1, h*3600+101)
                    for k in range(int(1/step))]

    rows = []
    for t in sample_times:
        micro = Ue(t, eps, x1, x2, x3, x4)
        rows.append([t, micro[0], micro[1], micro[2], micro[3]])

    df = pd.DataFrame(np.array(rows))
    df.columns = ['t','Ve1','Ve2','Xe1','Xe2']

    # Macroscopic value = moving average of the microscopic one over `window` seconds
    column_pairs = (('V01','Ve1'), ('V02','Ve2'), ('X01','Xe1'), ('X02','Xe2'))
    for macro_col, micro_col in column_pairs:
        df[macro_col] = df[micro_col].rolling(window=window*(int(1/step))).mean()
    return df

## ODE Solver Generator

In [18]:
# Define the system of ODEs
def ode_system(y, t, eps):
    """Right-hand side of the 1D reality ODE, in the (y, t, *args) form passed to odeint.

    Returns [dX/dt] with dX/dt = -X * cos((2*pi + eps*cos(10*t)) * (t/eps)),
    where X is the first component of y.
    """
    state = y[0]
    # Modulated angular rate first, then the fast phase it drives
    angular_rate = 2*np.pi+eps*np.cos(10*t)
    fast_phase = angular_rate*(t/eps)
    return [-state*np.cos(fast_phase)]

def genDatawithOdeSolver(x0,window,eps,h,step):
    """Integrate the reality ODE numerically and attach its rolling-mean macroscopic value.

    The ODE defined by ode_system is solved with odeint from the initial
    condition x0 on the grid np.arange(0, h*3600 + 101, step). The macroscopic
    column X0 is the trailing rolling mean of Xe over window*int(1/step) rows,
    so it is NaN until the first window is full.

    Returns a DataFrame with the columns t, X0, Xe (in that order).
    """
    time_grid = np.arange(0, h * 3600+101, step)

    # odeint returns one row per grid point; the single state column is Xe
    trajectory = odeint(ode_system, x0, time_grid, args=(eps,))
    df = pd.DataFrame(trajectory, columns=['Xe'])
    df['t'] = time_grid

    # Macroscopic value = moving average of Xe over `window` seconds of samples
    rows_per_window = window*(int(1/step))
    df['X0'] = df['Xe'].rolling(window=rows_per_window).mean()
    return df.reindex(columns=['t', 'X0', 'Xe'])

## Data Noising
This function adds Gaussian white noise $\mathcal{N}(\mu,\sigma)$ to the macroscopic data (column `X0`), where $\mu$ is the mean and $\sigma$ the standard deviation.

In [19]:
def genNoiseData(df,gaussian_noise):
    """Perturb the macroscopic column 'X0' of df in place with the supplied noise.

    gaussian_noise is added element-wise through Series.add. The frame is
    modified directly and nothing is returned.
    """
    noisy_macro = df['X0'].add(gaussian_noise)
    df['X0'] = noisy_macro


## Data Points
At each macrostep, take a single macroscopic value together with all the microscopic values from the microscopic window that starts at the same time.

In [20]:
def interval(df,macrostep,microstep):
    """Sample the trajectory once per macroscopic step.

    Target times start at 0 and advance by macrostep*60 (in the units of
    df['t']) while they stay below the largest 't'. For each target, the first
    row whose 't' is at or after it is selected, and that row's time, its 'X0'
    value and the 'Xe' values of that row plus the following microstep-1 rows
    are collected.

    Parameters
    ----------
    df : DataFrame with the columns 't', 'X0' and 'Xe'. The microscopic window
        is taken by index-label arithmetic, so consecutive integer labels are
        assumed (the default index of a freshly built frame).
    macrostep : spacing between two samples, in minutes.
    microstep : number of consecutive 'Xe' rows attached to each sample.
        Float values (e.g. the ratio seconds/step) are rounded to the nearest
        integer so that rounding error cannot silently drop the last row.

    Returns
    -------
    list of [t, X0, [Xe, ...]] entries. The entry for the very first target
    time is discarded (presumably because a rolling-mean X0 is still NaN
    there; verify against how df was built).
    """
    # Spacing between two macroscopic samples, converted from minutes to seconds
    interval_seconds = macrostep * 60

    # The window length is a row count: force it to a true integer
    n_micro = int(round(microstep))

    times = df['t']
    # The frame is not modified in the loop, so the end time is computed once
    t_end = times.max()

    vals=[]
    current_time = 0
    while current_time < t_end:

        # Label of the first row whose 't' is >= current_time. Masking the
        # index directly avoids building a filtered copy of the whole frame.
        index = df.index[(times >= current_time).to_numpy()][0]

        # Macroscopic value at that row
        current_t = df.at[index, 't']
        print("index ",current_t)
        current_X0 = float(df.at[index, 'X0'])

        # Microscopic values of that row and the next n_micro-1 rows
        # (.loc label slices include both ends)
        next_microsteps_Xe = df.loc[index:index + (n_micro-1), 'Xe'].tolist()

        vals.append([current_t,current_X0,next_microsteps_Xe])

        # Move on to the next macroscopic sample time
        current_time += interval_seconds

    # Drop the sample taken at the first target time
    return vals[1:]

# JSON

In [21]:
def tojson(vals,name):
    """Serialize vals as JSON into the file at path `name` and report where it went.

    The file is opened in text write mode, so an existing file is overwritten.
    """
    with open(name, "w") as destination:
        json.dump(vals, destination)

    print(f"Data has been saved to {name}")


# Initialization

In [22]:
# Micro structure ratio
eps=gl.eps

# Number of hours of data to generate
h=gl.h

# Time step in seconds
step=gl.step

# Macroscopic noise: mean mu and standard deviation sigma
mu,sigma=gl.mu,gl.sigma

# Macroscopic step in minutes
macrostep=gl.macrostep

# Number of microscopic samples captured per macroscopic value:
# gl.microstep seconds of data at one sample every gl.step seconds.
# This is a row count, so it is rounded to a true integer; the raw float
# ratio can land just below the intended value (e.g. 0.3/0.1 is
# 2.9999999999999996) and would then cut the last sample off the window.
microstep=int(round(gl.microstep/gl.step))

# Generation 
## Initial Params

In [23]:
# Rolling-average window length in seconds; multiplied by int(1/step) to get a row count
w=gl.w
# Initial state handed to the ODE solver as its initial condition
x_0=gl.x_0

## Full True Data

In [24]:
# Integrate the reality ODE; df1 holds the full-resolution table with columns t, X0, Xe
df1=genDatawithOdeSolver(x_0,w,eps,h,step)
# Save the complete noise-free trajectory (to_csv does not create missing directories)
df1.to_csv(("GenData/True/Full/"+model_name+".csv"), index=False)

## True Cut Data

In [25]:
# Cut the true data: keep one sample per macroscopic step.
# Each entry of vals1 is [t, X0, [Xe values of the microscopic window]].
vals1=interval(df1,macrostep,microstep)


index  0.0
index  300.0
index  600.0
index  900.0
index  1200.0
index  1500.0
index  1800.0
index  2100.0
index  2400.0
index  2700.0
index  3000.0
index  3300.0
index  3600.0


In [26]:
# Sanity check: the microscopic window must be a plain list so json can serialize it
print(type(vals1[0][2]))
# Save the clean (noise-free) cut data as JSON
tojson(vals1,("GenData/True/Cut/"+model_name+".json"))

<class 'list'>
Data has been saved to GenData/True/Cut/ToyModel6.json


## Adding noise to data

In [27]:
# One independent N(mu, sigma) draw per row, taken from the seeded global NumPy generator
gaussian_noise = np.random.normal(mu, sigma, len(df1))
# Work on a copy so that df1 keeps the noise-free values
df1_noised=df1.copy()
# Adds the noise to the 'X0' column of df1_noised in place
genNoiseData(df1_noised,gaussian_noise)

## Noised Full Data

In [28]:
# Save the full-resolution table whose 'X0' column carries the added noise
df1_noised.to_csv(("GenData/Noised/Full/"+model_name+".csv"), index=False)

## Noised Cut Data

In [29]:
# Take measurements from the noised table at every macroscopic step.
# NOTE: this rebinds vals1, so the noise-free samples computed earlier are no longer held in memory.
vals1=interval(df1_noised,macrostep,microstep)
# Save the noised cut data as JSON
tojson(vals1,("GenData/Noised/Cut/"+model_name+".json"))


index  0.0
index  300.0
index  600.0
index  900.0
index  1200.0
index  1500.0
index  1800.0
index  2100.0
index  2400.0
index  2700.0
index  3000.0
index  3300.0
index  3600.0
Data has been saved to GenData/Noised/Cut/ToyModel6.json
