In [3]:
import numpy as np
import pandas as pd
from scipy import stats

# Seed NumPy's global random state once, before any draws are made, so that
# running the notebook top to bottom yields the same numbers every time.
np.random.seed(42)

# Dimensions of every generated dataset: each one is a (T, K) array
T = 200  # Number of time periods
K = 6  # Number of variables (C, I, U, Y, P, R)

def generate_data(T, K, noise_level=0.1, non_gaussian=False):
    """Generate a synthetic macroeconomic dataset of shape ``(T, K)``.

    Columns 0-5 hold, in order, C (consumption), I (investment),
    U (unemployment), Y (GDP), P (price index) and R (interest rate).
    Columns beyond the sixth, if ``K > 6``, contain only the additive noise.

    Every draw uses NumPy's global random state (the SciPy ``rvs`` calls are
    made without ``random_state``), so seed it with ``np.random.seed`` first
    if reproducible output is needed.

    Parameters
    ----------
    T : int
        Number of time periods (rows).
    K : int
        Number of variables (columns); must be at least 6.
    noise_level : float, optional
        Standard deviation of the Gaussian noise added to every cell.
    non_gaussian : bool, optional
        If True, draw each series from a non-Gaussian distribution
        (geometric random walk, mixtures, beta, Student's t, gamma);
        otherwise build the series from normal draws.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(T, K)``. U is clipped to ``[0, 100]``; P and R are
        clipped to be non-negative.

    Raises
    ------
    ValueError
        If ``K`` is smaller than 6, since the six named series could not be
        stored.
    """
    # The body writes to fixed column indices 0-5; fail early with a clear
    # message instead of an IndexError part-way through the generation.
    if K < 6:
        raise ValueError(
            f"K must be at least 6 (columns C, I, U, Y, P, R); got {K}"
        )

    data = np.zeros((T, K))

    # NOTE: the order of the random draws below determines the output for a
    # given seed; keep it unchanged to reproduce previously generated data.
    if non_gaussian:
        # C: Consumption (geometric random walk, i.e. log-normal levels)
        data[:, 0] = np.exp(np.cumsum(np.random.normal(0.02, 0.05, T))) + 100

        # I: Investment (mixture of two normal distributions, ~30% / ~70%)
        data[:, 1] = np.where(np.random.rand(T) > 0.7,
                              np.random.normal(70, 10, T),
                              np.random.normal(40, 5, T))

        # U: Unemployment (beta distribution scaled to [0, 15])
        data[:, 2] = stats.beta.rvs(2, 5, size=T) * 15

        # Y: GDP (heavy-tailed Student's t-distribution)
        data[:, 3] = stats.t.rvs(df=3, loc=300, scale=20, size=T)

        # P: Price Index (gamma distribution shifted to start at 100)
        data[:, 4] = stats.gamma.rvs(a=2, loc=100, scale=2, size=T)

        # R: Interest Rate (mixture of exponential, ~20%, and normal, ~80%)
        data[:, 5] = np.where(np.random.rand(T) > 0.8,
                              stats.expon.rvs(loc=0, scale=2, size=T),
                              np.random.normal(2, 0.5, T))
    else:
        # Gaussian version
        # C and I: random walks with positive drift
        data[:, 0] = np.cumsum(np.random.normal(0.5, 0.1, T)) + 100
        data[:, 1] = np.cumsum(np.random.normal(0.3, 0.2, T)) + 50
        # U: white noise around 5
        data[:, 2] = 5 + np.random.normal(0, 0.5, T)
        # Y: sum of C and I plus a constant and a Gaussian shock
        data[:, 3] = data[:, 0] + data[:, 1] + np.random.normal(0, 5, T) + 200
        # P: slowly drifting random walk
        data[:, 4] = np.cumsum(np.random.normal(0.02, 0.005, T)) + 100
        # R: two sine cycles over the sample plus Gaussian noise
        data[:, 5] = 2 + 0.5 * np.sin(np.linspace(0, 4*np.pi, T)) + np.random.normal(0, 0.25, T)

    # Add Gaussian noise to every cell (including any columns beyond the sixth)
    data += np.random.normal(0, noise_level, (T, K))

    # Ensure non-negative values for certain variables
    data[:, 2] = np.clip(data[:, 2], 0, 100)  # Unemployment can't be negative or over 100%
    data[:, 4] = np.clip(data[:, 4], 0, None)  # Price index can't be negative
    data[:, 5] = np.clip(data[:, 5], 0, None)  # Interest rate can't be negative

    return data

### Generate Gaussian Datasets

In [4]:
# "Real-world" reference series: a single draw from the Gaussian generator.
# It is drawn before the ABM runs so the sequence of random draws is unchanged.
RW_data = generate_data(T, K, non_gaussian=False)

# ABM stand-in: M independent Gaussian runs with slightly more noise,
# stacked into one array of shape (M, T, K)
M = 10  # Number of Monte Carlo simulations
AB_data = np.array([
    generate_data(T, K, noise_level=0.15, non_gaussian=False)
    for _ in range(M)
])

# Wrap the arrays in labelled DataFrames, one per dataset
columns = 'C I U Y P R'.split()
RW_df = pd.DataFrame(RW_data, columns=columns)
AB_dfs = [pd.DataFrame(run, columns=columns) for run in AB_data]

# Persist everything as CSV; ABM files are numbered from 1
RW_df.to_csv('rw_data_gaussian.csv', index=False)
for i, df in enumerate(AB_dfs):
    df.to_csv(f'abm_data_{i+1}_gaussian.csv', index=False)

print("Gaussian datasets have been created and saved to CSV files.")

Gaussian datasets have been created and saved to CSV files.


### Generate Non-Gaussian Datasets

In [5]:
# "Real-world" reference series: a single draw from the non-Gaussian generator.
# It is drawn before the ABM runs so the sequence of random draws is unchanged.
RW_data = generate_data(T, K, non_gaussian=True)

# ABM stand-in: M independent non-Gaussian runs with slightly more noise,
# stacked into one array of shape (M, T, K)
M = 10  # Number of Monte Carlo simulations
AB_data = np.array([
    generate_data(T, K, noise_level=0.15, non_gaussian=True)
    for _ in range(M)
])

# Wrap the arrays in labelled DataFrames, one per dataset
columns = 'C I U Y P R'.split()
RW_df = pd.DataFrame(RW_data, columns=columns)
AB_dfs = [pd.DataFrame(run, columns=columns) for run in AB_data]

# Persist everything as CSV; ABM files are numbered from 1
RW_df.to_csv('rw_data_non_gaussian.csv', index=False)
for i, df in enumerate(AB_dfs):
    df.to_csv(f'abm_data_{i+1}_non_gaussian.csv', index=False)

print("Non-Gaussian datasets have been created and saved to CSV files.")

Non-Gaussian datasets have been created and saved to CSV files.
