## Simulating Datasets Using Probabilistic Methods

In [2]:
import numpy as np
import pandas as pd
import seaborn as sns 
import matplotlib.pyplot as plt
import os 

In [3]:
# Gaussian (Normal) Distribution for Health Index
# Assume average Index of 70, std of 15
health_index = np.random.normal(loc=70, scale=15, size=1000)  
health_index.shape

(1000,)

In [4]:
# Simulating Gas Concentrations Using Empirical Rules
# For dissolved gas analysis, certain patterns of gas levels and ratios are indicative of different faults. 
# Assuming ranges for gas concentrations in ppm for normal operation

# Assume H2, CH4 normal concentration and C2H2 (higher in arcing faults)
hydrogen = np.random.normal(loc=50, scale=20, size=1000)   
methane = np.random.normal(loc=30, scale=10, size=1000)
acetylene = np.random.normal(loc=5, scale=3, size=1000) 


In [5]:
# Correlations Between Features
# Use covariance matrices to simulate correlated variables like gas ratios.
# Simulate correlations between gases during a fault (e.g., high acetylene (C₂H₂) might correlate with higher ethylene (C₂H₄) during arcing)
# Mean values for H2, CH4, and C2H2
mean = [50, 30, 5]  
# Correlations between gases
cov_matrix = [[100, 40, 10], [40, 30, 5], [10, 5, 8]]  
gas_data = np.random.multivariate_normal(mean, cov_matrix, 1000)
gas_data

array([[51.63142892, 35.83299631,  7.13662435],
       [38.84453953, 23.833003  ,  3.52809049],
       [44.21321325, 27.6802383 ,  0.93803572],
       ...,
       [51.83197543, 32.07425467,  8.31482221],
       [39.95699775, 32.82941356,  7.97854191],
       [45.85993596, 29.35109717,  5.38158306]])

In [9]:
# Simulating Operational Data
# Temperature and Load Cycles
# Simulate temperature and load data using sinusoidal functions to represent daily or seasonal fluctuations.
import numpy as np
time = np.arange(0, 1000, 1)  
 # Seasonal temperature variation
temperature = 30 + 10 * np.sin(2 * np.pi * time / 365) 
# Daily load (KW) fluctuation by time
load = 50 + 20 * np.sin(2 * np.pi * time / 24) 

(1000,)

In [11]:
load.shape
load = pd.DataFrame(load)


             0
0    50.000000
1    55.176381
2    60.000000
3    64.142136
4    67.320508
..         ...
995  55.176381
996  50.000000
997  44.823619
998  40.000000
999  35.857864

[1000 rows x 1 columns]
