<a href="https://colab.research.google.com/github/himeshps/Tidal_Volume_Submission_230478/blob/main/DatasetGenerationFemale.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd

# Defined the features with their means and standard deviations
features = {
    'weight': {'mean': 60.81, 'std': 11.10},
    'height': {'mean': 155.88, 'std': 6.15},
    'age': {'mean': 55.51, 'std': 16.93},
    'chest_circumference': {'mean': 91.73, 'std': 11.21},
    'BodyMassIndex': {'mean': 25.00, 'std': 4.15},
    'tidal_volume': {'mean': 440.91, 'std': 47.98},
}

# Extract means and standard deviations
means = [features[f]['mean'] for f in features]
stds = [features[f]['std'] for f in features]

In [2]:
# Assumed correlation coefficients matrix (symmetric matrix with 1's on the diagonal).
#Assumptions are also individually based on research and analysis.
correlations = np.array([
    [1.0, 0.821, 0.31,  0.85,0.866,0.90],
    [0.821,  1.0,  0.26,  0.56,0.418, 0.85],
    [0.31,  0.26,  1.0, 0.36, 0.25, -0.56],
    [0.85, 0.56, 0.36,  1.0, 0.56, 0.80],
    [0.866,0.418, 0.25,  0.56, 1.0, 0.67],
    [ 0.90, 0.85, -0.56, 0.80, 0.67, 1.0]
])

In [4]:
# Initializing covariance matrix
cov_matrix = np.zeros((6, 6))

# Filling covariance matrix
for i in range(len(features)):
    for j in range(len(features)):
        cov_matrix[i, j] = correlations[i, j] * stds[i] * stds[j]

print("Covariance Matrix:\n", cov_matrix)

Covariance Matrix:
 [[ 123.21       56.045565   58.25613   105.76635    39.89229   479.3202  ]
 [  56.045565   37.8225     27.07107    38.60724    10.668405  250.81545 ]
 [  58.25613    27.07107   286.6249     68.322708   17.564875 -454.888784]
 [ 105.76635    38.60724    68.322708  125.6641     26.05204   430.28464 ]
 [  39.89229    10.668405   17.564875   26.05204    17.2225    133.40839 ]
 [ 479.3202    250.81545  -454.888784  430.28464   133.40839  2302.0804  ]]


In [6]:
# Number of samples to generate as from the source
num_samples = 154

# Generation of the synthetic dataset
synthetic_data = np.random.multivariate_normal(mean=means, cov=cov_matrix, size=num_samples)

# Convert to DataFrame
columns = list(features.keys())
df = pd.DataFrame(synthetic_data, columns=columns)

# To Display the first few rows of the generated dataset
print(df.head())

# To Verify the generated data
print("Means of the generated data:\n", df.mean())
print("Standard deviations of the generated data:\n", df.std())
print("Covariance matrix of the generated data:\n", df.cov())

# Saving as CSV
df.to_csv('synthetic_dataset_6_features_female.csv', index=False)

      weight      height        age  chest_circumference  BodyMassIndex  \
0  45.757826  140.552437  62.235624            82.073387      24.240196   
1  58.014769  161.160835  66.657825            75.641414      24.013658   
2  60.059109  158.299800  95.775068            95.214410      21.717779   
3  50.247011  154.592054  25.776156            77.497380      18.608985   
4  69.506174  165.243121  35.442947           101.268915      23.699756   

   tidal_volume  
0    396.848075  
1    367.990908  
2    390.850029  
3    495.418275  
4    476.450908  
Means of the generated data:
 weight                  59.452282
height                 155.681271
age                     52.351450
chest_circumference     90.385648
BodyMassIndex           24.675103
tidal_volume           439.212780
dtype: float64
Standard deviations of the generated data:
 weight                 13.332091
height                  8.665453
age                    18.887268
chest_circumference    13.203363
BodyMassIndex   

  synthetic_data = np.random.multivariate_normal(mean=means, cov=cov_matrix, size=num_samples)


In [7]:
from google.colab import files

# Download the file
files.download('synthetic_dataset_6_features_female.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>