<a href="https://colab.research.google.com/github/himeshps/Tidal_Volume_Submission_230478/blob/main/DatasetGenerationMale.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd

# Define the features with their mean and standard deviation
features = {
    'weight': {'mean': 70.97, 'std': 12.51},
    'height': {'mean': 167.96, 'std': 7.26},
    'age': {'mean': 51.97, 'std': 16.11},
    'chest_circumference': {'mean': 97.01, 'std': 8.99},
    'BodyMassIndex': {'mean': 25.08, 'std': 3.68},
    'tidal_volume': {'mean': 516.04, 'std': 62.33},
}

# Extract means and standard deviations
means = [features[f]['mean'] for f in features]
stds = [features[f]['std'] for f in features]


In [2]:
# Assumed correlation coefficients matrix (symmetric matrix with 1's on the diagonal)
# Assumed correlation coefficients matrix (symmetric matrix with 1's on the diagonal)
correlations = np.array([
    [1.0, 0.795, 0.31,  0.85,0.862,0.90],
    [0.795,  1.0,  0.26,  0.56,0.396, 0.85],
    [0.31,  0.26,  1.0, 0.38, 0.25, -0.56],
    [0.85, 0.56, 0.38,  1.0, 0.56, 0.80],
    [0.862,0.396, 0.25,  0.56, 1.0, 0.69],
    [ 0.90, 0.85, -0.56, 0.80, 0.69, 1.0]
])


In [5]:
# Initialize covariance matrix
cov_matrix = np.zeros((6, 6))

# Fill covariance matrix
for i in range(len(features)):
    for j in range(len(features)):
        cov_matrix[i, j] = correlations[i, j] * stds[i] * stds[j]

print("Covariance Matrix:\n", cov_matrix)


Covariance Matrix:
 [[ 156.5001      72.203967    62.476191    95.595165    39.6837216
   701.77347  ]
 [  72.203967    52.7076      30.409236    36.549744    10.5798528
   384.63843  ]
 [  62.476191    30.409236   259.5321      55.034982    14.8212
  -562.316328 ]
 [  95.595165    36.549744    55.034982    80.8201      18.526592
   448.27736  ]
 [  39.6837216   10.5798528   14.8212      18.526592    13.5424
   158.268336 ]
 [ 701.77347    384.63843   -562.316328   448.27736    158.268336
  3885.0289   ]]


In [6]:
# Number of samples to generate
num_samples = 134

# Generate the synthetic dataset
synthetic_data = np.random.multivariate_normal(mean=means, cov=cov_matrix, size=num_samples)

# Convert to DataFrame
columns = list(features.keys())
df = pd.DataFrame(synthetic_data, columns=columns)

# Display the first few rows of the generated dataset
print(df.head())

# Verify the generated data
print("Means of the generated data:\n", df.mean())
print("Standard deviations of the generated data:\n", df.std())
print("Covariance matrix of the generated data:\n", df.cov())

# Save to CSV if needed
df.to_csv('synthetic_dataset_7_features.csv', index=False)


      weight      height        age  chest_circumference  BodyMassIndex  \
0  91.346296  171.408031  70.962901           105.679144      34.834027   
1  64.829972  160.503045  78.119063            99.424462      24.183873   
2  45.050329  153.322647  67.279286            78.610257      19.446265   
3  68.790600  168.168819  43.247930            96.142707      23.808737   
4  90.864228  182.063152  72.247020           109.592221      28.201905   

   tidal_volume  
0    589.182730  
1    443.636740  
2    400.453505  
3    532.410864  
4    592.913973  
Means of the generated data:
 weight                  73.797462
height                 168.870098
age                     55.230878
chest_circumference     99.329352
BodyMassIndex           25.830123
tidal_volume           525.004454
dtype: float64
Standard deviations of the generated data:
 weight                 15.842306
height                  9.848120
age                    17.621400
chest_circumference    11.502782
BodyMassIndex   

  synthetic_data = np.random.multivariate_normal(mean=means, cov=cov_matrix, size=num_samples)


In [7]:
from google.colab import files

# Download the file
files.download('synthetic_dataset_7_features.csv')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>