In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
# --- Synthetic cohort generation ---------------------------------------------
# Every draw below comes from NumPy's legacy global RNG, so the ORDER of the
# calls determines the values. Keep the sequence intact to reproduce the data.
num_samples = 1000
np.random.seed(0)

# Demographics: integer ages in [0, 100], a 0/1 gender flag, and heights drawn
# uniformly from [100, 201).
ages = np.random.randint(low=0, high=101, size=num_samples)
genders = np.random.randint(low=0, high=2, size=num_samples)
heights = np.random.uniform(low=100, high=201, size=num_samples)

# Three 0/1 symptom indicators, drawn one after another in this exact order.
cough, shortness_of_breath, chest_pain = [
    np.random.randint(low=0, high=2, size=num_samples) for _ in range(3)
]

# Target: a weighted sum of the demographics plus Gaussian noise (mean 0,
# standard deviation 5), minus a fixed penalty for each symptom present.
# NOTE(review): the noise scale is large next to the deterministic part
# (heights * 0.03 only spans roughly 3 to 6), so a linear model can explain
# very little of the variance -- confirm this is intended.
demographic_signal = heights * 0.03 + ages * 0.02 + genders * 0.2
noise = np.random.normal(loc=0, scale=5, size=num_samples)
base_tidal_volume = demographic_signal + noise

symptom_penalty = cough * 0.15 + shortness_of_breath * 0.2 + chest_pain * 0.1
tidal_volume = base_tidal_volume - symptom_penalty

# --- Assemble the table, persist it, and preview it --------------------------
# Column labels are paired with their source arrays in the order the columns
# should appear; each column keeps the dtype of the array it came from.
column_labels = [
    'Age',
    'Gender',
    'Height',
    'Cough',
    'Shortness_of_Breath',
    'Chest_Pain',
    'Tidal_Volume',
]
column_arrays = [
    ages,
    genders,
    heights,
    cough,
    shortness_of_breath,
    chest_pain,
    tidal_volume,
]
data = pd.DataFrame(dict(zip(column_labels, column_arrays)))

# Write the dataset to the working directory without the row index, then show
# the first five rows as plain text.
data.to_csv('synthetic_lung_function_data.csv', index=False)
print(data.head(5))
# --- Fit and evaluate a linear regression on the saved dataset ---------------
# The frame is reloaded from the CSV file, so the model is trained on the
# values exactly as they were serialized to disk.
data = pd.read_csv('synthetic_lung_function_data.csv')

feature_columns = [
    'Age',
    'Gender',
    'Height',
    'Cough',
    'Shortness_of_Breath',
    'Chest_Pain',
]
X = data[feature_columns]
y = data['Tidal_Volume']

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

# Ordinary least squares; fit() returns the estimator itself, so the call can
# be chained onto the constructor.
model = LinearRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)

# Report held-out error and explained variance.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'Mean Squared Error: {mse}')
print(f'R^2 Score: {r2}')


   Age  Gender      Height  Cough  Shortness_of_Breath  Chest_Pain  \
0   44       0  190.271048      0                    0           0   
1   47       1  158.081233      1                    0           0   
2   64       0  125.933549      1                    0           1   
3   67       1  123.915794      1                    1           1   
4   67       1  145.286953      1                    0           0   

   Tidal_Volume  
0      8.816807  
1      9.115751  
2      3.235542  
3      5.418336  
4      7.828486  
Mean Squared Error: 25.233649344578293
R^2 Score: 0.0036298710464576223
