In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error,r2_score

In [11]:
# Load the cleaned dataset (path is relative to the notebook's working directory).
df = pd.read_csv('cleaned_extracted_data.csv')

# Single source of truth for the target columns, so the feature matrix and the
# target frame cannot drift apart when this list is edited.
TARGET_COLUMNS = ['SHLT', 'COGTOT', 'MSTOT']

# Separate the input features and target values
X = df.drop(columns=TARGET_COLUMNS)  # Input features: every non-target column
y = df[TARGET_COLUMNS]  # Target variables

# Split the data into training and testing sets (80/20, fixed seed so the split
# is reproducible across runs)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# models_metrics: target name -> {'R2 Score': ..., 'MSE': ...} on the test split.
# fitted_models:  target name -> fitted LinearRegression, kept so each estimator
# can be inspected or reused later instead of being overwritten by the next
# loop iteration.
models_metrics = {}
fitted_models = {}

# Train separate models for each target variable
for target in y.columns:
    # Train the model on this single target column
    model = LinearRegression()
    model.fit(X_train, y_train[target])

    # Predict on the testing set
    y_pred = model.predict(X_test)

    # Evaluate the model on held-out data
    r2 = r2_score(y_test[target], y_pred)
    mse = mean_squared_error(y_test[target], y_pred)

    # Store the fitted model and its metrics under the target's name
    fitted_models[target] = model
    models_metrics[target] = {'R2 Score': r2, 'MSE': mse}

# Display the models' metrics
for target, metrics in models_metrics.items():
    print(f"Model for {target}: R2 Score = {metrics['R2 Score']}, MSE = {metrics['MSE']}")


Model for SHLT: R2 Score = 0.0914885615851928, MSE = 0.8728288887488391
Model for COGTOT: R2 Score = 0.0869117989042415, MSE = 15.792951841391258
Model for MSTOT: R2 Score = 0.09479460897473435, MSE = 3.240020599413017


In [13]:
# Summary statistics (count, mean, std, min, quartiles, max) for df's numeric columns.
df.describe()

Unnamed: 0,SHLT,BMI,MSTOT,COGTOT,INHPFN,INHPE,HHHRES,HCHILD,LIVSIB,HINPOV,HINPOVA,HAIRA,HATOTB,IEARN,HITOT,PENINC,HIGOV,PRPCNT,SLFEMP,RETMON
count,38487.0,38487.0,38487.0,38487.0,38487.0,38487.0,38487.0,38487.0,38487.0,38487.0,38487.0,38487.0,38487.0,38487.0,38487.0,38487.0,38487.0,38487.0,38487.0,38487.0
mean,2.475251,28.259106,13.365526,23.946761,0.041287,2.6e-05,2.678879,3.26661,2.944813,0.035518,0.035544,78742.64,579882.2,31068.2,102512.5,0.167953,0.561618,0.786214,0.280484,0.343285
std,0.970384,5.320587,1.874137,4.143787,0.255348,0.005097,1.140705,1.933677,2.451244,0.185089,0.185154,283976.1,1330807.0,52357.43,159141.1,0.373829,0.496195,0.620732,0.449242,0.474812
min,1.0,9.7,2.0,3.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,-3624527.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,2.0,24.6,12.0,21.0,0.0,0.0,2.0,2.0,1.0,0.0,0.0,0.0,76000.0,0.0,41812.0,0.0,0.0,0.0,0.0,0.0
50%,2.0,27.4,14.0,24.0,0.0,0.0,2.0,3.0,2.0,0.0,0.0,0.0,228400.0,15000.0,70880.0,0.0,1.0,1.0,0.0,0.0
75%,3.0,31.1,15.0,27.0,0.0,0.0,3.0,4.0,4.0,0.0,0.0,60000.0,588500.0,42000.0,119400.0,0.0,1.0,1.0,1.0,1.0
max,5.0,62.3,15.0,35.0,5.0,1.0,14.0,11.0,18.0,1.0,1.0,35027000.0,42226310.0,2000000.0,10036000.0,1.0,1.0,23.0,1.0,1.0
