In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error,r2_score

In [7]:
# Columns the models predict; every other column in the CSV is used as an input feature.
# Defined once so the feature/target split below cannot drift out of sync.
TARGET_COLUMNS = ['SHLT', 'COGTOT', 'MSTOT']

df = pd.read_csv('cleaned_extracted_data.csv')

# Separate the input features and target values
X = df.drop(columns=TARGET_COLUMNS)  # Input features
y = df[TARGET_COLUMNS]  # Target variables

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Maps each target name to its fitted model and that model's test-set metrics
models_metrics = {}

# Train a separate linear regression for each target variable
for target in y.columns:
    # Fit on the training rows for this target only
    model = LinearRegression()
    model.fit(X_train, y_train[target])

    # Predict on the held-out testing set
    y_pred = model.predict(X_test)

    # Evaluate the model
    r2 = r2_score(y_test[target], y_pred)
    mse = mean_squared_error(y_test[target], y_pred)

    # Keep the fitted estimator alongside its metrics: `model` is rebound on
    # every iteration, so without this only the last target's model survives.
    models_metrics[target] = {'model': model, 'R2 Score': r2, 'MSE': mse}

# Display the models' metrics
for target, metrics in models_metrics.items():
    print(f"Model for {target}: R2 Score = {metrics['R2 Score']}, MSE = {metrics['MSE']}")


Model for SHLT: R2 Score = 0.0738524814646534, MSE = 0.8228206267002112
Model for COGTOT: R2 Score = 0.012899357379497656, MSE = 17.00954825623232
Model for MSTOT: R2 Score = 0.031181298470694507, MSE = 3.114331817576285


In [8]:
# Show summary statistics (count, mean, std, min, quartiles, max) for the columns of df
df.describe()

Unnamed: 0,SHLT,BMI,MSTOT,COGTOT,INHPFN,INHPE,HHHRES,HCHILD,LIVSIB,HINPOV,HINPOVA,HAIRA,HATOTB,IEARN,HITOT,PENINC,HIGOV,PRPCNT,SLFEMP,RETMON
count,13212.0,13212.0,13212.0,13212.0,13212.0,13212.0,13212.0,13212.0,13212.0,13212.0,13212.0,13212.0,13212.0,13212.0,13212.0,13212.0,13212.0,13212.0,13212.0,13212.0
mean,2.472979,27.856804,13.50545,23.72525,0.045338,7.6e-05,2.306161,3.460188,2.520739,0.016727,0.016803,108892.7,728804.9,12613.818939,86394.33,0.354148,0.923176,0.67749,0.388435,1.0
std,0.934027,4.841408,1.782865,4.06663,0.256302,0.0087,0.760294,1.949209,2.249977,0.128252,0.128537,276907.9,1548633.0,31149.777109,147364.1,0.478272,0.266322,0.637129,0.487413,0.0
min,1.0,11.5,2.0,5.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,-3624527.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
25%,2.0,24.4,13.0,21.0,0.0,0.0,2.0,2.0,1.0,0.0,0.0,0.0,125237.5,0.0,36504.64,0.0,1.0,0.0,0.0,1.0
50%,2.0,27.3,14.0,24.0,0.0,0.0,2.0,3.0,2.0,0.0,0.0,11000.0,320000.0,3600.0,57312.0,0.0,1.0,1.0,0.0,1.0
75%,3.0,30.6,15.0,26.0,0.0,0.0,2.0,4.0,4.0,0.0,0.0,100000.0,759000.0,12925.0,94850.0,1.0,1.0,1.0,1.0,1.0
max,5.0,62.2,15.0,35.0,4.0,1.0,11.0,11.0,18.0,1.0,1.0,12080000.0,31660000.0,700000.0,8017657.0,1.0,1.0,21.0,1.0,1.0
