<a href="https://colab.research.google.com/github/bish-ai/Bishal.py/blob/main/medical_cost_personal_regressor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

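# Task

Rebuild the feature preprocessing with a `ColumnTransformer` (scaling the numerical columns and one-hot encoding the categorical columns, including `smoker`), re-apply PCA retaining 95% of the variance, re-train the linear regression model, and report its R2 score: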
```python
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Corrected feature preprocessing: rebuild the ColumnTransformer so that every
# listed column is handled explicitly.
# NOTE(review): x_train, x_test and the two pt_target_ss_target_* target arrays
# are not defined in this snippet; they are expected to exist already — confirm.
# Column groups used by the transformer below.
categorical_features = ["sex", "smoker", "region"]
numerical_features = ["age", "bmi", "children"]

# StandardScaler is applied to the numerical columns and OneHotEncoder to the
# categorical columns (including 'smoker'). handle_unknown='ignore' makes the
# encoder tolerate categories at transform time that were not seen during fit.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ],
    remainder='passthrough' # Pass through any other columns not explicitly transformed (none in this case)
)

# Fit the preprocessing on the training data, then apply the fitted transformer
# unchanged to the test data.
x_train_processed = preprocessor.fit_transform(x_train)
x_test_processed = preprocessor.transform(x_test)

# Re-apply PCA to the newly transformed feature sets; it is fitted on the
# training matrix and reused for the test matrix.
pca_feauture_new = PCA(n_components=0.95) # Retain 95% of variance
pca_feauture_x_train = pca_feauture_new.fit_transform(x_train_processed)
pca_feauture_x_test = pca_feauture_new.transform(x_test_processed)

# Re-train the Linear Regression model on the PCA-transformed training features.
# NOTE(review): the target name suggests it was transformed upstream — confirm.
lr_new = LinearRegression()
lr_new.fit(pca_feauture_x_train, pt_target_ss_target_fit_transform_y_train)

# Make predictions with the re-trained model on the PCA-transformed test features
lr_pred_new = lr_new.predict(pca_feauture_x_test)

# Evaluate the corrected model: R2 of the predictions against the test target
# array passed in.
r2_new = r2_score(pt_target_ss_target_transform_y_test, lr_pred_new)
print(f"R2 Score of the corrected Linear Regression model: {r2_new}")
```

In [54]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Column groups: numerical columns are standardised, categorical columns are
# one-hot encoded.
numerical_features = ['age', 'bmi', 'children']
categorical_features = ['sex', 'smoker', 'region']

# handle_unknown='ignore' lets the encoder accept categories at transform time
# that were absent during fit; remainder='passthrough' keeps any column that is
# not listed in either group.
preprocessor = ColumnTransformer(
    [
        ('num', StandardScaler(), numerical_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
    ],
    remainder='passthrough',
)

# Fit on the training split only; the test split reuses the fitted transformer.
# NOTE(review): x_train / x_test come from an earlier cell not shown here.
x_train_processed = preprocessor.fit_transform(x_train)
x_test_processed = preprocessor.transform(x_test)

# Sanity check: both splits must end up with the same number of columns.
print("Shape of x_train_processed:", x_train_processed.shape)
print("Shape of x_test_processed:", x_test_processed.shape)

Shape of x_train_processed: (936, 11)
Shape of x_test_processed: (402, 11)


In [55]:
from sklearn.decomposition import PCA

# Reduce the preprocessed feature matrix with PCA. A float n_components between
# 0 and 1 asks PCA to keep as many components as are needed to explain that
# fraction of the variance.
pca_feauture_new = PCA(n_components=0.95) # Retain 95% of variance
# Fit the projection on the training matrix only, then apply the same fitted
# projection to the test matrix.
pca_feauture_x_train = pca_feauture_new.fit_transform(x_train_processed)
pca_feauture_x_test = pca_feauture_new.transform(x_test_processed)

# The second entry of each shape is the number of components that were kept.
print(f"Shape of PCA-transformed x_train: {pca_feauture_x_train.shape}")
print(f"Shape of PCA-transformed x_test: {pca_feauture_x_test.shape}")

Shape of PCA-transformed x_train: (936, 7)
Shape of PCA-transformed x_test: (402, 7)


In [56]:
from sklearn.linear_model import LinearRegression

# Fit a linear regression on the PCA-reduced training features. fit() returns
# the estimator itself, so construction and fitting are chained.
# NOTE(review): the target name suggests it was transformed in an earlier cell
# not shown here — confirm.
lr_new = LinearRegression().fit(
    pca_feauture_x_train,
    pt_target_ss_target_fit_transform_y_train,
)
print("Linear Regression model re-trained successfully.")

Linear Regression model re-trained successfully.


In [57]:
from sklearn.metrics import r2_score

# Predict on the PCA-reduced test features with the re-trained model.
lr_pred_new = lr_new.predict(pca_feauture_x_test)

# R2 of the predictions against the test target array.
# NOTE(review): the score is on the scale of the target array passed in, which
# by its name appears to be transformed — confirm before comparing with
# metrics computed on the raw target.
r2_new = r2_score(
    y_true=pt_target_ss_target_transform_y_test,
    y_pred=lr_pred_new,
)
print(f"R2 Score of the corrected Linear Regression model: {r2_new}")

R2 Score of the corrected Linear Regression model: 0.8070794695934931


In [59]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Absolute and squared error of the test-set predictions.
# NOTE(review): both metrics are in the units of the target array passed in,
# which by its name appears to be transformed — confirm before reading them as
# raw-target errors.
mae = mean_absolute_error(
    y_true=pt_target_ss_target_transform_y_test,
    y_pred=lr_pred_new,
)
mse = mean_squared_error(
    y_true=pt_target_ss_target_transform_y_test,
    y_pred=lr_pred_new,
)

print(f"Mean Absolute Error (MAE): {mae}")
print(f"Mean Squared Error (MSE): {mse}")

Mean Absolute Error (MAE): 0.30116639295960085
Mean Squared Error (MSE): 0.19464048903565537
