<a href="https://colab.research.google.com/github/divsal009/div/blob/master/HOME1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import accuracy_score
from imblearn.over_sampling import SMOTE
import xgboost as xgb

# Load your dataset
df = pd.read_csv('/content/DD.csv')

# Combine 'Assistive Technology' and 'Customized Housing Design' into a single column for multiclass prediction
df['Assistive_Cust_Design'] = df['Assistive Technology'] + ' + ' + df['Customized Housing Design']

# Drop unnecessary columns
X = df[['Specific Disease', 'Body Functions', 'Activities and Participation', 'Home Functions']]
y = df['Assistive_Cust_Design']

# Apply one-hot encoding to categorical columns in X
X = pd.get_dummies(X, columns=['Specific Disease', 'Body Functions', 'Activities and Participation', 'Home Functions'])

# Convert all columns in X to float to avoid issues with SMOTE
X = X.astype(float)

# Encode the target variable (y) using LabelEncoder
from sklearn.preprocessing import LabelEncoder
target_encoder = LabelEncoder()
y = target_encoder.fit_transform(y)

# Handle imbalance using SMOTE
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

# Initialize XGBoost classifier
xgb_clf = xgb.XGBClassifier()

# Define hyperparameter search space
param_dist = {
    'n_estimators': [100, 200, 300],
    'max_depth': [5, 10, 15],
    'learning_rate': [0.01, 0.1, 0.3],
    'subsample': [0.7, 0.8, 1.0],
    'colsample_bytree': [0.7, 0.8, 1.0],
    'gamma': [0, 0.1, 0.2],
    'reg_lambda': [1, 1.5, 2],
    'reg_alpha': [0, 0.1, 0.2]
}

# Use RandomizedSearchCV to find the best hyperparameters
random_search = RandomizedSearchCV(estimator=xgb_clf, param_distributions=param_dist, n_iter=50,
                                   cv=3, verbose=2, n_jobs=-1, random_state=42)

# Fit the model to the training data
random_search.fit(X_train, y_train)

# Best parameters from the search
print(f"Best parameters: {random_search.best_params_}")

# Use the best estimator for predictions
best_xgb = random_search.best_estimator_

# Evaluate the model using cross-validation
from sklearn.model_selection import cross_val_score
cv_scores = cross_val_score(best_xgb, X_train, y_train, cv=5)
print(f"Cross-validation scores: {cv_scores}")
print(f"Mean cross-validation score: {cv_scores.mean() * 100:.2f}%")

# Make predictions on the test set using the best model
y_pred = best_xgb.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Test accuracy: {accuracy * 100:.2f}%")

# Function to safely encode new input data
def predict_assistive_and_design(specific_disease, body_functions, activities, home_functions):
    input_data = pd.DataFrame({
        'Specific Disease': [specific_disease],
        'Body Functions': [body_functions],
        'Activities and Participation': [activities],
        'Home Functions': [home_functions]
    })

    # Apply one-hot encoding to the input data
    input_data = pd.get_dummies(input_data)

    # Align the columns with the training data to avoid dimension mismatches
    input_data = input_data.reindex(columns=X.columns, fill_value=0)

    # Predict
    prediction = best_xgb.predict(input_data)

    # Decode the prediction back to original label
    return target_encoder.inverse_transform(prediction)[0]

# Example prediction
print(predict_assistive_and_design('Parkinsons Disease', 'Hearing functions (b230)', 'Limited mobility (d455)', 'Old building (e150)'))


Fitting 3 folds for each of 50 candidates, totalling 150 fits
Best parameters: {'subsample': 0.8, 'reg_lambda': 2, 'reg_alpha': 0, 'n_estimators': 300, 'max_depth': 15, 'learning_rate': 0.1, 'gamma': 0, 'colsample_bytree': 0.7}
Cross-validation scores: [0.19  0.145 0.1   0.185 0.14 ]
Mean cross-validation score: 15.20%
Test accuracy: 12.80%
Voice-activated devices (e120) + Enhanced lighting (e155)


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import StandardScaler

# Load your dataset
df = pd.read_csv('/content/DD.csv')

# Drop unnecessary columns and separate the tasks
X = df[['Specific Disease', 'Body Functions', 'Activities and Participation', 'Home Functions']]
y_assistive = df['Assistive Technology']  # Separate the target for Assistive Technology
y_housing = df['Customized Housing Design']  # Separate the target for Customized Housing Design

# Apply one-hot encoding to categorical columns in X
X = pd.get_dummies(X, columns=['Specific Disease', 'Body Functions', 'Activities and Participation', 'Home Functions'])

# Convert all columns in X to float
X = X.astype(float)

# Feature scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Handle imbalance using SMOTE for Assistive Technology
smote = SMOTE(random_state=42)
X_resampled_assistive, y_resampled_assistive = smote.fit_resample(X_scaled, y_assistive)

# Handle imbalance using SMOTE for Customized Housing Design
X_resampled_housing, y_resampled_housing = smote.fit_resample(X_scaled, y_housing)

# Split the dataset into training and testing sets for both tasks
X_train_assistive, X_test_assistive, y_train_assistive, y_test_assistive = train_test_split(X_resampled_assistive, y_resampled_assistive, test_size=0.2, random_state=42)
X_train_housing, X_test_housing, y_train_housing, y_test_housing = train_test_split(X_resampled_housing, y_resampled_housing, test_size=0.2, random_state=42)

# Initialize Random Forest classifier
rf_clf_assistive = RandomForestClassifier(random_state=42)
rf_clf_housing = RandomForestClassifier(random_state=42)

# Define hyperparameter search space for Random Forest (Assistive Technology)
param_dist_assistive = {
    'n_estimators': [100, 200, 500],
    'max_depth': [10, 20, 30, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'bootstrap': [True, False]
}

# Define hyperparameter search space for Random Forest (Customized Housing Design)
param_dist_housing = {
    'n_estimators': [100, 200, 500],
    'max_depth': [10, 20, 30, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'bootstrap': [True, False]
}

# Use RandomizedSearchCV to find the best hyperparameters for Assistive Technology
random_search_assistive = RandomizedSearchCV(estimator=rf_clf_assistive, param_distributions=param_dist_assistive, n_iter=50,
                                             cv=3, verbose=2, n_jobs=-1, random_state=42)
random_search_assistive.fit(X_train_assistive, y_train_assistive)

# Use RandomizedSearchCV to find the best hyperparameters for Customized Housing Design
random_search_housing = RandomizedSearchCV(estimator=rf_clf_housing, param_distributions=param_dist_housing, n_iter=50,
                                           cv=3, verbose=2, n_jobs=-1, random_state=42)
random_search_housing.fit(X_train_housing, y_train_housing)

# Best parameters from the search (Assistive Technology)
print(f"Best parameters for Assistive Technology: {random_search_assistive.best_params_}")

# Best parameters from the search (Customized Housing Design)
print(f"Best parameters for Customized Housing Design: {random_search_housing.best_params_}")

# Use the best estimator for Assistive Technology predictions
best_rf_assistive = random_search_assistive.best_estimator_

# Use the best estimator for Customized Housing Design predictions
best_rf_housing = random_search_housing.best_estimator_

# Make predictions on the test set using the best model for Assistive Technology
y_pred_assistive = best_rf_assistive.predict(X_test_assistive)

# Make predictions on the test set using the best model for Customized Housing Design
y_pred_housing = best_rf_housing.predict(X_test_housing)

# Evaluate the model for Assistive Technology
accuracy_assistive = accuracy_score(y_test_assistive, y_pred_assistive)
print(f"Test accuracy for Assistive Technology: {accuracy_assistive * 100:.2f}%")

# Evaluate the model for Customized Housing Design
accuracy_housing = accuracy_score(y_test_housing, y_pred_housing)
print(f"Test accuracy for Customized Housing Design: {accuracy_housing * 100:.2f}%")

# Function to safely predict Assistive Technology and Housing Design separately
def predict_assistive_and_design(specific_disease, body_functions, activities, home_functions):
    input_data = pd.DataFrame({
        'Specific Disease': [specific_disease],
        'Body Functions': [body_functions],
        'Activities and Participation': [activities],
        'Home Functions': [home_functions]
    })

    # Apply one-hot encoding to the input data
    input_data = pd.get_dummies(input_data)

    # Align the columns with the training data to avoid dimension mismatches
    input_data = input_data.reindex(columns=X.columns, fill_value=0)

    # Scale the input data
    input_data_scaled = scaler.transform(input_data)

    # Predict Assistive Technology
    prediction_assistive = best_rf_assistive.predict(input_data_scaled)

    # Predict Customized Housing Design
    prediction_housing = best_rf_housing.predict(input_data_scaled)

    return prediction_assistive[0], prediction_housing[0]

# Example prediction
assistive_pred, housing_pred = predict_assistive_and_design('Parkinsons Disease', 'Hearing functions (b230)', 'Limited mobility (d455)', 'Old building (e150)')
print(f"Predicted Assistive Technology: {assistive_pred}")
print(f"Predicted Customized Housing Design: {housing_pred}")


Fitting 3 folds for each of 50 candidates, totalling 150 fits
Fitting 3 folds for each of 50 candidates, totalling 150 fits
Best parameters for Assistive Technology: {'n_estimators': 500, 'min_samples_split': 2, 'min_samples_leaf': 4, 'max_depth': 30, 'bootstrap': False}
Best parameters for Customized Housing Design: {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 2, 'max_depth': 10, 'bootstrap': True}
Test accuracy for Assistive Technology: 23.53%
Test accuracy for Customized Housing Design: 23.04%
Predicted Assistive Technology: Voice-activated devices (e120)
Predicted Customized Housing Design: Smart Home Integration (e155)


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Load your dataset
df = pd.read_csv('/content/DD.csv')

# Drop unnecessary columns and separate the tasks
X = df[['Specific Disease', 'Body Functions', 'Activities and Participation', 'Home Functions']]
y_assistive = df['Assistive Technology']  # Separate the target for Assistive Technology
y_housing = df['Customized Housing Design']  # Separate the target for Customized Housing Design

# Apply one-hot encoding to categorical columns in X
X = pd.get_dummies(X, columns=['Specific Disease', 'Body Functions', 'Activities and Participation', 'Home Functions'])

# Convert all columns in X to float
X = X.astype(float)

# Feature scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# **Encode the target variables (Assistive Technology and Customized Housing Design)**
le_assistive = LabelEncoder()
y_assistive_encoded = le_assistive.fit_transform(y_assistive)

le_housing = LabelEncoder()
y_housing_encoded = le_housing.fit_transform(y_housing)

# Handle imbalance using SMOTE for Assistive Technology
smote = SMOTE(random_state=42)
X_resampled_assistive, y_resampled_assistive = smote.fit_resample(X_scaled, y_assistive_encoded)

# Handle imbalance using SMOTE for Customized Housing Design
X_resampled_housing, y_resampled_housing = smote.fit_resample(X_scaled, y_housing_encoded)

# Split the dataset into training and testing sets for both tasks
X_train_assistive, X_test_assistive, y_train_assistive, y_test_assistive = train_test_split(X_resampled_assistive, y_resampled_assistive, test_size=0.2, random_state=42)
X_train_housing, X_test_housing, y_train_housing, y_test_housing = train_test_split(X_resampled_housing, y_resampled_housing, test_size=0.2, random_state=42)

# Initialize XGBoost classifier for Assistive Technology
xgb_clf_assistive = XGBClassifier(random_state=42)

# Initialize XGBoost classifier for Customized Housing Design
xgb_clf_housing = XGBClassifier(random_state=42)

# Define hyperparameter search space for XGBoost (Assistive Technology)
param_dist_assistive = {
    'n_estimators': [100, 200, 500],
    'max_depth': [5, 10, 20],
    'learning_rate': [0.01, 0.1, 0.3],
    'subsample': [0.7, 0.8, 1.0],
    'colsample_bytree': [0.7, 0.8, 1.0],
    'gamma': [0, 0.1, 0.2]
}

# Define hyperparameter search space for XGBoost (Customized Housing Design)
param_dist_housing = {
    'n_estimators': [100, 200, 500],
    'max_depth': [5, 10, 20],
    'learning_rate': [0.01, 0.1, 0.3],
    'subsample': [0.7, 0.8, 1.0],
    'colsample_bytree': [0.7, 0.8, 1.0],
    'gamma': [0, 0.1, 0.2]
}

# Use RandomizedSearchCV to find the best hyperparameters for Assistive Technology
random_search_assistive = RandomizedSearchCV(estimator=xgb_clf_assistive, param_distributions=param_dist_assistive, n_iter=50,
                                             cv=3, verbose=2, n_jobs=-1, random_state=42)
random_search_assistive.fit(X_train_assistive, y_train_assistive)

# Use RandomizedSearchCV to find the best hyperparameters for Customized Housing Design
random_search_housing = RandomizedSearchCV(estimator=xgb_clf_housing, param_distributions=param_dist_housing, n_iter=50,
                                           cv=3, verbose=2, n_jobs=-1, random_state=42)
random_search_housing.fit(X_train_housing, y_train_housing)

# Best parameters from the search (Assistive Technology)
print(f"Best parameters for Assistive Technology: {random_search_assistive.best_params_}")

# Best parameters from the search (Customized Housing Design)
print(f"Best parameters for Customized Housing Design: {random_search_housing.best_params_}")

# Use the best estimator for Assistive Technology predictions
best_xgb_assistive = random_search_assistive.best_estimator_

# Use the best estimator for Customized Housing Design predictions
best_xgb_housing = random_search_housing.best_estimator_

# Make predictions on the test set using the best model for Assistive Technology
y_pred_assistive = best_xgb_assistive.predict(X_test_assistive)

# Make predictions on the test set using the best model for Customized Housing Design
y_pred_housing = best_xgb_housing.predict(X_test_housing)

# Evaluate the model for Assistive Technology
accuracy_assistive = accuracy_score(y_test_assistive, y_pred_assistive)
print(f"Test accuracy for Assistive Technology: {accuracy_assistive * 100:.2f}%")

# Evaluate the model for Customized Housing Design
accuracy_housing = accuracy_score(y_test_housing, y_pred_housing)
print(f"Test accuracy for Customized Housing Design: {accuracy_housing * 100:.2f}%")

# Function to predict Assistive Technology and Housing Design separately
def predict_assistive_and_design(specific_disease, body_functions, activities, home_functions):
    input_data = pd.DataFrame({
        'Specific Disease': [specific_disease],
        'Body Functions': [body_functions],
        'Activities and Participation': [activities],
        'Home Functions': [home_functions]
    })

    # Apply one-hot encoding to the input data
    input_data = pd.get_dummies(input_data)

    # Align the columns with the training data to avoid dimension mismatches
    input_data = input_data.reindex(columns=X.columns, fill_value=0)

    # Scale the input data
    input_data_scaled = scaler.transform(input_data)

    # Predict Assistive Technology
    prediction_assistive = best_xgb_assistive.predict(input_data_scaled)

    # Predict Customized Housing Design
    prediction_housing = best_xgb_housing.predict(input_data_scaled)

    return le_assistive.inverse_transform([prediction_assistive[0]]), le_housing.inverse_transform([prediction_housing[0]])

# Example prediction
assistive_pred, housing_pred = predict_assistive_and_design('Parkinsons Disease', 'Hearing functions (b230)', 'Limited mobility (d455)', 'Old building (e150)')
print(f"Predicted Assistive Technology: {assistive_pred[0]}")
print(f"Predicted Customized Housing Design: {housing_pred[0]}")


Fitting 3 folds for each of 50 candidates, totalling 150 fits
Fitting 3 folds for each of 50 candidates, totalling 150 fits
Best parameters for Assistive Technology: {'subsample': 0.7, 'n_estimators': 200, 'max_depth': 10, 'learning_rate': 0.1, 'gamma': 0, 'colsample_bytree': 0.7}
Best parameters for Customized Housing Design: {'subsample': 1.0, 'n_estimators': 100, 'max_depth': 5, 'learning_rate': 0.1, 'gamma': 0, 'colsample_bytree': 1.0}
Test accuracy for Assistive Technology: 26.24%
Test accuracy for Customized Housing Design: 20.59%
Predicted Assistive Technology: Voice-activated devices (e120)
Predicted Customized Housing Design: Smart Home Integration (e155)


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import StackingClassifier

# Load your dataset
df = pd.read_csv('/content/DD.csv')

# Drop unnecessary columns and separate the tasks
X = df[['Specific Disease', 'Body Functions', 'Activities and Participation', 'Home Functions']]
y_assistive = df['Assistive Technology']  # Separate the target for Assistive Technology
y_housing = df['Customized Housing Design']  # Separate the target for Customized Housing Design

# Apply one-hot encoding to categorical columns in X
X = pd.get_dummies(X, columns=['Specific Disease', 'Body Functions', 'Activities and Participation', 'Home Functions'])

# List the one-hot encoded columns for 'Specific Disease' and 'Body Functions'
disease_columns = [col for col in X.columns if 'Specific Disease_' in col]
body_func_columns = [col for col in X.columns if 'Body Functions_' in col]
activity_columns = [col for col in X.columns if 'Activities and Participation_' in col]

# Create interaction features using one-hot encoded columns
for disease_col in disease_columns:
    for body_col in body_func_columns:
        X[f'{disease_col}_x_{body_col}'] = X[disease_col] * X[body_col]

for body_col in body_func_columns:
    for activity_col in activity_columns:
        X[f'{body_col}_x_{activity_col}'] = X[body_col] * X[activity_col]

# Convert all columns in X to float
X = X.astype(float)

# Feature scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Encode the target variables (Assistive Technology and Customized Housing Design)
le_assistive = LabelEncoder()
y_assistive_encoded = le_assistive.fit_transform(y_assistive)

le_housing = LabelEncoder()
y_housing_encoded = le_housing.fit_transform(y_housing)

# Handle imbalance using SMOTE for Assistive Technology
smote = SMOTE(random_state=42)
X_resampled_assistive, y_resampled_assistive = smote.fit_resample(X_scaled, y_assistive_encoded)

# Handle imbalance using SMOTE for Customized Housing Design
X_resampled_housing, y_resampled_housing = smote.fit_resample(X_scaled, y_housing_encoded)

# Split the dataset into training and testing sets for both tasks
X_train_assistive, X_test_assistive, y_train_assistive, y_test_assistive = train_test_split(X_resampled_assistive, y_resampled_assistive, test_size=0.2, random_state=42)
X_train_housing, X_test_housing, y_train_housing, y_test_housing = train_test_split(X_resampled_housing, y_resampled_housing, test_size=0.2, random_state=42)

# Initialize base models
xgb_clf = XGBClassifier(random_state=42)
rf_clf = RandomForestClassifier(random_state=42)
log_reg = LogisticRegression(max_iter=1000)

# Create a Stacking Classifier with XGBoost, Random Forest, and Logistic Regression
stacking_clf_assistive = StackingClassifier(estimators=[
    ('rf', rf_clf),
    ('xgb', xgb_clf),
    ('log_reg', log_reg)
], final_estimator=LogisticRegression(max_iter=1000))

# Use GridSearchCV to tune parameters for the Stacking model (Assistive Technology)
param_grid_assistive = {
    'xgb__n_estimators': [100, 200],
    'xgb__max_depth': [5, 10],
    'rf__n_estimators': [100, 200],
    'final_estimator__C': [0.1, 1, 10]
}

grid_search_assistive = GridSearchCV(estimator=stacking_clf_assistive, param_grid=param_grid_assistive, cv=3, verbose=2, n_jobs=-1)
grid_search_assistive.fit(X_train_assistive, y_train_assistive)

# Use GridSearchCV to tune parameters for the Stacking model (Customized Housing Design)
stacking_clf_housing = StackingClassifier(estimators=[
    ('rf', rf_clf),
    ('xgb', xgb_clf),
    ('log_reg', log_reg)
], final_estimator=LogisticRegression(max_iter=1000))

param_grid_housing = {
    'xgb__n_estimators': [100, 200],
    'xgb__max_depth': [5, 10],
    'rf__n_estimators': [100, 200],
    'final_estimator__C': [0.1, 1, 10]
}

grid_search_housing = GridSearchCV(estimator=stacking_clf_housing, param_grid=param_grid_housing, cv=3, verbose=2, n_jobs=-1)
grid_search_housing.fit(X_train_housing, y_train_housing)

# Best parameters from the search
print(f"Best parameters for Assistive Technology: {grid_search_assistive.best_params_}")
print(f"Best parameters for Customized Housing Design: {grid_search_housing.best_params_}")

# Use the best estimator for predictions
best_stacking_assistive = grid_search_assistive.best_estimator_
best_stacking_housing = grid_search_housing.best_estimator_

# Make predictions on the test set using the best models
y_pred_assistive = best_stacking_assistive.predict(X_test_assistive)
y_pred_housing = best_stacking_housing.predict(X_test_housing)

# Evaluate the model for Assistive Technology
accuracy_assistive = accuracy_score(y_test_assistive, y_pred_assistive)
print(f"Test accuracy for Assistive Technology: {accuracy_assistive * 100:.2f}%")

# Evaluate the model for Customized Housing Design
accuracy_housing = accuracy_score(y_test_housing, y_pred_housing)
print(f"Test accuracy for Customized Housing Design: {accuracy_housing * 100:.2f}%")

# Function to predict Assistive Technology and Housing Design separately
def predict_assistive_and_design(specific_disease, body_functions, activities, home_functions):
    input_data = pd.DataFrame({
        'Specific Disease': [specific_disease],
        'Body Functions': [body_functions],
        'Activities and Participation': [activities],
        'Home Functions': [home_functions]
    })

    # Apply one-hot encoding to the input data
    input_data = pd.get_dummies(input_data)

    # Create interaction features using one-hot encoded columns
    for disease_col in disease_columns:
        for body_col in body_func_columns:
            input_data[f'{disease_col}_x_{body_col}'] = input_data.get(disease_col, 0) * input_data.get(body_col, 0)

    for body_col in body_func_columns:
        for activity_col in activity_columns:
            input_data[f'{body_col}_x_{activity_col}'] = input_data.get(body_col, 0) * input_data.get(activity_col, 0)

    # Align the columns with the training data to avoid dimension mismatches
    input_data = input_data.reindex(columns=X.columns, fill_value=0)

    # Scale the input data
    input_data_scaled = scaler.transform(input_data)

    # Predict Assistive Technology
    prediction_assistive = best_stacking_assistive.predict(input_data_scaled)

    # Predict Customized Housing Design
    prediction_housing = best_stacking_housing.predict(input_data_scaled)

    return le_assistive.inverse_transform([prediction_assistive[0]]), le_housing.inverse_transform([prediction_housing[0]])

# Example prediction
assistive_pred, housing_pred = predict_assistive_and_design('Parkinsons Disease', 'Hearing functions (b230)', 'Limited mobility (d455)', 'Old building (e150)')
print(f"Predicted Assistive Technology: {assistive_pred[0]}")
print(f"Predicted Customized Housing Design: {housing_pred[0]}")


Fitting 3 folds for each of 24 candidates, totalling 72 fits
Fitting 3 folds for each of 24 candidates, totalling 72 fits
Best parameters for Assistive Technology: {'final_estimator__C': 1, 'rf__n_estimators': 200, 'xgb__max_depth': 10, 'xgb__n_estimators': 100}
Best parameters for Customized Housing Design: {'final_estimator__C': 1, 'rf__n_estimators': 200, 'xgb__max_depth': 10, 'xgb__n_estimators': 100}
Test accuracy for Assistive Technology: 22.62%
Test accuracy for Customized Housing Design: 17.65%
Predicted Assistive Technology: Cognitive Assistance Device (e125)
Predicted Customized Housing Design: Enhanced lighting (e155)


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.metrics import accuracy_score
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.feature_selection import RFE

# Load your dataset
df = pd.read_csv('/content/DD.csv')

# Drop unnecessary columns and separate the tasks
X = df[['Specific Disease', 'Body Functions', 'Activities and Participation', 'Home Functions']]
y_assistive = df['Assistive Technology']  # Separate the target for Assistive Technology
y_housing = df['Customized Housing Design']  # Separate the target for Customized Housing Design

# Apply one-hot encoding to categorical columns in X
X = pd.get_dummies(X, columns=['Specific Disease', 'Body Functions', 'Activities and Participation', 'Home Functions'])

# Convert all columns in X to float
X = X.astype(float)

# Feature scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Encode the target variables (Assistive Technology and Customized Housing Design)
le_assistive = LabelEncoder()
y_assistive_encoded = le_assistive.fit_transform(y_assistive)

le_housing = LabelEncoder()
y_housing_encoded = le_housing.fit_transform(y_housing)

# Handle imbalance using SMOTE for Assistive Technology
smote = SMOTE(random_state=42)
X_resampled_assistive, y_resampled_assistive = smote.fit_resample(X_scaled, y_assistive_encoded)

# Handle imbalance using SMOTE for Customized Housing Design
X_resampled_housing, y_resampled_housing = smote.fit_resample(X_scaled, y_housing_encoded)

# Split the dataset into training and testing sets for both tasks
X_train_assistive, X_test_assistive, y_train_assistive, y_test_assistive = train_test_split(X_resampled_assistive, y_resampled_assistive, test_size=0.2, random_state=42)
X_train_housing, X_test_housing, y_train_housing, y_test_housing = train_test_split(X_resampled_housing, y_resampled_housing, test_size=0.2, random_state=42)

# Initialize base models
xgb_clf = XGBClassifier(random_state=42)
rf_clf = RandomForestClassifier(random_state=42)

# Feature Selection using Recursive Feature Elimination (RFE) with RandomForest
rfe = RFE(estimator=rf_clf, n_features_to_select=20)  # Select top 20 features
X_train_assistive_rfe = rfe.fit_transform(X_train_assistive, y_train_assistive)
X_test_assistive_rfe = rfe.transform(X_test_assistive)

X_train_housing_rfe = rfe.fit_transform(X_train_housing, y_train_housing)
X_test_housing_rfe = rfe.transform(X_test_housing)

# Create a Voting Classifier for Assistive Technology with soft voting
voting_clf_assistive = VotingClassifier(estimators=[
    ('rf', rf_clf),
    ('xgb', xgb_clf)
], voting='soft')

# Use GridSearchCV to tune parameters for the Voting model (Assistive Technology)
param_grid_assistive = {
    'xgb__n_estimators': [100, 200],
    'xgb__max_depth': [5, 10],
    'rf__n_estimators': [100, 200],
    'rf__max_depth': [10, 20]
}

grid_search_assistive = GridSearchCV(estimator=voting_clf_assistive, param_grid=param_grid_assistive, cv=3, verbose=2, n_jobs=-1)
grid_search_assistive.fit(X_train_assistive_rfe, y_train_assistive)

# Create a Voting Classifier for Customized Housing Design with soft voting
voting_clf_housing = VotingClassifier(estimators=[
    ('rf', rf_clf),
    ('xgb', xgb_clf)
], voting='soft')

# Use GridSearchCV to tune parameters for the Voting model (Customized Housing Design)
param_grid_housing = {
    'xgb__n_estimators': [100, 200],
    'xgb__max_depth': [5, 10],
    'rf__n_estimators': [100, 200],
    'rf__max_depth': [10, 20]
}

grid_search_housing = GridSearchCV(estimator=voting_clf_housing, param_grid=param_grid_housing, cv=3, verbose=2, n_jobs=-1)
grid_search_housing.fit(X_train_housing_rfe, y_train_housing)

# Best parameters from the search
print(f"Best parameters for Assistive Technology: {grid_search_assistive.best_params_}")
print(f"Best parameters for Customized Housing Design: {grid_search_housing.best_params_}")

# Use the best estimator for predictions
best_voting_assistive = grid_search_assistive.best_estimator_
best_voting_housing = grid_search_housing.best_estimator_

# Make predictions on the test set using the best models
y_pred_assistive = best_voting_assistive.predict(X_test_assistive_rfe)
y_pred_housing = best_voting_housing.predict(X_test_housing_rfe)

# Evaluate the model for Assistive Technology
accuracy_assistive = accuracy_score(y_test_assistive, y_pred_assistive)
print(f"Test accuracy for Assistive Technology: {accuracy_assistive * 100:.2f}%")

# Evaluate the model for Customized Housing Design
accuracy_housing = accuracy_score(y_test_housing, y_pred_housing)
print(f"Test accuracy for Customized Housing Design: {accuracy_housing * 100:.2f}%")

# Function to predict Assistive Technology and Housing Design separately
def predict_assistive_and_design(specific_disease, body_functions, activities, home_functions):
    input_data = pd.DataFrame({
        'Specific Disease': [specific_disease],
        'Body Functions': [body_functions],
        'Activities and Participation': [activities],
        'Home Functions': [home_functions]
    })

    # Apply one-hot encoding to the input data
    input_data = pd.get_dummies(input_data)

    # Align the columns with the training data to avoid dimension mismatches
    input_data = input_data.reindex(columns=X.columns, fill_value=0)

    # Scale the input data
    input_data_scaled = scaler.transform(input_data)

    # Select features using RFE
    input_data_rfe = rfe.transform(input_data_scaled)

    # Predict Assistive Technology
    prediction_assistive = best_voting_assistive.predict(input_data_rfe)

    # Predict Customized Housing Design
    prediction_housing = best_voting_housing.predict(input_data_rfe)

    return le_assistive.inverse_transform([prediction_assistive[0]]), le_housing.inverse_transform([prediction_housing[0]])

# Example prediction
assistive_pred, housing_pred = predict_assistive_and_design('Parkinsons Disease', 'Hearing functions (b230)', 'Limited mobility (d455)', 'Old building (e150)')
print(f"Predicted Assistive Technology: {assistive_pred[0]}")
print(f"Predicted Customized Housing Design: {housing_pred[0]}")


Fitting 3 folds for each of 16 candidates, totalling 48 fits
Fitting 3 folds for each of 16 candidates, totalling 48 fits
Best parameters for Assistive Technology: {'rf__max_depth': 10, 'rf__n_estimators': 200, 'xgb__max_depth': 5, 'xgb__n_estimators': 100}
Best parameters for Customized Housing Design: {'rf__max_depth': 20, 'rf__n_estimators': 200, 'xgb__max_depth': 10, 'xgb__n_estimators': 100}
Test accuracy for Assistive Technology: 25.79%
Test accuracy for Customized Housing Design: 19.61%
Predicted Assistive Technology: Voice-activated devices (e120)
Predicted Customized Housing Design: Smart Home Integration (e155)
