In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# CSV files to load, listed one per line so that additions and removals diff cleanly.
# The spellings are the on-disk file names and must not be "corrected" here.
file_paths = [
    'CAD and exercise.csv',
    'CAD and exercise1.csv',
    'CAD and exercise2.csv',
    'CTG Alzeimers disease and exercise.csv',
    'CTG Alzheimer disease and physical activity.csv',
    'CTG Alzheimer disease.csv',
    'CTG CAD and phycial activity.csv',
    'CTG Cardiac and exercise.csv',
    'CTG Cardio and exercise.csv',
    'CTG Cardio and physical activity.csv',
    'CTG Cardiovascular disease and exercise.csv',
    'CTG Cardiovascular disease and physical activity.csv',
    'CTG Chronic kidney disease and exercise.csv',
    'CTG Chronic kidney disease and physical activity.csv',
    'CTG Diabetes and exercise.csv',
    'CTG Diabetes and physical activity.csv',
    'CTG Diabetes mellitus and exercise.csv',
    'CTG Diabetes mellitus type 2 and exercise.csv',
    'CTG Diabetes mellitus type 2 and physical activity.csv',
    'CTG Kneeosteoarthritis and exercise.csv',
    'CTG Kneeosteoarthritis and physical activity.csv',
    'CTG Obesity and exercise.csv',
    'CTG Obesity and physical activity.csv',
    'CTG Osteoarthritis and exercise.csv',
    'CTG Osteoarthritis and physical activity.csv',
    'CTG PAD and exercise 1.csv',
    'CTG PAD and exercise.csv',
    'CTG PAD and physical activity.csv',
    'CTG PAD and physical activity1.csv',
    'CTG Physical activity and CAD.csv',
    'CTG heart and exercise.csv',
    'CTG stroke and exercise.csv',
    'CTG stroke and exercise1.csv',
    'CTG stroke and physical activity.csv',
    'CTG stroke and physical activity1.csv',
    'CTG studies diabetes and physical activity.csv',
]

In [None]:
# Read every CSV named in file_paths into its own DataFrame, preserving list order
dataframes = [pd.read_csv(csv_path) for csv_path in file_paths]

In [None]:
# Stack the per-file frames row-wise and renumber the rows from 0
merged_data = pd.concat(objs=dataframes, axis=0, ignore_index=True)

In [None]:
# Print the merged frame's row count plus each column's non-null count and dtype
merged_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1131 entries, 0 to 1130
Data columns (total 37 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   NCT Number                  1080 non-null   object 
 1   Study Title                 1080 non-null   object 
 2   Study URL                   1080 non-null   object 
 3   Acronym                     364 non-null    object 
 4   Study Status                1080 non-null   object 
 5   Brief Summary               1080 non-null   object 
 6   Study Results               1080 non-null   object 
 7   Conditions                  1080 non-null   object 
 8   Interventions               1080 non-null   object 
 9   Primary Outcome Measures    1080 non-null   object 
 10  Secondary Outcome Measures  904 non-null    object 
 11  Other Outcome Measures      216 non-null    object 
 12  Sponsor                     1080 non-null   object 
 13  Collaborators               601 n

In [None]:
# Count the missing values in each column of the merged frame
merged_data.isnull().sum()


NCT Number                      51
Study Title                     51
Study URL                       51
Acronym                        767
Study Status                    51
Brief Summary                   51
Study Results                   51
Conditions                      51
Interventions                   51
Primary Outcome Measures        51
Secondary Outcome Measures     227
Other Outcome Measures         915
Sponsor                         51
Collaborators                  530
Sex                             54
Age                             51
Phases                         978
Enrollment                      51
Funder Type                     51
Study Type                      51
Study Design                    51
Other IDs                       51
Start Date                      51
Primary Completion Date         51
Completion Date                 51
First Posted                    51
Results First Posted           520
Last Update Posted              51
Locations           

In [None]:
# Missing-value handling: discard every row that lacks a value in any column used downstream
relevant_columns = [
    'Conditions',
    'Interventions',
    'Primary Outcome Measures',
    'Secondary Outcome Measures',
    'Other Outcome Measures',
    'Sex',
    'Age',
]
merged_data = merged_data.dropna(axis=0, how='any', subset=relevant_columns)

In [None]:

# Narrow the frame to the modelling columns only (all rows kept)
relevant_data = merged_data.loc[:, relevant_columns]

In [None]:
# One-hot encode the selected columns: every distinct value of every column
# becomes its own indicator column.
categorical_features = [
    'Conditions',
    'Interventions',
    'Primary Outcome Measures',
    'Secondary Outcome Measures',
    'Other Outcome Measures',
    'Sex',
    'Age',
]
encoder = OneHotEncoder()
categorical_frame = relevant_data[categorical_features]
encoded_features = encoder.fit_transform(categorical_frame)


In [None]:
# Convert the encoder output to a dense array and wrap it in a DataFrame whose
# columns carry the encoder's generated "<feature>_<category>" names.
encoded_column_names = encoder.get_feature_names_out(categorical_features)
encoded_df = pd.DataFrame(encoded_features.toarray(), columns=encoded_column_names)

In [None]:
# Build the final feature matrix. Only the one-hot encoded frame is passed to
# pd.concat here; no normalized numerical features are created or added in this
# notebook section, so final_data has exactly the columns of encoded_df.
final_data = pd.concat([encoded_df], axis=1)

In [None]:
# Print the shape, dtypes and memory usage of the encoded feature matrix
final_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 195 entries, 0 to 194
Columns: 316 entries, Conditions_Adolescent Obesity|Diabetes Mellitus to Age_OLDER_ADULT
dtypes: float64(316)
memory usage: 481.5 KB


In [None]:
# List the generated one-hot column names ("<feature>_<category>")
print(final_data.columns)

Index(['Conditions_Adolescent Obesity|Diabetes Mellitus',
       'Conditions_Alzheimer Disease|Dementia|Cognitive Decline|Cognitive Impairment|Dementia, Alzheimer Type',
       'Conditions_Arthroplasty|Replacement|Knee Osteoarthritis',
       'Conditions_At Risk of Type 2 Diabetes Mellitus',
       'Conditions_Cardiac Arrest|Post Traumatic Stress Syndrome',
       'Conditions_Cardiac Rehabilitation|Physical Activity|Fruit and Vegetable Intake',
       'Conditions_Cardiovascular Diseases',
       'Conditions_Cardiovascular Diseases|Cardiovascular Risk Factor',
       'Conditions_Cardiovascular Risk Factor|Health Behavior|Physical Activity|Self Efficacy|Vascular Stiffness',
       'Conditions_Cardiovascular Risk Factor|Stress|Self Efficacy|Depressive Symptoms|Health Behavior',
       ...
       'Other Outcome Measures_physical activity as measured by the 7-Day Stanford Physical Activity Recall, The 7-Day Stanford Physical Activity Recall estimates an individual's time spent in sleep and 

In [None]:
# Fit K-Means on the encoded feature matrix inside this cell, so it no longer
# depends on a `kmeans` object left over from an earlier kernel session (no
# earlier cell defines one, which breaks Restart & Run All).
# n_clusters=3 matches the three centroid rows in this cell's saved output;
# random_state pins the initialisation so re-runs give the same clusters, and
# n_init is set explicitly because its default differs between scikit-learn versions.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42).fit(final_data)

# Example of interpreting cluster centers
# One row per cluster, one column per encoded feature. Every feature is a 0/1
# indicator, so a centroid value is the share of that cluster's rows having the category.
cluster_centers = pd.DataFrame(kmeans.cluster_centers_, columns=final_data.columns)
print(cluster_centers)


   Conditions_Adolescent Obesity|Diabetes Mellitus  \
0                                         0.089286   
1                                         0.000000   
2                                         0.000000   

   Conditions_Alzheimer Disease|Dementia|Cognitive Decline|Cognitive Impairment|Dementia, Alzheimer Type  \
0                                           0.053571                                                       
1                                           0.000000                                                       
2                                           0.000000                                                       

   Conditions_Arthroplasty|Replacement|Knee Osteoarthritis  \
0                                           0.000000         
1                                           0.030534         
2                                           0.000000         

   Conditions_At Risk of Type 2 Diabetes Mellitus  \
0                                        0.08928

In [None]:
# Reshape the centroid table into one row per feature with one column per cluster.
# cluster_centers holds one ROW per cluster and one COLUMN per feature, so a
# cluster has to be picked by row position with .iloc[k]. The previous
# cluster_centers_df[0] asked for a column labelled 0 and raised KeyError: 0.
cluster_centers_df = pd.DataFrame(cluster_centers)
feature_importance = pd.DataFrame({
    'Feature': final_data.columns,
    # .to_numpy() drops the feature-name index so the values are placed
    # positionally, in the same order as the 'Feature' column.
    'Cluster_0': cluster_centers_df.iloc[0].to_numpy(),
    'Cluster_1': cluster_centers_df.iloc[1].to_numpy(),
    'Cluster_2': cluster_centers_df.iloc[2].to_numpy(),
})

KeyError: 0

In [None]:
# Import necessary libraries
import pandas as pd


def _ask(prompt):
    """Prompt the user and return the answer with surrounding whitespace removed."""
    return input(prompt).strip()


def _ask_list(prompt):
    """Prompt for a comma-separated answer and return its trimmed, non-empty items.

    "sunday, monday" yields ['sunday', 'monday'] (no leading space on the second
    item) and an empty answer yields [] rather than [''].
    """
    return [item.strip() for item in input(prompt).split(',') if item.strip()]


# Step 1: Enter demographic information
age = _ask("Enter your age: ")
sex = _ask("Enter your sex: ")
race = _ask("Enter your race: ")
gender = _ask("Enter your gender: ")
comorbidities = _ask_list("Enter any comorbidities (separated by commas if multiple): ")

# Step 2: Confirm weekly exercise goal
weekly_goal = _ask("Confirm your weekly exercise goal (e.g., 150 mins MVPA, plus strength training): ")

# Step 3: Preference
start_preference = _ask("Do you prefer a gradual start or an immediate goal? ")
exercise_variety = _ask("Do you prefer new types of exercise every day or consistency? ")

# Step 4: Location of exercise
exercise_location = _ask("Where do you plan to exercise (YMCA, Planet Fitness, other gym, home)? ")

# Step 5: Select days and times of exercise
days_of_exercise = _ask_list("Select the days of the week you plan to exercise (separated by commas if multiple): ")
times_of_exercise = _ask_list("Select the times of the day you plan to exercise (separated by commas if multiple): ")

# Step 6: Optional preference of exercise machine
exercise_machine_preference = _ask("Enter your preference of exercise machine (e.g., treadmill, elliptical): ")

# Step 7: Coordinate exercise buddy
exercise_buddy = _ask("Do you want to coordinate/have a potential exercise buddy? ")

# Step 8: Create appearance of virtual coach
# This step involves more complex graphics and is beyond the scope of this code snippet

# Displaying entered information
print("\nEntered information:")
print(f"Age: {age}")
print(f"Sex: {sex}")
print(f"Race: {race}")
print(f"Gender: {gender}")
print(f"Comorbidities: {comorbidities}")
print(f"Weekly exercise goal: {weekly_goal}")
print(f"Start preference: {start_preference}")
print(f"Exercise variety preference: {exercise_variety}")
print(f"Exercise location: {exercise_location}")
print(f"Days of exercise: {days_of_exercise}")
print(f"Times of exercise: {times_of_exercise}")
print(f"Exercise machine preference: {exercise_machine_preference}")
print(f"Exercise buddy coordination: {exercise_buddy}")


Enter your age: 26
Enter your sex: male
Enter your race: American
Enter your gender: male
Enter any comorbidities (separated by commas if multiple): hypertension
Confirm your weekly exercise goal (e.g., 150 mins MVPA, plus strength training):  strength training
Do you prefer a gradual start or an immediate goal? immediate goal
Do you prefer new types of exercise every day or consistency? every day
Where do you plan to exercise (YMCA, Planet Fitness, other gym, home)? home
Select the days of the week you plan to exercise (separated by commas if multiple): sunday, monday
Select the times of the day you plan to exercise (separated by commas if multiple): 2
Enter your preference of exercise machine (e.g., treadmill, elliptical): treadmill
Do you want to coordinate/have a potential exercise buddy? yes

Entered information:
Age: 26
Sex: male
Race: American
Gender: male
Comorbidities: ['hypertension']
Weekly exercise goal:  strength training
Start preference: immediate goal
Exercise variety p

In [None]:
import datetime
import time


def _prompt_until_valid(prompt, convert):
    """Keep asking `prompt` until `convert(answer)` succeeds, then return its result.

    `convert` signals a bad answer by raising ValueError; the message is shown
    to the user before the question is asked again.
    """
    while True:
        try:
            return convert(input(prompt))
        except ValueError as error:
            print(f"Invalid input: {error}")


def _parse_exercise_choices(raw_choices, options):
    """Map a comma-separated list of 1-based menu numbers to the chosen options.

    Blank entries are skipped. Raises ValueError for a non-numeric entry or a
    number outside 1..len(options); in particular 0 is rejected instead of
    silently selecting the last option through index -1.
    """
    chosen = []
    for token in raw_choices.split(','):
        token = token.strip()
        if not token:
            continue
        number = int(token)
        if not 1 <= number <= len(options):
            raise ValueError(f"{number} is not between 1 and {len(options)}")
        chosen.append(options[number - 1])
    return chosen


def _parse_minutes(raw_minutes):
    """Parse a non-negative whole number of minutes; raises ValueError otherwise."""
    minutes = int(raw_minutes.strip())
    if minutes < 0:
        raise ValueError("minutes cannot be negative")
    return minutes


def _ask_additional_machine(question, qualifier=""):
    """Ask a yes/no question about an extra machine and return its name, or None.

    `qualifier` (e.g. "cardio ") is inserted before the word "machine" in the
    follow-up prompt and in the printed confirmation.
    """
    if input(question).strip().lower() == 'yes':
        machine_name = input(f"Enter the name of the additional {qualifier}machine used: ")
        print(f"Additional {qualifier}machine used: {machine_name}")
        return machine_name
    print(f"No additional {qualifier}machine used.")
    return None


# Step 1: Generate cardio and strength training exercises
# This step would require a database of exercises and routines, and is beyond the scope of this code snippet

# Step 2: User receives notification
# The notification goes out 15 minutes ahead of the session, so the value
# computed and shown here is the session's start time (now + 15 minutes).
notification_time = datetime.datetime.now() + datetime.timedelta(minutes=15)
print(f"Notification: Your exercise session will start at {notification_time.strftime('%H:%M')}.")

# Step 3: User starts recording exercise time
input("Press 'Enter' to begin recording exercise time.")
# Time the real interval between the two key presses. The earlier fixed
# time.sleep(120) froze the kernel for two minutes and recorded nothing.
session_started = time.monotonic()
print("Exercise session started.")

# Step 4: User ends exercise session
input("Press 'Enter' to end exercise session.")
session_minutes = (time.monotonic() - session_started) / 60
print("Exercise session ended.")
print(f"Recorded session length: {session_minutes:.1f} minutes")

# Step 5: Checkmark or X on types of exercises used
exercise_types = ['Cardio', 'Strength Training']  # List of exercise types
print("Types of exercises:")
for idx, exercise in enumerate(exercise_types):
    print(f"{idx+1}. {exercise}")

used_exercises = _prompt_until_valid(
    "Enter the numbers of exercises used (separated by commas if multiple): ",
    lambda answer: _parse_exercise_choices(answer, exercise_types),
)

# Step 6: Did they use another machine or not exercise?
if 'Strength Training' in used_exercises:
    additional_machine_used = _ask_additional_machine(
        "Did you use another machine for strength training? (yes/no) ")
elif 'Cardio' in used_exercises:
    # If only cardio was used, we can ask if the user used any other cardio machine
    additional_machine_used = _ask_additional_machine(
        "Did you use another cardio machine? (yes/no) ", qualifier="cardio ")

# Step 7: Give reward if exercise time goal is met (gamification)
exercise_time_goal = 150  # Example exercise time goal in minutes
exercise_time = _prompt_until_valid("Enter the total exercise time (in minutes): ", _parse_minutes)
if exercise_time >= exercise_time_goal:
    print("Congratulations! You've met your exercise time goal.")
    # Provide some reward or feedback for achieving the goal
else:
    print("Keep up the good work! You'll reach your goal with consistency.")


Notification: Your exercise session will start at 17:02.
Press 'Enter' to begin recording exercise time.
Exercise session started.


KeyboardInterrupt: 

In [None]:
# Importing necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

# Load the dataset: one CSV per entry, read in list order
file_paths = [
    'CAD and exercise.csv',
    'CAD and exercise1.csv',
    'CAD and exercise2.csv',
    'CTG Alzeimers disease and exercise.csv',
    'CTG Alzheimer disease and physical activity.csv',
    'CTG Alzheimer disease.csv',
    'CTG CAD and phycial activity.csv',
    'CTG Cardiac and exercise.csv',
    'CTG Cardio and exercise.csv',
    'CTG Cardio and physical activity.csv',
    'CTG Cardiovascular disease and exercise.csv',
    'CTG Cardiovascular disease and physical activity.csv',
    'CTG Chronic kidney disease and exercise.csv',
    'CTG Chronic kidney disease and physical activity.csv',
    'CTG Diabetes and exercise.csv',
    'CTG Diabetes and physical activity.csv',
    'CTG Diabetes mellitus and exercise.csv',
    'CTG Diabetes mellitus type 2 and exercise.csv',
    'CTG Diabetes mellitus type 2 and physical activity.csv',
    'CTG Kneeosteoarthritis and exercise.csv',
    'CTG Kneeosteoarthritis and physical activity.csv',
    'CTG Obesity and exercise.csv',
    'CTG Obesity and physical activity.csv',
    'CTG Osteoarthritis and exercise.csv',
    'CTG Osteoarthritis and physical activity.csv',
    'CTG PAD and exercise 1.csv',
    'CTG PAD and exercise.csv',
    'CTG PAD and physical activity.csv',
    'CTG PAD and physical activity1.csv',
    'CTG Physical activity and CAD.csv',
    'CTG heart and exercise.csv',
    'CTG stroke and exercise.csv',
    'CTG stroke and exercise1.csv',
    'CTG stroke and physical activity.csv',
    'CTG stroke and physical activity1.csv',
    'CTG studies diabetes and physical activity.csv',
]

dataframes = [pd.read_csv(csv_path) for csv_path in file_paths]

# Stack the per-file frames row-wise and renumber the rows from 0
merged_data = pd.concat(objs=dataframes, axis=0, ignore_index=True)

# Missing-value handling: discard rows lacking a value in any of the listed columns
relevant_columns = [
    'Conditions',
    'Interventions',
    'Primary Outcome Measures',
    'Secondary Outcome Measures',
    'Other Outcome Measures',
    'Sex',
    'Age',
]
merged_data = merged_data.dropna(axis=0, how='any', subset=relevant_columns)

# Preprocessing the data
# Every clustering feature is categorical text. 'Age' holds age-group labels
# (the earlier one-hot step in this notebook produced a column named
# 'Age_OLDER_ADULT'), so it is one-hot encoded with the other columns rather
# than sent to StandardScaler, which only accepts numeric input.
feature_columns = ['Conditions', 'Interventions', 'Sex', 'Age']
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(), feature_columns)
    ])

# Define pipeline with preprocessor and clustering algorithm.
# random_state makes the clustering repeatable; n_init is set explicitly
# because its default differs between scikit-learn versions.
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('clusterer', KMeans(n_init=10, random_state=42))
])

# Parameters for KMeans clustering
param_grid = {
    'clusterer__n_clusters': [2, 3, 4, 5]  # Try different number of clusters
}

cluster_features = merged_data[feature_columns]


def _fit_and_score(n_clusters):
    """Fit the pipeline with `n_clusters` clusters; return (labels, silhouette score).

    The silhouette is computed on the encoded feature matrix the clusterer
    actually saw, not on the raw string columns.
    """
    pipeline.set_params(clusterer__n_clusters=n_clusters)
    labels = pipeline.fit_predict(cluster_features)
    encoded = pipeline.named_steps['preprocessor'].transform(cluster_features)
    return labels, silhouette_score(encoded, labels)


# Pick the number of clusters with the highest silhouette score.
# GridSearchCV is not used: it was never imported, there are no ground-truth
# labels to cross-validate against (the 'Cluster' column did not exist yet,
# hence the earlier KeyError), and 'silhouette' is not a built-in scorer name.
silhouette_by_k = {
    n_clusters: _fit_and_score(n_clusters)[1]
    for n_clusters in param_grid['clusterer__n_clusters']
}

# Best number of clusters
best_clusters = max(silhouette_by_k, key=silhouette_by_k.get)
print("Best number of clusters:", best_clusters)

# Fit the pipeline with the best number of clusters (leaves `pipeline` fitted with it)
cluster_labels, silhouette_avg = _fit_and_score(best_clusters)

# Add cluster labels to the dataset; assign() returns a new frame, so re-running
# this cell simply replaces the column.
merged_data = merged_data.assign(Cluster=cluster_labels)

# Evaluating the clusters
print("Silhouette Score:", silhouette_avg)


# Interpretation and Recommendations
# Analyzing characteristics of each cluster and providing exercise recommendations

def _most_common(values):
    """Return the most frequent value in a Series."""
    return values.value_counts().index[0]


# Cluster characteristics
# 'Age' is summarised by its most common label because it is text, not a number.
# 'Enrollment' is averaged — assumes that column is numeric; TODO confirm its dtype.
cluster_summary = merged_data.groupby('Cluster').agg({
    'Conditions': _most_common,
    'Interventions': _most_common,
    'Age': _most_common,
    'Sex': _most_common,
    'Enrollment': 'mean'
}).reset_index()

print(cluster_summary)

# Provide exercise recommendations based on cluster insights
# You can add personalized recommendations based on the characteristics of each cluster.



KeyError: "['Cluster'] not found in axis"