In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import joblib
import os

# Define the file paths
file_path = '/Users/claudiaislas/Desktop/Fitabase Data 4.12.16-5.12.16'
daily_activity_file = os.path.join(file_path, 'dailyActivity_merged.csv')
heartrate_seconds_file = os.path.join(file_path, 'heartrate_seconds_merged.csv')
sleep_data_file = os.path.join(file_path, 'sleepDay_merged.csv')
weight_data_file = os.path.join(file_path, 'weightLogInfo_merged.csv')

# Load data
daily_activity = pd.read_csv(daily_activity_file)
heartrate_seconds = pd.read_csv(heartrate_seconds_file)
sleep_data = pd.read_csv(sleep_data_file)
weight_data = pd.read_csv(weight_data_file)

# Clean and merge data
heart_rate_avg = heartrate_seconds.groupby('Id')['Value'].mean().reset_index()
minutes_asleep_avg = sleep_data.groupby('Id')['TotalMinutesAsleep'].mean().reset_index()
weight_avg = weight_data.groupby('Id')['WeightKg'].mean().reset_index()

daily_activity = daily_activity.merge(heart_rate_avg, on='Id', how='left', suffixes=('', '_HeartRate'))
daily_activity = daily_activity.merge(minutes_asleep_avg, on='Id', how='left', suffixes=('', '_MinutesAsleep'))
daily_activity = daily_activity.merge(weight_avg, on='Id', how='left', suffixes=('', '_Weight'))

daily_activity.rename(columns={
    'Value': 'HeartRate',
    'TotalMinutesAsleep': 'MinutesAsleep',
    'WeightKg': 'Weight'
}, inplace=True)

# Fill missing values
daily_activity.fillna(daily_activity.mean(numeric_only=True), inplace=True)

# Save the cleaned data
cleaned_data_file = os.path.join(file_path, 'cleaned_combined_data.csv')
daily_activity.to_csv(cleaned_data_file, index=False)





In [7]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import joblib

# Define the file paths
file_path = '/Users/claudiaislas/Desktop/Fitabase Data 4.12.16-5.12.16'
daily_activity_file = os.path.join(file_path, 'dailyActivity_merged.csv')
heartrate_seconds_file = os.path.join(file_path, 'heartrate_seconds_merged.csv')
sleep_data_file = os.path.join(file_path, 'sleepDay_merged.csv')
weight_data_file = os.path.join(file_path, 'weightLogInfo_merged.csv')

# Load data
daily_activity = pd.read_csv(daily_activity_file)
heartrate_seconds = pd.read_csv(heartrate_seconds_file)
sleep_data = pd.read_csv(sleep_data_file)
weight_data = pd.read_csv(weight_data_file)

# Print columns to verify
print("Daily Activity Columns:", daily_activity.columns)
print("Heart Rate Columns:", heartrate_seconds.columns)
print("Sleep Data Columns:", sleep_data.columns)
print("Weight Data Columns:", weight_data.columns)

# Clean and merge data
heart_rate_avg = heartrate_seconds.groupby('Id')['Value'].mean().reset_index()
minutes_asleep_avg = sleep_data.groupby('Id')['TotalMinutesAsleep'].mean().reset_index()
weight_avg = weight_data.groupby('Id')['WeightKg'].mean().reset_index()

daily_activity = daily_activity.merge(heart_rate_avg, on='Id', how='left')
daily_activity = daily_activity.merge(minutes_asleep_avg, on='Id', how='left')
daily_activity = daily_activity.merge(weight_avg, on='Id', how='left')

daily_activity.rename(columns={
    'Value': 'HeartRate',
    'TotalMinutesAsleep': 'MinutesAsleep',
    'WeightKg': 'Weight'
}, inplace=True)

# Verify the columns after merging and renaming
print("Combined Data Columns:", daily_activity.columns)

# Fill missing values
daily_activity.fillna(daily_activity.mean(numeric_only=True), inplace=True)

# Save the cleaned data
cleaned_data_file = os.path.join(file_path, 'cleaned_combined_data.csv')
daily_activity.to_csv(cleaned_data_file, index=False)

# Load cleaned data
data = pd.read_csv(cleaned_data_file)

# Define features and target variable
features = ['HeartRate', 'TotalSteps', 'Calories', 'MinutesAsleep', 'Weight']
data['HeartDiseaseRisk'] = ((data['HeartRate'] > 100) | (data['TotalSteps'] < 5000)) & (data['Weight'] > 80)
data['DiabetesRisk'] = ((data['Calories'] > 2500) | (data['MinutesAsleep'] < 300))

# Check the distribution of the target variables
print(data['HeartDiseaseRisk'].value_counts())
print(data['DiabetesRisk'].value_counts())

X = data[features]
y_heart = data['HeartDiseaseRisk']
y_diabetes = data['DiabetesRisk']

# Ensure that both classes are present
if len(data['HeartDiseaseRisk'].unique()) < 2:
    raise ValueError("HeartDiseaseRisk data contains only one class. Please adjust the conditions to ensure both classes are present.")
if len(data['DiabetesRisk'].unique()) < 2:
    raise ValueError("DiabetesRisk data contains only one class. Please adjust the conditions to ensure both classes are present.")

# Split the data into training and testing sets
X_train_heart, X_test_heart, y_train_heart, y_test_heart = train_test_split(X, y_heart, test_size=0.3, random_state=42)
X_train_diabetes, X_test_diabetes, y_train_diabetes, y_test_diabetes = train_test_split(X, y_diabetes, test_size=0.3, random_state=42)

# Train the logistic regression model
model_heart = LogisticRegression()
model_heart.fit(X_train_heart, y_train_heart)
joblib.dump(model_heart, 'heart_disease_risk_model.pkl')

model_diabetes = LogisticRegression()
model_diabetes.fit(X_train_diabetes, y_train_diabetes)
joblib.dump(model_diabetes, 'diabetes_risk_model.pkl')


Daily Activity Columns: Index(['Id', 'ActivityDate', 'TotalSteps', 'TotalDistance', 'TrackerDistance',
       'LoggedActivitiesDistance', 'VeryActiveDistance',
       'ModeratelyActiveDistance', 'LightActiveDistance',
       'SedentaryActiveDistance', 'VeryActiveMinutes', 'FairlyActiveMinutes',
       'LightlyActiveMinutes', 'SedentaryMinutes', 'Calories'],
      dtype='object')
Heart Rate Columns: Index(['Id', 'Time', 'Value'], dtype='object')
Sleep Data Columns: Index(['Id', 'SleepDay', 'TotalSleepRecords', 'TotalMinutesAsleep',
       'TotalTimeInBed'],
      dtype='object')
Weight Data Columns: Index(['Id', 'Date', 'WeightKg', 'WeightPounds', 'Fat', 'BMI',
       'IsManualReport', 'LogId'],
      dtype='object')
Combined Data Columns: Index(['Id', 'ActivityDate', 'TotalSteps', 'TotalDistance', 'TrackerDistance',
       'LoggedActivitiesDistance', 'VeryActiveDistance',
       'ModeratelyActiveDistance', 'LightActiveDistance',
       'SedentaryActiveDistance', 'VeryActiveMinutes', 'F

['diabetes_risk_model.pkl']

In [5]:
print(data['HeartDiseaseRisk'].value_counts())
print(data['DiabetesRisk'].value_counts())


False    940
Name: HeartDiseaseRisk, dtype: int64
False    874
True      66
Name: DiabetesRisk, dtype: int64


In [9]:
import matplotlib
matplotlib.use('Agg')
from flask import Flask, request, render_template
import pandas as pd
import matplotlib.pyplot as plt
import io
import base64
import os
import joblib

app = Flask(__name__)

# Load the trained models
model_heart = joblib.load('heart_disease_risk_model.pkl')
model_diabetes = joblib.load('diabetes_risk_model.pkl')

# Define the file path for the cleaned combined data
cleaned_data_file = '/Users/claudiaislas/Desktop/Fitabase Data 4.12.16-5.12.16/cleaned_combined_data.csv'

@app.route('/')
def home():
    return render_template('index.html')

@app.route('/upload', methods=['POST'])
def upload():
    # Read the cleaned combined data
    data = pd.read_csv(cleaned_data_file)

    # Perform data analysis
    summary = data.describe().to_html()

    # Generate plot
    fig, ax = plt.subplots()
    data.plot(ax=ax)
    img = io.BytesIO()
    plt.savefig(img, format='png')
    img.seek(0)
    plot_url = base64.b64encode(img.getvalue()).decode()

    # Predict heart disease and diabetes risk
    features = ['HeartRate', 'TotalSteps', 'Calories', 'MinutesAsleep', 'Weight']
    data['HeartDiseaseRisk'] = model_heart.predict(data[features])
    data['DiabetesRisk'] = model_diabetes.predict(data[features])

    # Identify users at risk
    at_risk_heart = data[data['HeartDiseaseRisk'] == 1]
    at_risk_diabetes = data[data['DiabetesRisk'] == 1]
    risk_message_heart = f"{len(at_risk_heart)} users are at risk of heart disease."
    risk_message_diabetes = f"{len(at_risk_diabetes)} users are at risk of diabetes."

    return render_template('result.html', summary=summary, plot_url=plot_url,
                          risk_message_heart=risk_message_heart,
                          risk_message_diabetes=risk_message_diabetes)

if __name__ == '__main__':
    app.run(debug=True, port=5005)


 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5005
Press CTRL+C to quit
 * Restarting with watchdog (fsevents)
Traceback (most recent call last):
  File "/Users/claudiaislas/anaconda3/lib/python3.10/site-packages/ipykernel_launcher.py", line 15, in <module>
    from ipykernel import kernelapp as app
  File "/Users/claudiaislas/anaconda3/lib/python3.10/site-packages/ipykernel/__init__.py", line 5, in <module>
    from .connect import *  # noqa
  File "/Users/claudiaislas/anaconda3/lib/python3.10/site-packages/ipykernel/connect.py", line 11, in <module>
    import jupyter_client
  File "/Users/claudiaislas/anaconda3/lib/python3.10/site-packages/jupyter_client/__init__.py", line 8, in <module>
    from .asynchronous import AsyncKernelClient  # noqa
  File "/Users/claudiaislas/anaconda3/lib/python3.10/site-packages/jupyter_client/asynchronous/__init__.py", line 1, in <module>
    from .client import AsyncKernelClient  # noqa
  File "/Users/claudiaislas/anaconda3/lib/python3.10/site-packages/jupyter_clien

SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [10]:
%tb


SystemExit: 1