In [None]:
# Import necessary libraries
import os
import pandas as pd
import json
from datetime import datetime

In [None]:
# Load the mock data from the CSV file
file_path = "/content/MOCK_DATA.csv"

# Read the CSV into a DataFrame
mock_data = pd.read_csv(file_path)

# Display the first few rows to check the structure
mock_data.head()

Unnamed: 0,user_id,first_name,last_name,email,gender,date,daily_steps,daily_heart_rate,daily_sleep_hours,heart_rate_variability,daily_calories_burned,daily_distance_km,daily_active_minutes,daily_resting_heart_rate,daily_stress_level,daily_blood_pressure_systolic,daily_blood_pressure_diastolic,daily_oxygen_saturation,daily_weight_kg
0,12346,Reese,Faithorn,rfaithorn0@globo.com,Male,11/21/2024,8809,75,6.7,47,2358,8.0,120,68,5,122,82,98,76.2
1,12345,Ailene,Marcus,amarcus1@themeforest.net,Female,11/21/2024,8600,76,6.8,45,2399,7.6,118,65,4,124,81,97,76.8
2,12346,Franny,Arnet,farnet2@constantcontact.com,Male,11/21/2024,9311,75,7.0,48,2394,7.9,112,67,4,120,82,98,75.2
3,12346,Gardner,Izzatt,gizzatt3@auda.org.au,Male,11/21/2024,9292,77,6.8,50,2213,7.7,119,67,5,122,82,97,75.4
4,12346,Dotty,Agiolfinger,dagiolfinger4@dailymotion.com,Female,11/21/2024,8535,78,6.9,45,2354,7.9,119,65,4,122,82,97,76.5


In [None]:
def normalize_data(df):
    """
    Normalizes and processes the data.

    Args:
    - df (pd.DataFrame): The input dataframe containing mock health metrics data.

    Returns:
    - pd.DataFrame: The normalized data.
    """
    # Ensure that date is in the correct datetime format
    # The original format was '%Y-%m-%d', but the data is in 'MM/DD/YYYY' format
    df['date'] = pd.to_datetime(df['date'], format='%m/%d/%Y')

    # Normalize or adjust any metrics if necessary
    # For instance, convert weight from pounds to kilograms if it were in pounds
    # df['weight_kg'] = df['weight_kg'] * 0.453592

    return df

# Normalize the mock data
normalized_data = normalize_data(mock_data)

# Display the normalized data to check
normalized_data.head()

Unnamed: 0,user_id,first_name,last_name,email,gender,date,daily_steps,daily_heart_rate,daily_sleep_hours,heart_rate_variability,daily_calories_burned,daily_distance_km,daily_active_minutes,daily_resting_heart_rate,daily_stress_level,daily_blood_pressure_systolic,daily_blood_pressure_diastolic,daily_oxygen_saturation,daily_weight_kg
0,12346,Reese,Faithorn,rfaithorn0@globo.com,Male,2024-11-21,8809,75,6.7,47,2358,8.0,120,68,5,122,82,98,76.2
1,12345,Ailene,Marcus,amarcus1@themeforest.net,Female,2024-11-21,8600,76,6.8,45,2399,7.6,118,65,4,124,81,97,76.8
2,12346,Franny,Arnet,farnet2@constantcontact.com,Male,2024-11-21,9311,75,7.0,48,2394,7.9,112,67,4,120,82,98,75.2
3,12346,Gardner,Izzatt,gizzatt3@auda.org.au,Male,2024-11-21,9292,77,6.8,50,2213,7.7,119,67,5,122,82,97,75.4
4,12346,Dotty,Agiolfinger,dagiolfinger4@dailymotion.com,Female,2024-11-21,8535,78,6.9,45,2354,7.9,119,65,4,122,82,97,76.5


In [None]:
# Function to save the normalized data into both CSV and JSON formats
def save_data(df, output_dir):
    """
    Save the normalized data to both CSV and JSON formats.

    Args:
        df (pd.DataFrame): Normalized data.
        output_dir (str): Directory to save the files.
    """
    # Ensure the directory exists
    os.makedirs(output_dir, exist_ok=True)

    # Define paths for the output files
    json_path = os.path.join(output_dir, "processed_health_data.json")
    csv_path = os.path.join(output_dir, "processed_health_data.csv")

    # Save as JSON (each record as a separate JSON object)
    df.to_json(json_path, orient="records", lines=True)
    print(f"Data saved to JSON: {json_path}")

    # Save as CSV
    df.to_csv(csv_path, index=False)
    print(f"Data saved to CSV: {csv_path}")

# Example usage
# Assuming 'normalized_data' is your DataFrame and '/content/output' is the desired output folder
output_dir = '/content/output'
save_data(normalized_data, output_dir)

# Also, saving processed CSV file separately if needed
output_file = '/content/processed_health_data.csv'
normalized_data.to_csv(output_file, index=False)

# Display the path to the saved file
output_file

Data saved to JSON: /content/output/processed_health_data.json
Data saved to CSV: /content/output/processed_health_data.csv


'/content/processed_health_data.csv'

In [None]:
# To show the path where your processed data is saved
output_file

'/content/processed_health_data.csv'

# Step 2: Specialized AI Agents


Since we’ve generated a mock dataset from Mockaroo and didn’t interact with any live API, the next step will be to integrate a Hugging Face model. We’ll proceed by interacting with the model in a .py file, and then move forward to developing the Streamlit app to display the insights.




In [None]:
file_path = '/content/output/processed_health_data.csv'

# Read the CSV into a DataFrame to get the dataset generated from mockaroo we will be working with the rest of the way
df = pd.read_csv(file_path)

# Display the first few rows to check the structure
df.head()

Unnamed: 0,user_id,first_name,last_name,email,gender,date,daily_steps,daily_heart_rate,daily_sleep_hours,heart_rate_variability,daily_calories_burned,daily_distance_km,daily_active_minutes,daily_resting_heart_rate,daily_stress_level,daily_blood_pressure_systolic,daily_blood_pressure_diastolic,daily_oxygen_saturation,daily_weight_kg
0,12346,Reese,Faithorn,rfaithorn0@globo.com,Male,2024-11-21,8809,75,6.7,47,2358,8.0,120,68,5,122,82,98,76.2
1,12345,Ailene,Marcus,amarcus1@themeforest.net,Female,2024-11-21,8600,76,6.8,45,2399,7.6,118,65,4,124,81,97,76.8
2,12346,Franny,Arnet,farnet2@constantcontact.com,Male,2024-11-21,9311,75,7.0,48,2394,7.9,112,67,4,120,82,98,75.2
3,12346,Gardner,Izzatt,gizzatt3@auda.org.au,Male,2024-11-21,9292,77,6.8,50,2213,7.7,119,67,5,122,82,97,75.4
4,12346,Dotty,Agiolfinger,dagiolfinger4@dailymotion.com,Female,2024-11-21,8535,78,6.9,45,2354,7.9,119,65,4,122,82,97,76.5


In [12]:
# Perform basic EDA to inspect the dataset
def basic_eda(df):
    """
    Perform basic EDA on the dataset.
    Args:
        df (pd.DataFrame): Input DataFrame.
    Returns:
        None
    """
    print("Dataset Overview:")
    print(df.info())  # Summary of dataset

    print("\nColumn Names:")
    print(df.columns.tolist())  # List of column names

    print("\nFirst 5 Rows:")
    print(df.head())  # Display the first few rows of the dataset

# Perform EDA
basic_eda(df)

Dataset Overview:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 19 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   user_id                         1000 non-null   int64  
 1   first_name                      1000 non-null   object 
 2   last_name                       1000 non-null   object 
 3   email                           1000 non-null   object 
 4   gender                          1000 non-null   object 
 5   date                            1000 non-null   object 
 6   daily_steps                     1000 non-null   int64  
 7   daily_heart_rate                1000 non-null   int64  
 8   daily_sleep_hours               1000 non-null   float64
 9   heart_rate_variability          1000 non-null   int64  
 10  daily_calories_burned           1000 non-null   int64  
 11  daily_distance_km               1000 non-null   float64
 12  daily_active_minu

In [13]:
# 1. Fitness Tracking Agent implementation

def fitness_tracking_agent(df):
    # Metrics of interest
    activity_data = df[['daily_steps', 'daily_calories_burned', 'daily_active_minutes']]

    # EDA
    activity_summary = activity_data.describe()

    # Weekly activity suggestion
    avg_steps = activity_data['daily_steps'].mean()
    suggested_steps = avg_steps * 1.1  # 10% increase
    output = {
        "Average Daily Steps": avg_steps,
        "Suggested Daily Steps": suggested_steps,
        "Message": "Increasing steps by 10% can improve cardiovascular health."
    }
    return output

In [14]:
fitness_output = fitness_tracking_agent(df)

In [15]:
print(fitness_output)

{'Average Daily Steps': 8999.118, 'Suggested Daily Steps': 9899.029800000002, 'Message': 'Increasing steps by 10% can improve cardiovascular health.'}
