In [2]:
import pandas as pd
import numpy as np

In [3]:
# Load the raw personal-health records from the CSV in the working directory.
data = pd.read_csv('personal_health_data.csv')

In [4]:
# Show the column labels of the loaded frame to see which fields are available.
data.columns

Index(['User_ID', 'Age', 'Gender', 'Weight', 'Height', 'Medical_Conditions',
       'Medication', 'Smoker', 'Alcohol_Consumption', 'Timestamp',
       'Day_of_Week', 'Sleep_Duration', 'Deep_Sleep_Duration',
       'REM_Sleep_Duration', 'Wakeups', 'Snoring', 'Heart_Rate',
       'Blood_Oxygen_Level', 'ECG', 'Calories_Intake', 'Water_Intake',
       'Stress_Level', 'Mood', 'Skin_Temperature', 'Body_Fat_Percentage',
       'Muscle_Mass', 'Health_Score', 'Anomaly_Flag'],
      dtype='object')

In [5]:
# Work on an explicit copy of the loaded frame. `data` is already a DataFrame,
# and wrapping it in pd.DataFrame(...) does not copy DataFrame input by default,
# so derived columns added to `df` later would not be isolated from `data`.
df = data.copy()

In [6]:
# Dimensions of the working frame as (rows, columns).
df.shape

(10000, 28)

In [7]:
# Count missing values in each column (isna is the same check as isnull).
df.isna().sum()

User_ID                   0
Age                       0
Gender                    0
Weight                    0
Height                    0
Medical_Conditions     3304
Medication                0
Smoker                    0
Alcohol_Consumption    3360
Timestamp                 0
Day_of_Week               0
Sleep_Duration            0
Deep_Sleep_Duration       0
REM_Sleep_Duration        0
Wakeups                   0
Snoring                   0
Heart_Rate                0
Blood_Oxygen_Level        0
ECG                       0
Calories_Intake           0
Water_Intake              0
Stress_Level              0
Mood                      0
Skin_Temperature          0
Body_Fat_Percentage       0
Muscle_Mass               0
Health_Score              0
Anomaly_Flag              0
dtype: int64

In [8]:
# --- Derived health features -------------------------------------------------

# BMI = Weight (kg) / Height (m)^2. Height is divided by 100 to convert it to
# metres, which assumes the column is stored in centimetres.
df['BMI'] = df['Weight'] / (df['Height'] / 100) ** 2

# Coerce 'Stress_Level' to numeric (entries that cannot be parsed become NaN)
# and replace those NaNs with 0. The filled Series is assigned back to the
# column instead of calling fillna(..., inplace=True) on the df['Stress_Level']
# selection: that chained in-place form operates on an intermediate object and
# is not guaranteed to update `df` (it warns on recent pandas and has no effect
# under Copy-on-Write).
df['Stress_Level'] = pd.to_numeric(df['Stress_Level'], errors='coerce').fillna(0)

# Simplified health risk factor: BMI plus stress level.
df['Health_Risk_Factor'] = df['BMI'] + df['Stress_Level']

# Simplified engagement level: sleep duration minus stress level.
df['Engagement_Level'] = df['Sleep_Duration'] - df['Stress_Level']

# --- Business metrics --------------------------------------------------------

# Churn rate: assumes 'Anomaly_Flag' == 1 marks a user at high risk of churn,
# so the column mean is the share of flagged rows.
churn_rate = df['Anomaly_Flag'].mean()

# Example revenue model: 10 revenue units per Health_Score point.
df['Revenue'] = df['Health_Score'] * 10
ARPU = df['Revenue'].mean()

# Customer Lifetime Value, simplified: ARPU * gross margin / churn rate.
# Gross margin is assumed to be 70% (can vary by business).
gross_margin = 0.7
# A churn rate of exactly 0 would divide by zero; in that case the lifetime
# multiplier is set to 0, so CLTV is reported as 0 rather than raising.
lifetime_multiplier = 1 / churn_rate if churn_rate != 0 else 0
CLTV = ARPU * gross_margin * lifetime_multiplier

# Print key metrics
print(f"Churn Rate: {churn_rate:.2%}")
print(f"ARPU: ${ARPU:.2f}")
print(f"CLTV: ${CLTV:.2f}")

# Print the first rows of the DataFrame to verify the calculations
print(df.head())

Churn Rate: 62.28%
ARPU: $496.93
CLTV: $558.52
   User_ID  Age Gender      Weight      Height Medical_Conditions Medication  \
0  U_00001   61  Other   88.677623  178.524231           Diabetes         No   
1  U_00002   24   Male   86.742084  200.346358                NaN        Yes   
2  U_00003   81  Other   61.656054  195.971356       Hypertension        Yes   
3  U_00004   70  Other  141.513794  164.178216       Hypertension         No   
4  U_00005   30  Other   80.552872  146.920675           Diabetes        Yes   

  Smoker Alcohol_Consumption            Timestamp  ...     Mood  \
0     No            Moderate  2021-01-01 00:00:00  ...  Neutral   
1     No            Moderate  2021-01-01 01:00:00  ...  Anxious   
2     No            Moderate  2021-01-01 02:00:00  ...      Sad   
3     No               Heavy  2021-01-01 03:00:00  ...    Happy   
4    Yes               Heavy  2021-01-01 04:00:00  ...    Happy   

   Skin_Temperature  Body_Fat_Percentage  Muscle_Mass  Health_Score  