In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from datetime import datetime, timedelta

In [None]:
# Load data from CSV file
# The path is relative, so it resolves against the kernel's working directory
csv_path = 'women_health.csv'
data = pd.read_csv(csv_path)

In [None]:
# Assume today's date for the entire dataset
# One timestamp is taken and broadcast to every row of the new column
reference_timestamp = pd.Timestamp(datetime.today())
data['current_date'] = reference_timestamp

In [None]:
# Assume last period start date was 20 days ago for everyone
# The same fixed offset is subtracted from each row's current_date
assumed_offset = timedelta(days=20)
data['last_period_start_date'] = data['current_date'] - assumed_offset

In [None]:
# Convert date columns to datetime
# Both columns get the same pd.to_datetime conversion, in the original order
for date_column in ('current_date', 'last_period_start_date'):
    data[date_column] = pd.to_datetime(data[date_column])

In [None]:
# Calculate the cycle day
# Whole days between the two date columns, wrapped by each row's interval_length
elapsed_days = (data['current_date'] - data['last_period_start_date']).dt.days
data['cycle_day'] = elapsed_days % data['interval_length']

In [None]:
# Encoding categorical variables
# Replace the two categorical columns with indicator columns; the result rebinds `data`
categorical_columns = ['regularity', 'flow_type']
data = pd.get_dummies(data, columns=categorical_columns)

In [None]:
# Label the phases based on cycle day
def label_phase(row):
    """Return the cycle-phase label for one row, based on its 'cycle_day' value.

    Args:
        row: Mapping-like object (for example a DataFrame row) that has a
            'cycle_day' entry.

    Returns:
        'Menstrual' for days in [0, 5), 'Follicular' for [5, 14),
        'Ovulation' for [14, 15), 'Luteal' for day 15 and later, or None
        when the cycle day is missing (NaN/None) or negative.
    """
    day = row['cycle_day']

    # NaN fails every comparison and would otherwise fall through to 'Luteal';
    # report a missing or negative day as "no label" instead of a real phase.
    if pd.isna(day) or day < 0:
        return None

    # Half-open ranges leave no gaps between phases, so a fractional day such
    # as 4.5 gets the phase of its whole-day part instead of 'Luteal'.
    if day < 5:
        return 'Menstrual'
    if day < 14:
        return 'Follicular'
    if day < 15:
        return 'Ovulation'
    return 'Luteal'

In [None]:
# Apply the label_phase function to create the 'phase' column
# axis=1 hands label_phase one row at a time; the results become the new column
phase_labels = data.apply(label_phase, axis=1)
data['phase'] = phase_labels

In [None]:
# Prepare features and labels
# Everything except the label and the two raw date columns is kept as a feature.
# NOTE(review): 'cycle_day' stays in X while 'phase' is computed from
# 'cycle_day' alone, so the label is recoverable from that single feature --
# confirm this is intended.
non_feature_columns = ['phase', 'current_date', 'last_period_start_date']
X = data.drop(columns=non_feature_columns)
y = data['phase']

In [None]:
# Check data types and fix if necessary
print(X.dtypes)
X = X.apply(pd.to_numeric, errors='coerce')  # Convert columns to numeric, forcing errors to NaN

# A column that is entirely NaN after coercion held no numeric data at all
# (for example free-text values). Drop such columns first; otherwise the
# row-wise dropna below would discard every row and leave nothing to train on.
X = X.dropna(axis=1, how='all')

# Drop rows with NaN values if any
X = X.dropna()
y = y.loc[X.index]  # Ensure y matches the indices of X

# Fail with an explicit message instead of an opaque error from the splitter
if X.empty:
    raise ValueError(
        'No usable rows or columns remain after numeric conversion; '
        'check the input data for missing or non-numeric values.'
    )

# Split the data (fixed random_state keeps the split reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

In [None]:
# Make predictions
# Predicted labels for the rows in X_test
y_pred = model.predict(X_test)

# Evaluate the model
# Compare the predictions against the true labels in y_test
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {accuracy}')

# Predict the phase for new data
# Take ONE snapshot of the current time and derive both dates from it. Calling
# datetime.today() separately for each column makes the second call slightly
# later, so the difference is just under 15 days and `.dt.days` floors it to
# 14, shifting the cycle day by one.
today = pd.Timestamp(datetime.today())
new_data = pd.DataFrame({
    'gender': ['female'],
    'age': [25],
    'regularity': ['regular'],
    'interval_length': [28],
    'duration': [5],
    'flow_type': ['heavy'],
    'current_date': [today],
    'last_period_start_date': [today - timedelta(days=15)]
})

# Feature engineering for new data (same steps as applied to the training frame)
new_data['cycle_day'] = (new_data['current_date'] - new_data['last_period_start_date']).dt.days % new_data['interval_length']
new_data = pd.get_dummies(new_data, columns=['regularity', 'flow_type'])

# Ensure the new data has the same columns as the training data:
# columns absent from new_data are added as 0, and extra columns are dropped
new_data = new_data.reindex(columns=X.columns, fill_value=0)

# Predict the phase
predicted_phase = model.predict(new_data)
print(f'Predicted Phase: {predicted_phase[0]}')

# Recommend food based on the phase
# Lookup table: phase label -> list of suggested food categories
food_recommendations = {
    'Menstrual': ['Iron-rich foods', 'Hydration'],
    'Follicular': ['Folate-rich foods', 'Protein'],
    'Ovulation': ['Anti-inflammatory foods', 'Healthy fats'],
    'Luteal': ['Magnesium-rich foods', 'Complex carbs'],
}

# Look up the entry for the first predicted label and print it
recommended_foods = food_recommendations[predicted_phase[0]]
print(f'Recommended Foods: {recommended_foods}')