In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier # New Model
from sklearn.preprocessing import StandardScaler
from sklearn.utils import resample
import joblib

# 1. Load your original data
df = pd.read_csv('data/balanced_cycle_dataset.csv')

# 2. Balance the data (Upsampling)
df_regular = df[df.CycleWithPeakorNot == 1]
df_irregular = df[df.CycleWithPeakorNot == 0]
df_irregular_upsampled = resample(df_irregular, 
                                 replace=True, 
                                 n_samples=len(df_regular), 
                                 random_state=42)
df_balanced = pd.concat([df_regular, df_irregular_upsampled])

# 3. Define Features and Target (Ensure order matches app.py)
X = df_balanced[['Age', 'BMI', 'LengthofCycle', 'MeanCycleLength', 'LengthofLutealPhase', 
                'EstimatedDayofOvulation', 'FirstDayofHigh', 'TotalDaysofFertility', 
                'TotalNumberofHighDays', 'TotalNumberofPeakDays', 'CycleWithPeakorNot', 
                'CycleRegularityIndex', 'FertilitySpread']]
y = df_balanced['CycleWithPeakorNot']

# 4. Split and Scale
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

# 5. Train Random Forest
# n_estimators=100 means we are using 100 decision trees together
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train_scaled, y_train)

# 6. Save the new "Smart" brain
joblib.dump(rf_model, 'cycle_model.pkl')
joblib.dump(scaler, 'scaler.pkl')

print("Success: Random Forest model saved as cycle_model.pkl")

Success: Random Forest model saved as cycle_model.pkl


In [2]:
# In your Jupyter Notebook
import joblib

# Force the model to NOT use any new constraints that cause errors
rf_model.set_params(monotonic_cst=None) 

# Save again
joblib.dump(rf_model, 'cycle_model.pkl')
joblib.dump(scaler, 'scaler.pkl')


ValueError: Invalid parameter 'monotonic_cst' for estimator RandomForestClassifier(random_state=42). Valid parameters are: ['bootstrap', 'ccp_alpha', 'class_weight', 'criterion', 'max_depth', 'max_features', 'max_leaf_nodes', 'max_samples', 'min_impurity_decrease', 'min_samples_leaf', 'min_samples_split', 'min_weight_fraction_leaf', 'n_estimators', 'n_jobs', 'oob_score', 'random_state', 'verbose', 'warm_start'].

In [1]:
import sklearn
print(sklearn.__version__)

1.3.0
