In [None]:
# 📌 Gerekli Kütüphaneleri Yükleme
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
import xgboost as xgb
from sklearn.metrics import mean_absolute_error, mean_squared_error
import pickle
import os

# 📌 1️⃣ Load the dataset: prefer the copy saved by an earlier run, else the raw export
file_path = "Guncellenmis_Veriset.xlsx"

if os.path.exists(file_path):
    df = pd.read_excel(file_path)
else:
    df = pd.read_excel("C:\\Users\\Furkan\\OneDrive\\Desktop\\Kaggle veriler\\Fit_Data.xlsx")

# 📌 2️⃣ Drop the row identifier, which is not a feature
if "id" in df.columns:
    df.drop(columns=["id"], inplace=True)

# 📌 3️⃣ Convert categorical columns to numbers
categoric_columns = ["Gender", "Physical Activity Level", "Daily Activity Level", "Prior Exercise Experience", "Sleep Quality", "Goal"]

# Gender is label-encoded; the fitted encoder is reused at prediction time.
# NOTE(review): when the saved copy is loaded, Gender already holds numeric
# codes, so the encoder is fitted on their string forms ("0", "1", ...) and
# will not recognise the original labels — confirm how gender should be
# handled on re-runs.
gender_encoder = LabelEncoder()
df["Gender"] = gender_encoder.fit_transform(df["Gender"].astype(str))

# Ordinal codes for the remaining categorical columns (Turkish and English labels)
mappings = {
    "Physical Activity Level": {"Sedentary": 0, "Lightly Active": 1, "Moderately Active": 2, "Very Active": 3, "Super Active": 4},
    "Daily Activity Level": {"İyi": 2, "Orta": 1, "Kötü": 0, "Poor": 0, "Moderate": 1, "Good": 2},
    "Prior Exercise Experience": {"Bir yıldan az": 0, "Bir yıl": 1, "Bir yıldan fazla": 2, "Less than a year": 0, "One year": 1, "More than a year": 2},
    "Sleep Quality": {"Excellent": 3, "Good": 2, "Fair": 1, "Poor": 0},
    "Goal": {"Kilo vermek": 0, "Kilo almak": 1, "Kas yapmak": 2, "Lose weight": 0, "Gain weight": 1, "Build muscle": 2}
}
for column, mapping in mappings.items():
    # The saved copy of the dataset already contains the numeric codes.
    # Mapping labels onto codes would turn every value into NaN, leaving a
    # column the mode-based imputation below cannot repair.
    if pd.api.types.is_numeric_dtype(df[column]):
        continue
    df[column] = df[column].map(mapping)

# 📌 4️⃣ Fill missing values with each column's most frequent value
df.fillna(df.mode().iloc[0], inplace=True)

# A column with no valid value at all has no mode and stays NaN; fail here
# with the column names instead of deep inside model fitting.
unfilled_columns = df.columns[df.isna().any()].tolist()
if unfilled_columns:
    raise ValueError(
        f"Columns still contain NaN after imputation: {unfilled_columns}. "
        "Check that their values match the keys in `mappings`."
    )

# 📌 5️⃣ Separate the three prediction targets from the feature columns
target_columns = ["Daily Water Intake (L)", "Daily Exercise (min)", "Daily Calories Consumed"]
y = df[target_columns]
X = df.drop(columns=target_columns, errors="ignore")

# 📌 6️⃣ Hold out 20% of the rows for evaluation (fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 📌 7️⃣ Standardise features; the scaler is fitted on the training rows only
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Persist the fitted scaler next to the script
with open("scaler.pkl", "wb") as scaler_file:
    pickle.dump(scaler, scaler_file)

# 📌 8️⃣ Fit the three candidate regressors and predict on the held-out rows
linear_model = LinearRegression().fit(X_train_scaled, y_train)
linear_preds = linear_model.predict(X_test_scaled)

rf_model = RandomForestRegressor(n_estimators=50, random_state=42).fit(X_train_scaled, y_train)
rf_preds = rf_model.predict(X_test_scaled)

xgb_model = xgb.XGBRegressor(
    objective="reg:squarederror",
    n_estimators=50,
    random_state=42,
).fit(X_train_scaled, y_train)
xgb_preds = xgb_model.predict(X_test_scaled)
# 📌 9️⃣ Performans
def evaluate_model(name, y_true, y_pred):
    """Summarise one model's prediction error.

    Args:
        name: Label stored under the "Model" key of the result.
        y_true: Ground-truth target values.
        y_pred: Predicted target values, compared against ``y_true``.

    Returns:
        A dict with keys "Model", "MAE" (mean absolute error) and "RMSE"
        (square root of the mean squared error).
    """
    return {
        "Model": name,
        "MAE": mean_absolute_error(y_true, y_pred),
        "RMSE": np.sqrt(mean_squared_error(y_true, y_pred)),
    }

# Score every candidate on the held-out rows
results = [
    evaluate_model(label, y_test, preds)
    for label, preds in (
        ("Linear Regression", linear_preds),
        ("Random Forest", rf_preds),
        ("XGBoost", xgb_preds),
    )
]

results_df = pd.DataFrame(results)

# 📌 🔟 Keep the model with the lowest RMSE and persist it
best_model = min(results, key=lambda entry: entry["RMSE"])
_models_by_name = {
    "Linear Regression": linear_model,
    "Random Forest": rf_model,
}
# Any other label falls through to XGBoost
final_model = _models_by_name.get(best_model["Model"], xgb_model)

with open("best_model.pkl", "wb") as model_file:
    pickle.dump(final_model, model_file)

# 📌 1️⃣1️⃣ Save the processed frame
# NOTE: this is the same path the loader checks first, so later runs read
# this encoded copy.
df.to_excel("Guncellenmis_Veriset.xlsx", index=False)

# 📌 1️⃣2️⃣ API veya manuel kullanım için tahmin fonksiyonu
def predict_new_data(new_user_data):
    """Predict daily water intake, exercise time and calorie intake for one user.

    The raw record is run through the same steps as the training data:
    derived weight columns, categorical encoding, mode imputation and
    scaling. It is then passed to the selected ``final_model``.

    Args:
        new_user_data: Mapping of feature name to raw value for a single
            user. It must contain "Current Weight (lbs)",
            "Daily Caloric Surplus/Deficit", "Duration (weeks)", "Gender"
            and every column named in ``mappings``. Categorical values are
            given as labels (e.g. "Very Active"), not as codes.

    Returns:
        A dict with the keys "Daily Water Intake (L)",
        "Daily Exercise (min)" and "Daily Calories Consumed", each a plain
        Python float.

    Raises:
        KeyError: If one of the required fields is missing.
        ValueError: If the gender label is rejected by ``gender_encoder``.
    """
    new_user_df = pd.DataFrame([new_user_data])

    # 🔢 Derived weight columns: 3500 kcal is taken as one pound of body weight
    calories_per_pound = 3500
    new_user_df["Final Weight (lbs)"] = new_user_df["Current Weight (lbs)"] + (
        new_user_df["Daily Caloric Surplus/Deficit"] * new_user_df["Duration (weeks)"] * 7 / calories_per_pound
    )
    new_user_df["Weight Change (lbs)"] = new_user_df["Final Weight (lbs)"] - new_user_df["Current Weight (lbs)"]

    # 🔁 Encode categoricals the same way as the training data
    # (gender is cast to str first, as it was when the encoder was fitted)
    new_user_df["Gender"] = gender_encoder.transform(new_user_df["Gender"].astype(str))
    for column, mapping in mappings.items():
        new_user_df[column] = new_user_df[column].map(mapping)

    # Align columns with what the scaler was fitted on. The input dict's key
    # order, plus the derived columns appended above, need not match the
    # training frame, and scikit-learn rejects mismatched feature names.
    # Features absent from the input become NaN and are imputed below;
    # unknown extras are dropped.
    feature_names = getattr(scaler, "feature_names_in_", None)
    if feature_names is not None:
        new_user_df = new_user_df.reindex(columns=list(feature_names))

    # Fill gaps (missing features, unmapped labels) with the training modes
    new_user_df = new_user_df.fillna(df.mode().iloc[0])

    # Scale with the scaler fitted on the training split
    new_user_scaled = scaler.transform(new_user_df)

    # Predict; one input row gives one row holding the three targets
    prediction = final_model.predict(new_user_scaled)
    predicted_row = prediction[0]

    # Convert numpy scalars to built-in floats so the result can be
    # serialised (e.g. to JSON) when served through an API.
    return {
        "Daily Water Intake (L)": float(predicted_row[0]),
        "Daily Exercise (min)": float(predicted_row[1]),
        "Daily Calories Consumed": float(predicted_row[2])
    }

# ✅ Sample prediction (smoke test) for one hand-written user record.
# Categorical fields are given as labels; both Turkish and English labels
# appear in `mappings`.
new_user = {
    "Age": 25,
    "Gender": "Male",
    "Current Weight (lbs)": 180.0,
    "BMR (Calories)": 1750,
    "Daily Caloric Surplus/Deficit": -500,
    "Duration (weeks)": 8,
    "Physical Activity Level": "Very Active",
    "Sleep Quality": "Good",
    "Stress Level": 3,
    "Goal": "Kilo vermek",
    "Prior Exercise Experience": "One year",
    "Daily Activity Level": "Moderate",
    "Weight (kg)": 81,
    "BMI": 24.5,
    "Height (m)": 1.80,
}

# Print the header first, then the predicted targets for the sample user
print("\n📢 Tahmin Sonuçları:")
sample_prediction = predict_new_data(new_user)
print(sample_prediction)



  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count


ValueError: Input X contains NaN.
LinearRegression does not accept missing values encoded as NaN natively. For supervised learning, you might want to consider sklearn.ensemble.HistGradientBoostingClassifier and Regressor which accept missing values encoded as NaNs natively. Alternatively, it is possible to preprocess the data, for instance by using an imputer transformer in a pipeline or drop samples with missing values. See https://scikit-learn.org/stable/modules/impute.html You can find a list of all estimators that handle NaN values at the following page: https://scikit-learn.org/stable/modules/impute.html#estimators-that-handle-nan-values

ValueError: Input X contains NaN.
LinearRegression does not accept missing values encoded as NaN natively. For supervised learning, you might want to consider sklearn.ensemble.HistGradientBoostingClassifier and Regressor which accept missing values encoded as NaNs natively. Alternatively, it is possible to preprocess the data, for instance by using an imputer transformer in a pipeline or drop samples with missing values. See https://scikit-learn.org/stable/modules/impute.html You can find a list of all estimators that handle NaN values at the following page: https://scikit-learn.org/stable/modules/impute.html#estimators-that-handle-nan-values