In [18]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.preprocessing import LabelEncoder, StandardScaler
from joblib import dump


data = pd.read_csv('veri.csv')


label_encoder = LabelEncoder()
data['Cinsiyet'] = label_encoder.fit_transform(data['Cinsiyet'])
data['Aktivite Seviyesi'] = label_encoder.fit_transform(data['Aktivite Seviyesi'])


X = data[['Boy (cm)', 'Kilo (kg)', 'Yaş (yıl)', 'Cinsiyet', 'Aktivite Seviyesi']]
Y = data[['Günlük Kalori İhtiyacı (kcal)']]

Y = Y.values.ravel()


scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


X_train, X_test, Y_train, Y_test = train_test_split(X_scaled, Y, test_size=0.2, random_state=42)


algoritmalar = {
    'Linear Regression': LinearRegression(),
    'Decision Tree': DecisionTreeRegressor(),
    'Random Forest': RandomForestRegressor(),
    'Support Vector Regressor': SVR(),
    'K-Nearest Neighbors': KNeighborsRegressor(),
    'Neural Network': GradientBoostingRegressor(
        n_estimators=500,  
        learning_rate=0.05,  
        max_depth=5,  
        random_state=42
    ),
    'Gradient Boosting': GradientBoostingRegressor(),
    'AdaBoost': AdaBoostRegressor(),
    'Ridge Regression': Ridge(),
    'Lasso Regression': Lasso()
}


rmse_results = []


for name, model in algoritmalar.items():
    model.fit(X_train, Y_train)
    y_pred = model.predict(X_test)
    rmse = np.sqrt(mean_squared_error(Y_test, y_pred))
    rmse_results.append({'Model': name, 'RMSE': rmse})


results_df = pd.DataFrame(rmse_results)


best_model_row = results_df.loc[results_df['RMSE'].idxmin()]
best_model_name = best_model_row['Model']
best_model_rmse = best_model_row['RMSE']


best_model = algoritmalar[best_model_name]
best_model.fit(X_scaled, Y)


dump(best_model, 'eniyi.joblib')


print("Tüm modellerin RMSE değerleri:")
print(results_df)
print(f"\nEn iyi model: {best_model_name} (RMSE: {best_model_rmse})")


results_df.to_csv('model_performances.csv', index=False)


Tüm modellerin RMSE değerleri:
                      Model        RMSE
0         Linear Regression  467.232204
1             Decision Tree  363.371463
2             Random Forest  220.299625
3  Support Vector Regressor  694.449316
4       K-Nearest Neighbors  354.300075
5            Neural Network  184.488683
6         Gradient Boosting  125.025136
7                  AdaBoost  190.694669
8          Ridge Regression  464.941334
9          Lasso Regression  466.112853

En iyi model: Gradient Boosting (RMSE: 125.02513568404999)


In [13]:
pip install numpy pandas scikit-learn joblib


Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.
