In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import joblib

# Regresyon modelleri
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from xgboost import XGBRegressor


In [2]:
# Veriyi oku
df = pd.read_csv('veri.csv')

# One-hot encoding yap
df_encoded = pd.get_dummies(df, columns=['Marka', 'Model', 'Yakit_Tipi', 'Vites_Tipi'])

# Özellikleri ve hedefi ayır
X = df_encoded.drop('Fiyat', axis=1)
y = df_encoded['Fiyat']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scaler'ı eğit
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Feature names'i kaydet
feature_names = X.columns.tolist()
joblib.dump(feature_names, 'feature_names.joblib')
joblib.dump(scaler, 'scaler.joblib')

# Modelleri tanımla ve eğit
models = {
    'Linear Regression': LinearRegression(),
    'Ridge': Ridge(),
    'Lasso': Lasso(),
    'Decision Tree': DecisionTreeRegressor(),
    'Random Forest': RandomForestRegressor(),
    'Gradient Boosting': GradientBoostingRegressor(),
    'SVR': SVR(),
    'KNN': KNeighborsRegressor(),
    'MLP': MLPRegressor(max_iter=1000),
    'XGBoost': XGBRegressor()
}


In [3]:
# Modelleri tanımla
models = {
    'Linear Regression': LinearRegression(),
    'Ridge': Ridge(),
    'Lasso': Lasso(),
    'Decision Tree': DecisionTreeRegressor(),
    'Random Forest': RandomForestRegressor(),
    'Gradient Boosting': GradientBoostingRegressor(),
    'SVR': SVR(),
    'KNN': KNeighborsRegressor(),
    'MLP': MLPRegressor(max_iter=1000),
    'XGBoost': XGBRegressor()
}

# Sonuçları saklamak için sözlük
results = {}

# Her modeli eğit ve değerlendir
for name, model in models.items():
    print(f"Training {name}...")
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    results[name] = {'RMSE': rmse, 'R2': r2}
    print(f"{name} - RMSE: {rmse:.2f}, R2: {r2:.4f}\n")

# En iyi modeli bul
best_model_name = min(results.items(), key=lambda x: x[1]['RMSE'])[0]
print(f"\nEn iyi model: {best_model_name}")
print(f"RMSE: {results[best_model_name]['RMSE']:.2f}")
print(f"R2: {results[best_model_name]['R2']:.4f}")


Training Linear Regression...
Linear Regression - RMSE: 13187.17, R2: 0.9878

Training Ridge...
Ridge - RMSE: 13133.18, R2: 0.9879

Training Lasso...
Lasso - RMSE: 13141.01, R2: 0.9879

Training Decision Tree...
Decision Tree - RMSE: 34165.70, R2: 0.9181

Training Random Forest...


  model = cd_fast.enet_coordinate_descent(


Random Forest - RMSE: 19063.10, R2: 0.9745

Training Gradient Boosting...
Gradient Boosting - RMSE: 14661.62, R2: 0.9849

Training SVR...
SVR - RMSE: 119376.87, R2: 0.0001

Training KNN...
KNN - RMSE: 59162.75, R2: 0.7544

Training MLP...


[WinError 2] Sistem belirtilen dosyayı bulamıyor
  File "C:\Users\deniz\anaconda3\Lib\site-packages\joblib\externals\loky\backend\context.py", line 257, in _count_physical_cores
    cpu_info = subprocess.run(
               ^^^^^^^^^^^^^^^
  File "C:\Users\deniz\anaconda3\Lib\subprocess.py", line 548, in run
    with Popen(*popenargs, **kwargs) as process:
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\deniz\anaconda3\Lib\subprocess.py", line 1026, in __init__
    self._execute_child(args, executable, preexec_fn, close_fds,
  File "C:\Users\deniz\anaconda3\Lib\subprocess.py", line 1538, in _execute_child
    hp, ht, pid, tid = _winapi.CreateProcess(executable, args,
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^


MLP - RMSE: 546623.09, R2: -19.9647

Training XGBoost...
XGBoost - RMSE: 14469.40, R2: 0.9853


En iyi model: Ridge
RMSE: 13133.18
R2: 0.9879




In [4]:
# En iyi modeli kaydet
best_model = models[best_model_name]
joblib.dump(best_model, 'eniyi.joblib')
# Scaler'ı da kaydet (Streamlit uygulaması için gerekli olacak)
joblib.dump(scaler, 'scaler.joblib')
# Feature isimleri de kaydedilmeli
feature_names = X.columns.tolist()
joblib.dump(feature_names, 'feature_names.joblib')


['feature_names.joblib']