In [20]:
import pandas as pd
import numpy as np
import joblib

from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.preprocessing import PolynomialFeatures
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [2]:
data = fetch_california_housing()

In [3]:
data = fetch_california_housing()
X = data.data
y = data.target

df = pd.DataFrame(X, columns=data.feature_names)
df["target"] = y
df.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,target
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23,4.526
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24,3.521
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422


In [4]:
df.drop(columns=['Latitude'], inplace=True)
df.drop(columns=['Longitude'], inplace=True)

In [5]:
df.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,target
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,4.526
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,3.521
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,3.413
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,3.422


In [6]:
df.rename(columns={"MedInc":"ort_gelir", 
                   "HouseAge":"ev_yas",
                   "AveRooms":"oda_sayisi",
                   "AveBedrms":"yatak_odasi",
                   "Population":"nüfus",
                   "AveOccup":"evde_yasayan",
                  "target":"fiyat"}, inplace=True)

In [7]:
df.head()

Unnamed: 0,ort_gelir,ev_yas,oda_sayisi,yatak_odasi,nüfus,evde_yasayan,fiyat
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,4.526
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,3.521
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,3.413
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,3.422


In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20640 entries, 0 to 20639
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   ort_gelir     20640 non-null  float64
 1   ev_yas        20640 non-null  float64
 2   oda_sayisi    20640 non-null  float64
 3   yatak_odasi   20640 non-null  float64
 4   nüfus         20640 non-null  float64
 5   evde_yasayan  20640 non-null  float64
 6   fiyat         20640 non-null  float64
dtypes: float64(7)
memory usage: 1.1 MB


In [9]:
# df["ort_gelir"] = df["ort_gelir"].astype(str).str.replace('.', '').astype(int)
df["fiyat"] = df["fiyat"].astype(str).str.replace('.', '').astype(int)

columns = ["ev_yas","oda_sayisi","yatak_odasi","nüfus","evde_yasayan"]

for column in columns:
    df[column] = df[column].astype(int)
df.head()

Unnamed: 0,ort_gelir,ev_yas,oda_sayisi,yatak_odasi,nüfus,evde_yasayan,fiyat
0,8.3252,41,6,1,322,2,4526
1,8.3014,21,6,0,2401,2,3585
2,7.2574,52,8,1,496,2,3521
3,5.6431,52,5,1,558,2,3413
4,3.8462,52,6,1,565,2,3422


In [10]:
df.head()

Unnamed: 0,ort_gelir,ev_yas,oda_sayisi,yatak_odasi,nüfus,evde_yasayan,fiyat
0,8.3252,41,6,1,322,2,4526
1,8.3014,21,6,0,2401,2,3585
2,7.2574,52,8,1,496,2,3521
3,5.6431,52,5,1,558,2,3413
4,3.8462,52,6,1,565,2,3422


In [11]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20640 entries, 0 to 20639
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   ort_gelir     20640 non-null  float64
 1   ev_yas        20640 non-null  int32  
 2   oda_sayisi    20640 non-null  int32  
 3   yatak_odasi   20640 non-null  int32  
 4   nüfus         20640 non-null  int32  
 5   evde_yasayan  20640 non-null  int32  
 6   fiyat         20640 non-null  int32  
dtypes: float64(1), int32(6)
memory usage: 645.1 KB


In [12]:
df.drop(columns=["nüfus", "evde_yasayan"], inplace=True)

In [13]:
df.head()

Unnamed: 0,ort_gelir,ev_yas,oda_sayisi,yatak_odasi,fiyat
0,8.3252,41,6,1,4526
1,8.3014,21,6,0,3585
2,7.2574,52,8,1,3521
3,5.6431,52,5,1,3413
4,3.8462,52,6,1,3422


In [14]:
X = df.drop(columns=["fiyat"])
y = df["fiyat"]

In [15]:
X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                                   test_size=0.33, random_state=42)

In [16]:
"""Regresyon Modelleri
LinearRegression
Ridge
PolynomialFeatures + LinearRegression
DecisionTreeRegressor
RandomForestRegressor
SVR
"""



def model_fit_regression(X_train_data, y_train_data):
    """Regresyon Modellerin Eğitimi"""

    models = {
        "LinearRegression" : LinearRegression(),
        "Ridge" : Ridge(),
        "DecisionTreeRegressor" : DecisionTreeRegressor(),
        "RandomForestRegressor" : RandomForestRegressor(),
        "SVR" : SVR(),
        "Polynomial Regression (deg=2)": Pipeline([
            ("poly", PolynomialFeatures(degree=2)),
            ("lin_reg", LinearRegression())
        ])
    }

    trained_models = {}

    for name, model in models.items():
        model.fit(X_train_data, y_train_data)
        trained_models[name] = model

    return trained_models

In [17]:
trained_models = model_fit_regression(X_train, y_train)

In [18]:
y_pred = trained_models["Ridge"].predict(X_test)

In [22]:
def evaluate_models(trained_models, X_test, y_test):
    """
    Tüm regresyon modellerini test verisi üzerinde değerlendirir.
    """
    results = {}
    
    for name, model in trained_models.items():
        y_pred = model.predict(X_test)
        
        mse = mean_squared_error(y_test, y_pred)
        rmse = np.sqrt(mse)
        mae = mean_absolute_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)
        
        results[name] = {
            "MSE": mse,
            "RMSE": rmse,
            "MAE": mae,
            "R²": r2
        }
    
    return results

# Kullanım
trained_models = model_fit_regression(X_train, y_train)
results = evaluate_models(trained_models, X_test, y_test)

# Sonuçları yazdır
for model, metrics in results.items():
    print(f"\n{model}")
    for metric, value in metrics.items():
        print(f"{metric}: {value:.4f}")



LinearRegression
MSE: 8432244637.6338
RMSE: 91827.2543
MAE: 49664.7052
R²: 0.2371

Ridge
MSE: 8432199696.3046
RMSE: 91827.0096
MAE: 49663.4919
R²: 0.2371

DecisionTreeRegressor
MSE: 13227764734.8892
RMSE: 115012.0200
MAE: 27469.9509
R²: -0.1968

RandomForestRegressor
MSE: 7930251145.1441
RMSE: 89051.9576
MAE: 26975.2245
R²: 0.2825

SVR
MSE: 11590664293.1765
RMSE: 107659.9475
MAE: 24058.5931
R²: -0.0487

Polynomial Regression (deg=2)
MSE: 7433609301.8711
RMSE: 86218.3815
MAE: 34843.0047
R²: 0.3274


In [24]:
print(trained_models)

{'LinearRegression': LinearRegression(), 'Ridge': Ridge(), 'DecisionTreeRegressor': DecisionTreeRegressor(), 'RandomForestRegressor': RandomForestRegressor(), 'SVR': SVR(), 'Polynomial Regression (deg=2)': Pipeline(steps=[('poly', PolynomialFeatures()),
                ('lin_reg', LinearRegression())])}


In [26]:
joblib.dump({"model": trained_models["LinearRegression"], "columns": X.columns.tolist()}, "LinearRegression.joblib")
joblib.dump({"model": trained_models["Ridge"], "columns": X.columns.tolist()}, "Ridge.joblib")
joblib.dump({"model": trained_models["DecisionTreeRegressor"], "columns": X.columns.tolist()}, "DecisionTreeRegressor.joblib")
joblib.dump({"model": trained_models["RandomForestRegressor"], "columns": X.columns.tolist()}, "RandomForestRegressor.joblib")
joblib.dump({"model": trained_models["SVR"], "columns": X.columns.tolist()}, "SVR.joblib")
joblib.dump({"model": trained_models["Polynomial Regression (deg=2)"], "columns": X.columns.tolist()}, "PolynomialRegression.joblib")


['PolynomialRegression.joblib']

In [18]:
"""Sınıflandırma modelleri
LogisticRegression
DecisionTreeClassifier 
RandomForestClassifier
KNeighborsClassifier
ExtraTreesClassifier
"""

import pandas as pd
import numpy as np
import joblib

from sklearn.datasets import load_wine
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

In [2]:
data = load_wine()
X = data.data
y = data.target

df = pd.DataFrame(X, columns=data.feature_names)
df["target"] = y
df.head()

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline,target
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0,0
1,13.2,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050.0,0
2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185.0,0
3,14.37,1.95,2.5,16.8,113.0,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480.0,0
4,13.24,2.59,2.87,21.0,118.0,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735.0,0


In [5]:
df["target"].unique()

array([0, 1, 2])

In [6]:
df.rename(columns={"alcohol":"alkol",
                   "malic_acid":"malik_asid_miktari",
                   "ash":"kül_miktarı", # kuru artık maddeler
                   "alcalinity_of_ash":"kül_alkalinitesi",
                   "magnesium":"magnezyum_miktari",
                   "total_phenols":"toplam_fenol",
                   "flavanoids":"flavonoid_fenol",
                   "nonflavanoid_phenols":"flavonoid_olmayan_fenoller",
                   "proanthocyanins":"proantosiyanin_miktari",
                   "color_intensity":"renk_yogunlugu",# renk yogunlugu koyu mu acik mi?
                   "hue":"renk_tonu",
                   "proline":"prolin_miktari"}, inplace=True)

In [10]:
df.head()

Unnamed: 0,alkol,malik_asid_miktari,kül_miktarı,kül_alkalinitesi,magnezyum_miktari,toplam_fenol,flavonoid_fenol,flavonoid_olmayan_fenoller,proantosiyanin_miktari,renk_yogunlugu,renk_tonu,prolin_miktari,target
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,1065.0,0
1,13.2,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,1050.0,0
2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,1185.0,0
3,14.37,1.95,2.5,16.8,113.0,3.85,3.49,0.24,2.18,7.8,0.86,1480.0,0
4,13.24,2.59,2.87,21.0,118.0,2.8,2.69,0.39,1.82,4.32,1.04,735.0,0


In [9]:
df.drop(columns=["od280/od315_of_diluted_wines"], inplace=True)

In [11]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 178 entries, 0 to 177
Data columns (total 13 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   alkol                       178 non-null    float64
 1   malik_asid_miktari          178 non-null    float64
 2   kül_miktarı                 178 non-null    float64
 3   kül_alkalinitesi            178 non-null    float64
 4   magnezyum_miktari           178 non-null    float64
 5   toplam_fenol                178 non-null    float64
 6   flavonoid_fenol             178 non-null    float64
 7   flavonoid_olmayan_fenoller  178 non-null    float64
 8   proantosiyanin_miktari      178 non-null    float64
 9   renk_yogunlugu              178 non-null    float64
 10  renk_tonu                   178 non-null    float64
 11  prolin_miktari              178 non-null    float64
 12  target                      178 non-null    int32  
dtypes: float64(12), int32(1)
memory usa

In [12]:
columns = ["magnezyum_miktari", "prolin_miktari"]

for i in columns:
    df[i] = df[i].astype(int)

df.head()

Unnamed: 0,alkol,malik_asid_miktari,kül_miktarı,kül_alkalinitesi,magnezyum_miktari,toplam_fenol,flavonoid_fenol,flavonoid_olmayan_fenoller,proantosiyanin_miktari,renk_yogunlugu,renk_tonu,prolin_miktari,target
0,14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,1065,0
1,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,1050,0
2,13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,1185,0
3,14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,1480,0
4,13.24,2.59,2.87,21.0,118,2.8,2.69,0.39,1.82,4.32,1.04,735,0


In [13]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 178 entries, 0 to 177
Data columns (total 13 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   alkol                       178 non-null    float64
 1   malik_asid_miktari          178 non-null    float64
 2   kül_miktarı                 178 non-null    float64
 3   kül_alkalinitesi            178 non-null    float64
 4   magnezyum_miktari           178 non-null    int32  
 5   toplam_fenol                178 non-null    float64
 6   flavonoid_fenol             178 non-null    float64
 7   flavonoid_olmayan_fenoller  178 non-null    float64
 8   proantosiyanin_miktari      178 non-null    float64
 9   renk_yogunlugu              178 non-null    float64
 10  renk_tonu                   178 non-null    float64
 11  prolin_miktari              178 non-null    int32  
 12  target                      178 non-null    int32  
dtypes: float64(10), int32(3)
memory usa

In [14]:
X = df.drop(columns=["target"])
y = df["target"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [19]:
"""Sınıflandırma modelleri
LogisticRegression
DecisionTreeClassifier 
RandomForestClassifier
KNeighborsClassifier
ExtraTreesClassifier
"""

def model_fit_classification(X_train_data, y_train_data):
    """Sınıflandırma Modellerini Eğit (Scaler + max_iter ile)"""
    
    models = {
        "LogisticRegression": Pipeline([
            ("scaler", StandardScaler()),
            ("logreg", LogisticRegression(max_iter=1000))
        ]),
        "DecisionTreeClassifier": DecisionTreeClassifier(),
        "RandomForestClassifier": RandomForestClassifier(),
        "KNeighborsClassifier": Pipeline([
            ("scaler", StandardScaler()),
            ("knn", KNeighborsClassifier())
        ]),
        "ExtraTreesClassifier": ExtraTreesClassifier(),
    }

    trained_models = {}
    for name, model in models.items():
        model.fit(X_train_data, y_train_data)
        trained_models[name] = model

    return trained_models


In [20]:
trained_models = model_fit_classification(X_train, y_train)

In [21]:
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix

def evaluate_models(trained_models, X_test, y_test):
    """
    Tüm sınıflandırma modellerini test verisi üzerinde değerlendirir.
    """
    results = {}
    
    for name, model in trained_models.items():
        y_pred = model.predict(X_test)
        
        results[name] = {
            "ACC": accuracy_score(y_test, y_pred),
            "F1": f1_score(y_test, y_pred, average="weighted"),
            "CONFMTRX": confusion_matrix(y_test, y_pred)
        }
    
    return results

# Kullanım
trained_models = model_fit_classification(X_train, y_train)
results = evaluate_models(trained_models, X_test, y_test)

# Sonuçları yazdır
for model, metrics in results.items():
    print(f"\n{model}")
    print(f"Accuracy: {metrics['ACC']:.4f}")
    print(f"F1 Score: {metrics['F1']:.4f}")
    print(f"Confusion Matrix:\n{metrics['CONFMTRX']}")


LogisticRegression
Accuracy: 1.0000
F1 Score: 1.0000
Confusion Matrix:
[[14  0  0]
 [ 0 14  0]
 [ 0  0  8]]

DecisionTreeClassifier
Accuracy: 0.9444
F1 Score: 0.9440
Confusion Matrix:
[[13  1  0]
 [ 0 14  0]
 [ 1  0  7]]

RandomForestClassifier
Accuracy: 1.0000
F1 Score: 1.0000
Confusion Matrix:
[[14  0  0]
 [ 0 14  0]
 [ 0  0  8]]

KNeighborsClassifier
Accuracy: 0.9444
F1 Score: 0.9436
Confusion Matrix:
[[14  0  0]
 [ 1 12  1]
 [ 0  0  8]]

ExtraTreesClassifier
Accuracy: 1.0000
F1 Score: 1.0000
Confusion Matrix:
[[14  0  0]
 [ 0 14  0]
 [ 0  0  8]]


In [22]:
trained_models

{'LogisticRegression': Pipeline(steps=[('scaler', StandardScaler()),
                 ('logreg', LogisticRegression(max_iter=1000))]),
 'DecisionTreeClassifier': DecisionTreeClassifier(),
 'RandomForestClassifier': RandomForestClassifier(),
 'KNeighborsClassifier': Pipeline(steps=[('scaler', StandardScaler()), ('knn', KNeighborsClassifier())]),
 'ExtraTreesClassifier': ExtraTreesClassifier()}

In [23]:
joblib.dump({"model": trained_models["LogisticRegression"], "columns": X.columns.tolist()}, "LogisticRegression.joblib")
joblib.dump({"model": trained_models["DecisionTreeClassifier"], "columns": X.columns.tolist()}, "DecisionTreeClassifier.joblib")
joblib.dump({"model": trained_models["RandomForestClassifier"], "columns": X.columns.tolist()}, "RandomForestClassifier.joblib")
joblib.dump({"model": trained_models["KNeighborsClassifier"], "columns": X.columns.tolist()}, "KNeighborsClassifier.joblib")
joblib.dump({"model": trained_models["ExtraTreesClassifier"], "columns": X.columns.tolist()}, "ExtraTreesClassifier.joblib")

['ExtraTreesClassifier.joblib']