In [10]:
import pandas as pd      
import numpy as np 
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import Lasso, LassoCV
import pickle

In [2]:
# Load the prepared dataset from the CSV file in the working directory.
df = pd.read_csv("new_file.csv")
# Preview the first rows to sanity-check the columns.
df.head()

Unnamed: 0,hp_kW,km,age,price,make_model_Audi A1,make_model_Audi A3,make_model_Opel Astra,make_model_Opel Corsa,make_model_Opel Insignia,make_model_Renault Clio,make_model_Renault Duster,make_model_Renault Espace,Gearing_Type_Automatic,Gearing_Type_Manual,Gearing_Type_Semi-automatic
0,66.0,56013.0,3.0,15770,1,0,0,0,0,0,0,0,1,0,0
1,141.0,80000.0,2.0,14500,1,0,0,0,0,0,0,0,1,0,0
2,85.0,83450.0,3.0,14640,1,0,0,0,0,0,0,0,1,0,0
3,66.0,73000.0,3.0,14500,1,0,0,0,0,0,0,0,1,0,0
4,66.0,16200.0,3.0,16790,1,0,0,0,0,0,0,0,1,0,0


In [4]:
# Column dtypes and non-null counts for the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15419 entries, 0 to 15418
Data columns (total 15 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   hp_kW                        15419 non-null  float64
 1   km                           15419 non-null  float64
 2   age                          15419 non-null  float64
 3   price                        15419 non-null  int64  
 4   make_model_Audi A1           15419 non-null  int64  
 5   make_model_Audi A3           15419 non-null  int64  
 6   make_model_Opel Astra        15419 non-null  int64  
 7   make_model_Opel Corsa        15419 non-null  int64  
 8   make_model_Opel Insignia     15419 non-null  int64  
 9   make_model_Renault Clio      15419 non-null  int64  
 10  make_model_Renault Duster    15419 non-null  int64  
 11  make_model_Renault Espace    15419 non-null  int64  
 12  Gearing_Type_Automatic       15419 non-null  int64  
 13  Gearing_Type_Man

In [12]:
# Candidate Lasso regularisation strengths: 100 evenly spaced values
# from 0.01 up to and including 100.
alpha_space = np.linspace(start=0.01, stop=100, num=100)

In [13]:
# Features are every column except the target `price`.
X = df.drop(columns=["price"])
y = df.price

# Hold-out split. It is not used anywhere else in this cell: the final model
# below is fitted on all rows.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=101)

# Min-max scale the features. `final_scaler` is reused by later cells to scale
# new rows before prediction.
# NOTE(review): the scaler is fitted on all of X *before* cross-validation, so
# each CV validation fold has already influenced the scaling statistics.
final_scaler = MinMaxScaler()
final_scaler.fit(X)
X_scaled = final_scaler.transform(X)

# Grid-search the Lasso `alpha` over `alpha_space` with 10-fold CV, selecting
# by (negated) RMSE.
lasso_model = Lasso(random_state=42)
param_grid = {'alpha': alpha_space}

final_model = GridSearchCV(estimator=lasso_model,
                           param_grid=param_grid,
                           scoring='neg_root_mean_squared_error',
                           cv=10,
                           n_jobs=-1)
final_model.fit(X_scaled, y)

# Persist the fitted search object. The context manager guarantees the file
# handle is flushed and closed even if pickling raises.
# NOTE(review): only the model is saved here; predictions in this notebook
# also need `final_scaler`, so a consumer of the pickle presumably needs the
# scaler persisted as well.
filename = 'my_model'
with open(filename, 'wb') as model_file:
    pickle.dump(final_model, model_file)

In [14]:
# Raw (un-encoded) description of one car to price.
my_dict = dict(
    hp_kW=66,
    age=2,
    km=17000,
    make_model='Audi A3',
    Gearing_Type="Automatic",
)

In [15]:
# Turn the single record into a one-row DataFrame.
# Note: this rebinds `my_dict`, so the cell is not safe to run twice in a row.
my_dict = pd.DataFrame([my_dict])
my_dict

Unnamed: 0,hp_kW,age,km,make_model,Gearing_Type
0,66,2,17000,Audi A3,Automatic


In [16]:
# One-hot encode the string columns (make_model, Gearing_Type).
# dtype=int pins the indicator columns to 0/1 integers, matching the integer
# dummy columns of the training frame; the default dtype of get_dummies
# depends on the installed pandas version.
my_dict = pd.get_dummies(my_dict, dtype=int)
my_dict

Unnamed: 0,hp_kW,age,km,make_model_Audi A3,Gearing_Type_Automatic
0,66,2,17000,1,1


In [17]:
# Align the new row to the training feature columns (same names and order as X);
# dummy columns that this row does not have are created and filled with 0.
my_dict = my_dict.reindex(columns = X.columns, fill_value=0) # match the column layout the model was trained on
my_dict

Unnamed: 0,hp_kW,km,age,make_model_Audi A1,make_model_Audi A3,make_model_Opel Astra,make_model_Opel Corsa,make_model_Opel Insignia,make_model_Renault Clio,make_model_Renault Duster,make_model_Renault Espace,Gearing_Type_Automatic,Gearing_Type_Manual,Gearing_Type_Semi-automatic
0,66,17000,2,0,1,0,0,0,0,0,0,1,0,0


In [18]:
# Note: this rebinds `my_dict` to the scaled array, so re-running the cell
# would scale the already-scaled values again.
my_dict = final_scaler.transform(my_dict)  # scale the new row with the scaler fitted on X
my_dict

array([[0.13065327, 0.05362776, 0.66666667, 0.        , 1.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 1.        , 0.        , 0.        ]])

In [19]:
final_model.predict(my_dict) # predicted price for this car (about 19559 in the recorded output)

array([19559.29001107])

In [20]:
# How many rows have the Automatic gearing indicator set (1) versus not set (0).
df.Gearing_Type_Automatic.value_counts()

0    8521
1    6898
Name: Gearing_Type_Automatic, dtype: int64

In [26]:
# Raw hp_kW column as a NumPy array (the repr elides the middle of the array).
df.hp_kW.values

array([ 66., 141.,  85., ..., 118., 146., 118.])

In [27]:
# Summary statistics for hp_kW — presumably used to pick realistic values for
# the hand-written test rows further down.
df.hp_kW.describe()

count    15419.000000
mean        86.600039
std         23.998609
min         40.000000
25%         66.000000
50%         85.000000
75%        100.000000
max        239.000000
Name: hp_kW, dtype: float64

In [28]:
# Summary statistics for age (observed range in this data: 0 to 3).
df.age.describe()

count    15419.000000
mean         1.428368
std          1.115104
min          0.000000
25%          0.000000
50%          1.000000
75%          2.000000
max          3.000000
Name: age, dtype: float64

In [29]:
# Summary statistics for km (observed range in this data: 0 to 317000).
df.km.describe()

count     15419.000000
mean      32993.024205
std       37191.085153
min           0.000000
25%        3034.000000
50%       21379.000000
75%       48310.000000
max      317000.000000
Name: km, dtype: float64

In [30]:
# Raw (un-encoded) description of a second car to price.
my_dict = dict(
    hp_kW=120,
    age=1,
    km=170000,
    make_model='Opel Insignia',
    Gearing_Type="Manual",
)

In [31]:
# Turn the single record into a one-row DataFrame.
# Note: this rebinds `my_dict`, so the cell is not safe to run twice in a row.
my_dict = pd.DataFrame([my_dict])
my_dict

Unnamed: 0,hp_kW,age,km,make_model,Gearing_Type
0,120,1,170000,Opel Insignia,Manual


In [32]:
# One-hot encode the string columns (make_model, Gearing_Type).
# dtype=int pins the indicator columns to 0/1 integers, matching the integer
# dummy columns of the training frame; the default dtype of get_dummies
# depends on the installed pandas version.
my_dict = pd.get_dummies(my_dict, dtype=int)
my_dict

Unnamed: 0,hp_kW,age,km,make_model_Opel Insignia,Gearing_Type_Manual
0,120,1,170000,1,1


In [33]:
# Align the new row to the training feature columns (same names and order as X);
# dummy columns that this row does not have are created and filled with 0.
my_dict = my_dict.reindex(columns = X.columns, fill_value=0) # match the column layout the model was trained on
my_dict

Unnamed: 0,hp_kW,km,age,make_model_Audi A1,make_model_Audi A3,make_model_Opel Astra,make_model_Opel Corsa,make_model_Opel Insignia,make_model_Renault Clio,make_model_Renault Duster,make_model_Renault Espace,Gearing_Type_Automatic,Gearing_Type_Manual,Gearing_Type_Semi-automatic
0,120,170000,1,0,0,0,0,1,0,0,0,0,1,0


In [34]:
# Note: this rebinds `my_dict` to the scaled array, so re-running the cell
# would scale the already-scaled values again.
my_dict = final_scaler.transform(my_dict)  # scale the new row with the scaler fitted on X
my_dict

array([[0.40201005, 0.5362776 , 0.33333333, 0.        , 0.        ,
        0.        , 0.        , 1.        , 0.        , 0.        ,
        0.        , 0.        , 1.        , 0.        ]])

In [35]:
final_model.predict(my_dict) # predicted price for this car (about 16408 in the recorded output)

array([16408.269438])

In [42]:
# Raw (un-encoded) description of a third car to price.
my_dict = dict(
    hp_kW=85,
    age=3,
    km=125000,
    make_model='Renault Duster',
    Gearing_Type="Manual",
)

In [43]:
# Turn the single record into a one-row DataFrame.
# Note: this rebinds `my_dict`, so the cell is not safe to run twice in a row.
my_dict = pd.DataFrame([my_dict])
my_dict

Unnamed: 0,hp_kW,age,km,make_model,Gearing_Type
0,85,3,125000,Renault Duster,Manual


In [44]:
# One-hot encode the string columns (make_model, Gearing_Type).
# dtype=int pins the indicator columns to 0/1 integers, matching the integer
# dummy columns of the training frame; the default dtype of get_dummies
# depends on the installed pandas version.
my_dict = pd.get_dummies(my_dict, dtype=int)
my_dict

Unnamed: 0,hp_kW,age,km,make_model_Renault Duster,Gearing_Type_Manual
0,85,3,125000,1,1


In [45]:
# Align the new row to the training feature columns (same names and order as X);
# dummy columns that this row does not have are created and filled with 0.
my_dict = my_dict.reindex(columns = X.columns, fill_value=0) # match the column layout the model was trained on
my_dict

Unnamed: 0,hp_kW,km,age,make_model_Audi A1,make_model_Audi A3,make_model_Opel Astra,make_model_Opel Corsa,make_model_Opel Insignia,make_model_Renault Clio,make_model_Renault Duster,make_model_Renault Espace,Gearing_Type_Automatic,Gearing_Type_Manual,Gearing_Type_Semi-automatic
0,85,125000,3,0,0,0,0,0,0,1,0,0,1,0


In [46]:
# Note: this rebinds `my_dict` to the scaled array, so re-running the cell
# would scale the already-scaled values again.
my_dict = final_scaler.transform(my_dict)  # scale the new row with the scaler fitted on X
my_dict

array([[0.22613065, 0.39432177, 1.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 1.        ,
        0.        , 0.        , 1.        , 0.        ]])

In [47]:
final_model.predict(my_dict) # predicted price for this car (about 3579 in the recorded output)

array([3578.81688596])