In [22]:
import pandas as pd
import numpy as np
import warnings
import matplotlib.pyplot as plt
import seaborn as sns
from functions import train_val
import pickle
import random

# Let's Create Synthetic Car Observations

In [78]:
# Read the listings CSV and keep a random 10,000-row sample as the base for the
# synthetic observations. A fixed random_state pins which rows are drawn, so the
# sample (and every score computed from it) is the same on each Restart & Run All.
df = pd.read_csv('final_scout_not_dummy.csv')
df = df.sample(10000, random_state=42)

In [79]:
# Seed Python's RNG so the synthetic values are reproducible. This has to be a call:
# the assignment form `random.seed = 42` seeds nothing and replaces the function
# with an int for the rest of the kernel session.
random.seed(42)


def generate_random_km():
    """Return a random integer in [10000, 50000] (both ends inclusive), used as a synthetic `km` value."""
    return random.randint(10000, 50000)


def generate_random_age():
    """Return a random integer in [0, 4] (both ends inclusive), used as a synthetic `age` value."""
    return random.randint(0, 4)

# Overwrite both columns with fresh random draws, one generator call per row;
# the value already stored in each cell is ignored.
df['km'] = df['km'].map(lambda _old: generate_random_km())
df['age'] = df['age'].map(lambda _old: generate_random_age())

# Preview the first rows of the synthetic frame.
df.head(3)

Unnamed: 0,make_model,body_type,price,vat,km,Type,Fuel,Gears,Comfort_Convenience,Entertainment_Media,Extras,Safety_Security,age,Previous_Owners,hp_kW,Inspection_new,Paint_Type,Upholstery_type,Gearing_Type,Displacement_cc,Weight_kg,Drive_chain,cons_comb
11802,Opel Insignia,Sedans,23716,VAT deductible,37962,New,Diesel,6.0,"Air conditioning,Armrest,Cruise control,Electr...","Bluetooth,Hands-free equipment,On-board comput...",Alloy wheels,"ABS,Central door lock,Daytime running lights,D...",2,1.0,100.0,0,Metallic,Cloth,Automatic,1598.0,1441.0,front,5.0
5898,Opel Astra,Station wagon,13990,VAT deductible,45507,Used,Diesel,6.0,"Air conditioning,Cruise control,Electrical sid...","Bluetooth,Hands-free equipment,On-board comput...",Roof rack,"ABS,Central door lock with remote control,Dayt...",4,1.0,100.0,0,Metallic,Cloth,Automatic,1598.0,1425.0,front,4.5
9026,Opel Corsa,Sedans,7800,VAT deductible,16983,Used,Benzine,5.0,"Air conditioning,Cruise control,Electrical sid...","Bluetooth,CD player,Hands-free equipment,On-bo...","Alloy wheels,Touch screen,Voice Control","ABS,Central door lock with remote control,Driv...",2,1.0,66.0,0,Metallic,Cloth,Manual,1398.0,1163.0,front,5.1


# Applying Data Transformations

In [80]:
# Column labels grouped by dtype family, taken before any spec column is scored.
categorical_features = df.select_dtypes(include=['object']).columns
numerical_features = df.select_dtypes(include=['number']).columns

def convert_specs_to_scores(df, cols, bonus_points_dict):
    """Replace comma-separated spec strings with an integer score, column by column.

    Every comma-separated item in a cell is worth 1 point, or 2 points when its
    whitespace-stripped text is listed for that column in ``bonus_points_dict``.
    A missing cell (None/NaN) scores 0 instead of raising.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the spec columns. It is modified in place.
    cols : iterable of str
        Names of the columns to convert.
    bonus_points_dict : dict
        Maps a column name to the collection of spec names worth 2 points.
        A column without an entry has no bonus items.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with each column in ``cols`` replaced by scores.

    Raises
    ------
    TypeError
        If a cell is neither a string nor missing (for example, the column has
        already been converted to scores).
    """
    def _score(cell, bonus_specs):
        if isinstance(cell, str):
            # Splitting an empty string yields one empty item, so '' scores 1.
            return sum(2 if spec.strip() in bonus_specs else 1 for spec in cell.split(','))
        if pd.api.types.is_scalar(cell) and pd.isna(cell):
            # No spec list recorded for this row: nothing to count.
            return 0
        raise TypeError(
            f"expected a comma-separated string or a missing value, got {type(cell).__name__}"
        )

    for col in cols:
        # Look the bonus list up once per column rather than once per spec item.
        bonus_specs = bonus_points_dict.get(col, [])
        df[col] = df[col].map(lambda cell: _score(cell, bonus_specs))
    return df

# Multi-valued spec columns that are each collapsed into a single numeric score.
cat_columns_convert = [
    'Comfort_Convenience',
    'Entertainment_Media',
    'Safety_Security',
    'Extras',
]

# Spec names that count double (2 points instead of 1), per column; 'Extras' has none.
bonus_points_dict = dict(
    Comfort_Convenience=[
        'Auxiliary heating',
        'Heads-up display',
        'Heated steering wheel',
        'Massage seats',
        'Parking assist system camera',
        'Seat ventilation',
    ],
    Entertainment_Media=[
        'On-board computer',
        'Sound system',
        'Television',
    ],
    Safety_Security=[
        'Adaptive Cruise Control',
        'Adaptive headlights',
        'Lane departure warning system',
        'Night view assist',
        'Traffic sign recognition',
        'Xenon headlights',
    ],
    Extras=[],
)

df = convert_specs_to_scores(df, cols=cat_columns_convert, bonus_points_dict=bonus_points_dict)

# Separate the target column from the predictors.
y = df['price']
X = df.drop(columns='price')

# One-hot encode the remaining object/category columns, dropping the first level of each.
categorical_features = df.select_dtypes(include=['object', 'category']).columns
X_encoded = pd.get_dummies(
    X,
    columns=categorical_features,
    prefix=categorical_features,
    drop_first=True,
)
print(X_encoded.shape, y.shape, sep='\n')

(10000, 43)
(10000,)


# Loading the model and scaler

In [81]:
# Restore the pickled scaler and model, closing each file handle as soon as it is read.
# NOTE: unpickling can execute arbitrary code -- only load artifacts from a trusted source.
with open('scaler_scouting', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)
with open('final_model_scouting', 'rb') as model_file:
    model_loaded = pickle.load(model_file)

# Making the Predictions

In [85]:
# Push the encoded features through the loaded scaler, then through the loaded model.
X_scaled = scaler.transform(X_encoded)
y_pred = model_loaded.predict(X_scaled)

# Exponentiate the raw predictions before comparing them with `price`
# (presumably the model was trained on log(price) -- TODO confirm).
y_pred_final = np.exp(y_pred)
y_pred_final

array([18757.60044794, 12021.02171121,  9614.60639009, ...,
       21856.86434292, 22852.25400501, 16153.67107586])

In [91]:
from sklearn.metrics import r2_score, mean_squared_error

In [92]:
# R^2 of the exponentiated predictions against the actual prices.
r2_score(y_true=y, y_pred=y_pred_final)

0.540289159394022

In [93]:
# Root-mean-squared error between the actual prices and the exponentiated predictions.
mse = mean_squared_error(y_true=y, y_pred=y_pred_final)
rmse = np.sqrt(mse)
rmse

4994.555038741074

__Tabii burada amaç sadece modeli deploy edip ardından tahmin yapabilmekti; ürettiğimiz sentetik veriler gerçek verilerle çok alakasız olduğu için skorlar da doğal olarak çok düşük çıktı.__