In [1]:
# Show the kernel's current working directory before it is changed below.
%pwd

'x:\\CampusX\\projects\\end-to-end_laptop_project\\research'

In [2]:
import os

# Step up from the notebook folder (research/) to the project root so the
# relative paths used later in this notebook resolve from there.
# The guard keeps the cell idempotent: re-running it after the move has already
# happened would otherwise climb one directory higher each time.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [3]:
# Confirm the working directory after the chdir in the previous cell.
%pwd

'x:\\CampusX\\projects\\end-to-end_laptop_project'

In [35]:
from dataclasses import dataclass
from pathlib import Path

In [38]:
@dataclass(frozen=True)
class ModelSelectionConfig:
    """Immutable bundle of settings for the model-selection stage.

    Instances are frozen: attributes cannot be reassigned after construction.
    """

    # --- file-system locations -------------------------------------------
    root_dir: Path                 # directory created for this stage
    after_feature_selection: Path  # CSV that is read as the training dataset
    model_path: Path               # where the fitted pipeline is written

    # --- hyper-parameters handed to sklearn's SVR --------------------------
    svr__C: int
    svr__gamma: str
    svr__kernel: str

In [37]:
from laptop_ml.constants import *
from laptop_ml.utils.common import read_yaml, create_directories

In [39]:
class ConfigurationManager:
    """Load the project's YAML settings and build per-stage config objects."""

    def __init__(self,
                 config_file_path=CONFIG_FILE_PATH,
                 params_file_path=PARAMS_FILE_PATH):
        """Read both YAML files and create the configured artifacts directory.

        Args:
            config_file_path: YAML file holding paths and stage settings.
            params_file_path: YAML file holding model hyper-parameters.
        """
        self.config = read_yaml(config_file_path)
        self.params = read_yaml(params_file_path)
        # Directory named by the `artifacts` entry of the config file.
        create_directories([self.config.artifacts])

    def get_model_selection_config(self) -> ModelSelectionConfig:
        """Assemble the settings for the model-selection stage.

        Creates the stage's ``root_dir`` as a side effect.

        Returns:
            ModelSelectionConfig: paths from the ``model_selection`` section of
            the config file plus the SVR hyper-parameters from the
            ``model_param`` section of the params file.
        """
        stage = self.config.model_selection
        svr_params = self.params.model_param
        create_directories([stage.root_dir])

        # The dataclass declares these three fields as Path, so the YAML
        # values are converted instead of being passed through as-is.
        return ModelSelectionConfig(
            root_dir=Path(stage.root_dir),
            after_feature_selection=Path(stage.after_feature_selection),
            model_path=Path(stage.model_path),
            svr__C=svr_params.svr__C,
            svr__gamma=svr_params.svr__gamma,
            svr__kernel=svr_params.svr__kernel,
        )

In [41]:
import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold, cross_val_score
from sklearn.svm import SVR
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler, OrdinalEncoder
from sklearn.compose import ColumnTransformer
import joblib

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error

In [47]:
class ModelSelection:
    """Fit an SVR price-prediction pipeline on the feature-selected dataset.

    The CSV at ``config.after_feature_selection`` is loaded on construction.
    ``train_model`` fits a preprocessing + SVR pipeline on that data and stores
    the fitted pipeline at ``config.model_path``.
    """

    # Column holding the value to predict.
    TARGET_COLUMN = "price"
    # Columns standardised with StandardScaler.
    NUMERIC_COLUMNS = ("ram", "ssd", "core", "warranty")
    # Columns encoded with OrdinalEncoder.
    ORDINAL_COLUMNS = ("weight", "display_size", "battery_capacity", "ppi")
    # Columns one-hot encoded (first level of each dropped).
    ONE_HOT_COLUMNS = ("brand", "utility", "graphic", "processor")

    # The annotation is quoted so defining this class does not require the
    # config dataclass cell to have been executed first.
    def __init__(self, config: "ModelSelectionConfig"):
        """Store ``config`` and read the training CSV into ``self.df``."""
        self.config = config
        # The first CSV column is used as the row index.
        self.df = pd.read_csv(self.config.after_feature_selection, index_col=0)

    def prepare_dataset(self, df):
        """Split ``df`` into features and target.

        Args:
            df: DataFrame containing a ``price`` column.

        Returns:
            tuple: ``(X, y, y_transformed)`` where ``X`` is ``df`` without the
            price column, ``y`` is the price column and ``y_transformed`` is
            ``log1p(y)``. ``df`` itself is not modified.
        """
        features = df.drop(columns=[self.TARGET_COLUMN])
        target = df[self.TARGET_COLUMN]
        return features, target, np.log1p(target)

    def export_model(self, pipeline):
        """Serialize ``pipeline`` to ``config.model_path`` with joblib."""
        # Make sure the destination folder exists so the dump cannot fail on a
        # missing directory when model_path lies outside an existing folder.
        Path(self.config.model_path).parent.mkdir(parents=True, exist_ok=True)
        joblib.dump(pipeline, self.config.model_path)

    def encode_columns(self):
        """Build the (unfitted) ColumnTransformer used as preprocessing step.

        Columns not named in any of the three groups are passed through
        unchanged.
        """
        # NOTE(review): OrdinalEncoder is used with its default category
        # handling. If these columns carry a meaningful order, confirm that the
        # learned order matches it or pass ``categories=`` explicitly.
        return ColumnTransformer(
            transformers=[
                ("num", StandardScaler(), list(self.NUMERIC_COLUMNS)),
                ("cat", OrdinalEncoder(), list(self.ORDINAL_COLUMNS)),
                ("cat1", OneHotEncoder(drop="first"), list(self.ONE_HOT_COLUMNS)),
            ],
            remainder="passthrough",
        )

    def train_model(self):
        """Fit preprocessing + SVR on the loaded dataset and export the result.

        The regressor is fitted on ``log1p(price)``, so its predictions must be
        passed through ``np.expm1`` to get back to the price scale.
        """
        # The untransformed target is not needed for fitting.
        X, _, y_transformed = self.prepare_dataset(self.df)

        pipeline = Pipeline([
            ("preprocessor", self.encode_columns()),
            ("svr", SVR(C=self.config.svr__C,
                        gamma=self.config.svr__gamma,
                        kernel=self.config.svr__kernel)),
        ])

        pipeline.fit(X, y_transformed)
        self.export_model(pipeline)


    

In [48]:
# Run the stage end to end: load the configuration, read the dataset, then fit
# and export the model. No try/except wrapper is used because a handler that
# only re-raises adds nothing; any error surfaces with its own traceback.
config=ConfigurationManager()
model_selection_config=config.get_model_selection_config()
# `featureselection` holds the ModelSelection instance for this stage.
featureselection=ModelSelection(config=model_selection_config)
featureselection.train_model()

[2024-08-23 17:49:56,805: INFO :common :yaml file: config\config.yaml loaded successfully]
[2024-08-23 17:49:56,805: INFO :common :yaml file: params.yaml loaded successfully]
[2024-08-23 17:49:56,814: INFO :common :created directory at: root]
[2024-08-23 17:49:56,814: INFO :common :created directory at: artifacts/model_selection]


In [50]:
# Reload the exported pipeline from disk. joblib.load unpickles the file, so it
# should only ever be pointed at model files produced by this project.
loaded_model = joblib.load(
    filename="artifacts/model_selection/best_svr_model.pkl"
)

In [58]:
# One hand-written laptop description; the values are ordered to line up with
# the `columns` list defined in the next cell.
data = [[
    "lenovo",         # brand
    "Business",       # utility
    "lite",           # weight
    2,                # warranty
    "small",          # display_size
    "medium",         # ppi
    0,                # touch_screen
    16,               # ram
    1024,             # ssd
    "NVIDIA",         # graphic
    8,                # core
    1,                # hdmi
    1,                # backlit_keyboard
    "medium",         # battery_capacity
    "Intel i5 11.0",  # processor
]]

In [59]:
# Feature names for the sample row, in the same order as its values.
columns = [
    "brand", "utility", "weight", "warranty", "display_size",
    "ppi", "touch_screen", "ram", "ssd", "graphic",
    "core", "hdmi", "backlit_keyboard", "battery_capacity", "processor",
]

In [60]:
# Wrap the single sample in a one-row DataFrame labelled with the feature
# names, which is the input form passed to the loaded pipeline below.
one_df = pd.DataFrame(data=data, columns=columns)

one_df

Unnamed: 0,brand,utility,weight,warranty,display_size,ppi,touch_screen,ram,ssd,graphic,core,hdmi,backlit_keyboard,battery_capacity,processor
0,lenovo,Business,lite,2,small,medium,0,16,1024,NVIDIA,8,1,1,medium,Intel i5 11.0


In [61]:
# The pipeline was fitted on log1p(price), so expm1 maps its prediction back to
# the original price scale.
log_price_pred = loaded_model.predict(one_df)
np.expm1(log_price_pred)

array([105214.83518766])