In [2]:
import os

In [3]:
os.chdir('../../')

In [4]:
%pwd

'/Users/bhikipallai/Desktop/Projects/95Mobiles'

In [5]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class Laptop_feConfig:
    root_dir: Path
    laptop_raw_data: Path
    laptop_cleaned_data: Path

In [6]:
from src.laptop.utils.common import read_yaml,create_directory
from src.laptop.constants import *

In [8]:
class ConfigurationManger:
    def __init__(self,config_filepath = CONFIG_FILE_PATH,param_filepath = PARAMS_FILE_PATH):
        self.config = read_yaml(config_filepath)

        create_directory([self.config.data_root])

    def get_Fe_config(self)->Laptop_feConfig:
        config = self.config.feature_eng
        create_directory([config.root_dir])

        model_feconfig = Laptop_feConfig(
            root_dir=config.root_dir,
            laptop_raw_data = config.laptop_raw_data,
            laptop_cleaned_data = config.laptop_cleaned_data
        )

        return model_feconfig

In [10]:
from sklearn.preprocessing import StandardScaler,LabelEncoder
import pandas as pd

In [21]:
class laptop_fe:
    def __init__(self,config:Laptop_feConfig):
        self.config = config

    def load_data(self):
        df = pd.read_csv(self.config.laptop_raw_data,encoding='ISO-8859-1')
        return df
    
    def drop_and_rename_cols_change_cols_value(self):
        df = self.load_data()
        df.drop(columns='laptop_ID',axis=1,inplace=True)
        df = df.rename(columns={"Price_euros":"price"})
        df['price'] = df['price'].multiply(92)
        df['Weight'] = df['Weight'].str.replace("kg",'').astype(float)
        return df
    
    def transform_data(self):
        df = self.drop_and_rename_cols_change_cols_value()
        le = LabelEncoder()
        stc = StandardScaler()
        cat_cols = []
        num_cols = []

        for col in df.columns:
            if df[col].dtype == 'float64':
                num_cols.append(col)
            elif df[col].dtype == 'object':
                cat_cols.append(col)
        
        num_cols.remove('price')

        for col in df[num_cols]:
            df[col] = stc.fit_transform(df[[col]])
        
        for col in df[cat_cols]:
            df[col] = le.fit_transform(df[col])
        
        print(df.head(2))
        return df
    
    def save_data(self):
        df = self.transform_data()
        df.to_csv(self.config.laptop_cleaned_data)
        print("data was saved")

In [22]:
try:
    config = ConfigurationManger()
    fe_config = config.get_Fe_config()
    laptop = laptop_fe(config=fe_config)
    laptop.load_data()
    laptop.drop_and_rename_cols_change_cols_value()
    laptop.transform_data()
    laptop.save_data()
except Exception as e:
    raise e

[2024-10-05 14:31:06,221: INFO]: yaml: config/laptop_config.yaml loaded successfully
[2024-10-05 14:31:06,223: INFO]: created directory at: data
[2024-10-05 14:31:06,224: INFO]: created directory at: data/laptop/feature_eng
   Company  Product  TypeName    Inches  ...  Gpu  OpSys    Weight      price
0        1      300         4 -1.204407  ...   58      8 -1.005283  123251.48
1        1      301         4 -1.204407  ...   51      8 -1.050381   82702.48

[2 rows x 12 columns]
   Company  Product  TypeName    Inches  ...  Gpu  OpSys    Weight      price
0        1      300         4 -1.204407  ...   58      8 -1.005283  123251.48
1        1      301         4 -1.204407  ...   51      8 -1.050381   82702.48

[2 rows x 12 columns]
data was saved
