In [1]:
import os

In [2]:
# Move from the notebook's folder up to the project root so that the later
# `src.mobiles` imports and the relative config/data paths resolve.
# Guarded because a bare relative chdir is not idempotent: every re-run of
# this cell would otherwise climb two more directory levels.
# NOTE(review): assumes the project root is the directory holding
# `src/mobiles` and that the notebook's own folder does not - confirm.
if not os.path.isdir(os.path.join("src", "mobiles")):
    os.chdir("../../")

In [3]:
# Show the current working directory to confirm the chdir above took effect.
%pwd

'/Users/bhikipallai/Desktop/Projects/95Mobiles'

In [4]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class Mobile_Data_Cleaning_Config:
    """Immutable set of filesystem locations used by the data-cleaning stage.

    Instances are frozen, so fields cannot be reassigned after construction.
    """

    # Directory belonging to the cleaning stage (created by the config manager).
    root_dir: Path
    # CSV file the cleaning step reads its raw input from.
    mobile_raw_data: Path
    # CSV file the cleaning step writes its result to.
    mobile_cleaned_data: Path

In [5]:
from src.mobiles.constants import *
from src.mobiles.utils.common import read_yaml,create_directory

[2024-10-06 17:32:47,479: INFO]: Logging file start


In [6]:
class ConfigurationManger:
    """Read the project YAML configuration and build per-stage config objects.

    The (misspelled) class name is kept as-is because other code may refer to it.
    """

    def __init__(self,config_filepath = CONFIG_FILE_PATH,params_filepath = PARAMS_FILE_PATH) :
        """Load the configuration file and create the top-level data directory.

        Args:
            config_filepath: Location of the YAML configuration passed to
                ``read_yaml``.
            params_filepath: Accepted for interface compatibility; it is not
                read anywhere in this class.
        """
        self.config = read_yaml(config_filepath)
        create_directory([self.config.data_root])

    def get_mobile_data_cleaning(self)->Mobile_Data_Cleaning_Config:
        """Build the configuration for the data-cleaning stage.

        Creates the stage's ``root_dir`` and returns the three locations from
        the ``data_cleaning`` section of the loaded configuration.

        Returns:
            Mobile_Data_Cleaning_Config whose fields are ``pathlib.Path``
            objects, matching the dataclass's declared field types.
        """
        stage = self.config.data_cleaning
        create_directory([stage.root_dir])

        # The YAML values arrive as plain strings; coerce them so the returned
        # object really holds Path instances, as its annotations promise.
        return Mobile_Data_Cleaning_Config(
            root_dir=Path(stage.root_dir),
            mobile_raw_data=Path(stage.mobile_raw_data),
            mobile_cleaned_data=Path(stage.mobile_cleaned_data),
        )

In [7]:
import pandas as pd

In [8]:
class DataCleaning:
    """Load the raw mobile CSV, normalise its columns and values, and save it.

    Typical use is ``DataCleaning(config).clean_data()``; the raw file is read
    on demand if ``load_data()`` has not been called yet.
    """

    def __init__(self, config: "Mobile_Data_Cleaning_Config"):
        """Store the stage configuration.

        Args:
            config: Object exposing ``mobile_raw_data`` (input CSV) and
                ``mobile_cleaned_data`` (output CSV). The annotation is quoted
                so defining this class does not require the config dataclass
                to already exist in the kernel namespace.
        """
        self.config = config
        self.df = None  # raw frame; populated by load_data()

    def load_data(self):
        """Read ``config.mobile_raw_data`` into ``self.df`` and return it."""
        self.df = pd.read_csv(self.config.mobile_raw_data)
        return self.df

    def columns_cleaning(self):
        """Return a new frame with normalised column names.

        The unit-suffixed headers are renamed to short names and every
        remaining space is removed from all column names. ``self.df`` itself
        is left untouched because ``rename`` returns a new frame.
        """
        # Load lazily instead of failing with AttributeError when the caller
        # skipped load_data().
        if getattr(self, 'df', None) is None:
            self.load_data()

        renamed = self.df.rename(columns={
            'Price ($)': 'price',
            'Screen Size (inches)': 'screen_size',
            'Camera (MP)': 'camera',
            'Battery Capacity (mAh)': 'battery',
        })
        renamed.columns = renamed.columns.str.replace(' ', '', regex=False)
        return renamed

    def clean_data(self, price_multiplier: float = 84):
        """Clean the value columns, print a preview and write the result to CSV.

        Args:
            price_multiplier: Factor applied to the parsed price. Defaults to
                84, the value previously hard-coded here.
                NOTE(review): presumably a USD->INR rate given the source
                column is ``Price ($)`` - confirm.

        Returns:
            The cleaned DataFrame that was written to
            ``config.mobile_cleaned_data``.
        """
        df = self.columns_cleaning()

        # Drop currency symbols, thousands separators and whitespace but keep
        # the decimal point; stripping every non-digit would turn "999.99"
        # into 99999.
        price_text = df['price'].str.replace(r'[^\d.]', '', regex=True)
        df['price'] = price_text.astype('float').multiply(price_multiplier)

        # Camera specs keep their "a+b+c" shape: only the unit and spaces go.
        df['camera'] = (
            df['camera']
            .str.replace('MP', '', regex=False)
            .str.replace(' ', '', regex=False)
        )

        # RAM / Storage keep only their digits (values stay strings).
        for column in ('RAM', 'Storage'):
            df[column] = df[column].str.replace(r'\D', '', regex=True)

        print(df.head(10))

        # NOTE(review): the index is written as an extra unnamed first column;
        # left unchanged because downstream readers may rely on that layout.
        df.to_csv(self.config.mobile_cleaned_data)
        return df

In [9]:
# Run the cleaning stage end to end. Errors are left to propagate on their
# own: wrapping the calls in `try/except ...: raise e` changed nothing.
config = ConfigurationManger()
data_cleaning = DataCleaning(config.get_mobile_data_cleaning())
data_cleaning.load_data()
# clean_data() performs the column renaming itself, so a separate
# columns_cleaning() call (whose result was discarded) is not needed.
data_cleaning.clean_data()

[2024-10-06 17:32:47,801: INFO]: yaml: config/mobiles_config.yaml loaded successfully
[2024-10-06 17:32:47,802: INFO]: created directory at: data
[2024-10-06 17:32:47,803: INFO]: created directory at: data/mobile/data_cleaning
     Brand              Model Storage RAM screen_size        camera  battery  \
0    Apple      iPhone 13 Pro     128   6         6.1      12+12+12     3095   
1  Samsung   Galaxy S21 Ultra     256  12         6.8  108+10+10+12     5000   
2  OnePlus              9 Pro     128   8         6.7     48+50+8+2     4500   
3   Xiaomi  Redmi Note 10 Pro     128   6        6.67      64+8+5+2     5020   
4   Google            Pixel 6     128   8         6.4       50+12.2     4614   
5    Apple          iPhone 13     128   4         6.1         12+12     2815   
6  Samsung     Galaxy Z Flip3     256   8         6.7         12+12     3300   
7   Xiaomi        Poco X3 Pro     128   6        6.67      48+8+2+2     5160   
8     Oppo      Reno6 Pro+ 5G     128   8        6.55