# <center> Feature Engineering (2) Data Cleaning (2)

### Problem Statement
- The process of fixing or removing incorrect, corrupted, incorrectly formatted, duplicate, or incomplete data within a dataset. When combining multiple data sources, there are many opportunities for data to be duplicated or mislabeled. If data is incorrect, outcomes and algorithms are unreliable, even though they may look correct.

## Table of content 
- Packages importing 
- Reading CSV File
- Data cleaning V2
- Data Preprocessing

In [11]:
# import libraries
import pandas as pd
import numpy as np
import warnings
import datefinder
import re
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
pd.set_option('display.expand_frame_repr', False)
pd.set_option('max_colwidth', 30)
warnings.filterwarnings("ignore")

<hr>

## <center> Reading CSV File

In [12]:
All_Data1 = pd.read_csv("CSV/df_After_Cleaning.csv")
All_Data1.drop(All_Data1.filter(regex="Unname"),axis=1, inplace=True)  # Remove Unnamed Column
All_Data1.head(1)

Unnamed: 0,Title_Product,Product_Group,Brand,Model,Series,Color,Style,Dimensions_H,Dimensions_W,Dimensions_D,Weight,Date_First_Year,Date_First_Month,Date_First_Day,Is_Gaming,Is_Labtop,Is_Desktop,Is_Notebook,Is_Chromebook,Is_2-in-1,Is_All-in-One,Is_Ultrabook,Is_Thin,Is_Convertible,Is_Detachable,Is_Mini,Is_Flip,Is_Tower,Is_Workstation,For_Business,For_Education,For_Entertainment,For_Personal,CPU_Brand,CPU_Model,CPU_Series,CPU_Gen,Cores,CPU_Speed,CPU_Version,L3_Cache,Smart_Cache,Core_Name,Threads,GPU_Brand,GPU_Model,GPU_Series,Video_Memory,Memory_Type,Storage_HDD,Storage_SSD,SSD_Is_NVMe,SSD_Is_M.2,Is_SSD,Is_HDD,Is_SSD_HDD,Memory_capacity,Memory_Type.1,Memory_Speed,Memory_Is_LPDD,Memory_Is_DIMM,Memory_Is_SDR,Memory_Is_DRAM,Memory_Is_VRAM,Memory_Is_DDR4,Memory_Is_DDR3,Memory_Is_DDR2,Memory_Is_DDR1,OS_Corporation,OS_Version,Is_OS_32-bit,Is_OS_64-bit,Screen_Size,X_res,Y_res,Has_Screen_5k,Has_Screen_4k,Has_Screen_2k,Has_Screen_UHD,Has_Screen_QHD,Has_Screen_FHD,Has_Screen_OLED,Has_Screen_LED,Has_Screen_LCD,Has_Touchscreen,Has_WideScreen,Has_Screen_IPS,Has_Screen_VR,Has_Screen_HDR,Has_Anti-Glare,Has_Glossy,Has_anti_reflection,Has_WebCam,AC_Power,Battery_Cell,Power_Supply_W,Bluetooth_V,Has_Bluetooth,Has_Ethernet,Has_WIFI,WiFI_6E,WiFI_6,WiFI_5,WIFi_4,Has_Reader,Has_Speakers,Has_HD_Audio,Has_Thunderbolt,USB_Gen,Type_C_Count,USB_C_Gen_3.2,USB_C_Gen_3.1,USB_C_Gen_3.0,USB_C_Gen_2.0,Type_A_Count,USB_A_Gen_3.2,USB_A_Gen_3.1,USB_A_Gen_3.0,USB_A_Gen_2.0,Has_Pointing_Stick,Product_Price_SR,Shipping_Price,Product_Price_US,Total_Price,Product_URL,Product_Images
0,ASUS TUF A15 Gaming & Ente...,Gaming Laptops,ASUS,,TUF A15,Gray,Standard,,,,,2022.0,11.0,10.0,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,True,AMD,Ryzen,Ryzen 7,7.0,8.0,3.2,6800H,16.0,0,,,NVIDIA,GeForce,3050,,DDR5,0.0,512,False,False,True,False,False,8,DDR5,,False,True,False,False,False,False,False,False,False,Microsoft,Windows 11 Home,False,True,15.6,1920.0,1080.0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,,,,5.2,True,False,True,False,True,False,False,True,False,False,False,,0.0,True,False,False,True,0.0,True,False,False,True,False,5303.4,0.0,979.0,5303.4,https://www.newegg.com/p/2...,['https://c1.neweggimages....


In [13]:
All_Data1["USB_C_Ver"] = np.nan
All_Data1["USB_C_Ver"].fillna(All_Data1["USB_C_Gen_3.2"].apply(lambda x: "USB C 3.2" if x  else np.nan),inplace=True)
All_Data1["USB_C_Ver"].fillna(All_Data1["USB_C_Gen_3.1"].apply(lambda x: "USB C 3.1" if x  else np.nan),inplace=True)
All_Data1["USB_C_Ver"].fillna(All_Data1["USB_C_Gen_3.0"].apply(lambda x: "USB C 3.0" if x  else np.nan),inplace=True)
All_Data1["USB_C_Ver"].fillna(All_Data1["USB_C_Gen_2.0"].apply(lambda x: "USB C 2.0" if x  else np.nan),inplace=True)
All_Data1["USB_A_Ver"] = np.nan
All_Data1["USB_A_Ver"].fillna(All_Data1["USB_A_Gen_3.2"].apply(lambda x: "USB A 3.2" if x  else np.nan),inplace=True)
All_Data1["USB_A_Ver"].fillna(All_Data1["USB_A_Gen_3.1"].apply(lambda x: "USB A 3.1" if x  else np.nan),inplace=True)
All_Data1["USB_A_Ver"].fillna(All_Data1["USB_A_Gen_3.0"].apply(lambda x: "USB A 3.0" if x  else np.nan),inplace=True)
All_Data1["USB_A_Ver"].fillna(All_Data1["USB_A_Gen_2.0"].apply(lambda x: "USB A 2.0" if x  else np.nan),inplace=True)


In [14]:
All_Data1["Has_Pointing_Stick"].value_counts()

False    26494
True      4533
Name: Has_Pointing_Stick, dtype: int64

In [15]:
def Get_Screen_Type(float):
    if str(float).startswith("1"):
        return "FHD"
    elif str(float).startswith("2"):
        return "QHD"
    elif str(float).startswith("3") or str(float).startswith("4"):
        return "4K"
    elif str(float).startswith("5"):
        return "5K"
    else:
        return "No Screen"

In [16]:
All_Data1["Res_Type"] = np.nan
All_Data1["Res_Type"].fillna(All_Data1["X_res"].apply(Get_Screen_Type),inplace=True)
All_Data1["Res_Type"].fillna(All_Data1["X_res"].apply(lambda x: "5K" if x == 5120.0 else np.nan),inplace=True)
All_Data1["Res_Type"].fillna(All_Data1["X_res"].apply(lambda x: "FHD" if x == 1920.0 else np.nan),inplace=True)
All_Data1["Res_Type"].fillna(All_Data1["X_res"].apply(lambda x: "FHD" if x == 1600.0 else np.nan),inplace=True)
All_Data1["Res_Type"].fillna(All_Data1["X_res"].apply(lambda x: "4K" if x == 3840.0 else np.nan),inplace=True)
All_Data1["Res_Type"].fillna(All_Data1["X_res"].apply(lambda x: "2K" if x == 2256.0 else np.nan),inplace=True)
All_Data1["Res_Type"].fillna(All_Data1["X_res"].apply(lambda x: "FHD" if x == 1366.0 else np.nan),inplace=True)
All_Data1["Res_Type"].fillna(All_Data1["X_res"].apply(lambda x: "2K" if x == 2560.0 else np.nan),inplace=True)
All_Data1["Res_Type"].fillna(All_Data1["Has_Screen_5k"].apply(lambda x: "5K" if x  else np.nan),inplace=True)
All_Data1["Res_Type"].fillna(All_Data1["Has_Screen_4k"].apply(lambda x: "4K" if x  else np.nan),inplace=True)
All_Data1["Res_Type"].fillna(All_Data1["Has_Screen_UHD"].apply(lambda x: "4K" if x  else np.nan),inplace=True)
All_Data1["Res_Type"].fillna(All_Data1["Has_Screen_FHD"].apply(lambda x: "FHD" if x  else np.nan),inplace=True)
All_Data1["Res_Type"].fillna(All_Data1["Has_Screen_2k"].apply(lambda x: "QHD" if x  else np.nan),inplace=True)
All_Data1["Res_Type"].fillna(All_Data1["Has_Screen_QHD"].apply(lambda x: "QHD" if x  else np.nan),inplace=True)
###########################################
All_Data1["test"] = All_Data1["Has_Screen_IPS"] & All_Data1["Has_Screen_VR"]
All_Data1["Screen_Tec"] = np.nan
All_Data1["Screen_Tec"].fillna(All_Data1["Is_Desktop"].apply(lambda x: "No Screen" if x  else np.nan),inplace=True)
All_Data1["Screen_Tec"].fillna(All_Data1["test"].apply(lambda x: "IPS and VR" if x  else np.nan),inplace=True)
All_Data1["Screen_Tec"].fillna(All_Data1["Has_Screen_IPS"].apply(lambda x: "IPS" if x  else np.nan),inplace=True)
All_Data1["Screen_Tec"].fillna(All_Data1["Has_Screen_VR"].apply(lambda x: "VR" if x  else np.nan),inplace=True)
All_Data1["Screen_Tec"].fillna(All_Data1["Has_Screen_HDR"].apply(lambda x: "HDR" if x  else np.nan),inplace=True)
All_Data1["Screen_Tec"].fillna(All_Data1["Has_Anti-Glare"].apply(lambda x: "Anti-Glare" if x  else np.nan),inplace=True)
All_Data1["Screen_Tec"].fillna(All_Data1["Has_Glossy"].apply(lambda x: "Glossy" if x  else np.nan),inplace=True)
All_Data1["Screen_Tec"].fillna(All_Data1["Has_anti_reflection"].apply(lambda x: "anti_reflection" if x  else np.nan),inplace=True)
All_Data1["WebCam"] = np.nan
All_Data1["WebCam"].fillna(All_Data1["Is_Desktop"].apply(lambda x: "No WebCam" if x  else np.nan),inplace=True)
All_Data1["WebCam"].fillna(All_Data1["Is_Chromebook"].apply(lambda x: "Yes" if x  else np.nan),inplace=True)
All_Data1["WebCam"].fillna(All_Data1["Is_Notebook"].apply(lambda x: "Yes" if x  else np.nan),inplace=True)
All_Data1["WebCam"].fillna(All_Data1["Is_Labtop"].apply(lambda x: "Yes" if x  else np.nan),inplace=True)
All_Data1["WebCam"].fillna(All_Data1["Has_WebCam"].apply(lambda x: "Yes" if x  else np.nan),inplace=True)
All_Data1["WebCam"].fillna(All_Data1["Is_All-in-One"].apply(lambda x: "Yes" if x  else "No"),inplace=True)
All_Data1["WiFi_Ver"] = np.nan
All_Data1["WiFi_Ver"].fillna(All_Data1["WiFI_6E"].apply(lambda x: "Wifi 6E" if x  else np.nan),inplace=True)
All_Data1["WiFi_Ver"].fillna(All_Data1["WiFI_6"].apply(lambda x: "Wifi 6" if x  else np.nan),inplace=True)
All_Data1["WiFi_Ver"].fillna(All_Data1["WiFI_5"].apply(lambda x: "Wifi 5" if x  else np.nan),inplace=True)
All_Data1["WiFi_Ver"].fillna(All_Data1["WIFi_4"].apply(lambda x: "Wifi 4" if x  else np.nan),inplace=True)

All_Data1["USB_C_Ver"] = np.nan
All_Data1["USB_C_Ver"].fillna(All_Data1["USB_C_Gen_3.2"].apply(lambda x: "USB C 3.2" if x  else np.nan),inplace=True)
All_Data1["USB_C_Ver"].fillna(All_Data1["USB_C_Gen_3.1"].apply(lambda x: "USB C 3.1" if x  else np.nan),inplace=True)
All_Data1["USB_C_Ver"].fillna(All_Data1["USB_C_Gen_3.0"].apply(lambda x: "USB C 3.0" if x  else np.nan),inplace=True)
All_Data1["USB_C_Ver"].fillna(All_Data1["USB_C_Gen_2.0"].apply(lambda x: "USB C 2.0" if x  else np.nan),inplace=True)
All_Data1["USB_A_Ver"] = np.nan
All_Data1["USB_A_Ver"].fillna(All_Data1["USB_A_Gen_3.2"].apply(lambda x: "USB A 3.2" if x  else np.nan),inplace=True)
All_Data1["USB_A_Ver"].fillna(All_Data1["USB_A_Gen_3.1"].apply(lambda x: "USB A 3.1" if x  else np.nan),inplace=True)
All_Data1["USB_A_Ver"].fillna(All_Data1["USB_A_Gen_3.0"].apply(lambda x: "USB A 3.0" if x  else np.nan),inplace=True)
All_Data1["USB_A_Ver"].fillna(All_Data1["USB_A_Gen_2.0"].apply(lambda x: "USB A 2.0" if x  else np.nan),inplace=True)







In [17]:
All_Data1[["Res_Type","X_res","Y_res"]].sample(2)

Unnamed: 0,Res_Type,X_res,Y_res
6784,No Screen,0.0,0.0
30723,FHD,1920.0,1200.0


In [18]:
All_Data1["Res_Type"].value_counts().sum()

31027

In [19]:
All_Data1["Product_Type"] = np.nan
All_Data1["Product_Style"] = np.nan
All_Data1["Product_Usage"] = np.nan
All_Data1["OS_Type"] = np.nan
All_Data1["SSD_Type"] = np.nan
All_Data1["Storage"] = np.nan
All_Data1["SSD_Type"].fillna(All_Data1["SSD_Is_NVMe"].apply(lambda x: "NVMe" if x  else np.nan),inplace=True)
All_Data1["SSD_Type"].fillna(All_Data1["SSD_Is_M.2"].apply(lambda x: "M.2" if x  else np.nan),inplace=True)
All_Data1["Product_Type"].fillna(All_Data1["Is_Thin"].apply(lambda x: "Labtop Computers" if x  else np.nan),inplace=True)
All_Data1["Product_Type"].fillna(All_Data1["Is_Notebook"].apply(lambda x: "Labtop Computers" if x  else np.nan),inplace=True)
All_Data1["Product_Type"].fillna(All_Data1["Is_Chromebook"].apply(lambda x: "Labtop Computers" if x  else np.nan),inplace=True)
All_Data1["Product_Type"].fillna(All_Data1["Is_2-in-1"].apply(lambda x: "Labtop Computers" if x  else np.nan),inplace=True)
All_Data1["Product_Type"].fillna(All_Data1["Is_Ultrabook"].apply(lambda x: "Labtop Computers" if x  else np.nan),inplace=True)
All_Data1["Product_Type"].fillna(All_Data1["Is_Desktop"].apply(lambda x: "Desktop Computers" if x  else np.nan),inplace=True)
All_Data1["Product_Type"].fillna(All_Data1["Is_All-in-One"].apply(lambda x: "All-in-One" if x  else np.nan),inplace=True)
#############################
All_Data1["Product_Style"].fillna(All_Data1["Is_Thin"].apply(lambda x: "Thin" if x  else np.nan),inplace=True)
All_Data1["Product_Style"].fillna(All_Data1["Is_Convertible"].apply(lambda x: "Convertible" if x  else np.nan),inplace=True)
All_Data1["Product_Style"].fillna(All_Data1["Is_Detachable"].apply(lambda x: "Detachable" if x  else np.nan),inplace=True)
All_Data1["Product_Style"].fillna(All_Data1["Is_Mini"].apply(lambda x: "Mini" if x  else np.nan),inplace=True)
All_Data1["Product_Style"].fillna(All_Data1["Is_Tower"].apply(lambda x: "Tower" if x  else np.nan),inplace=True)
All_Data1["Product_Style"].fillna(All_Data1["Is_Workstation"].apply(lambda x: "Workstation" if x  else np.nan),inplace=True)
##############################
All_Data1["Product_Usage"].fillna(All_Data1["For_Business"].apply(lambda x: "Business" if x  else np.nan),inplace=True)
All_Data1["Product_Usage"].fillna(All_Data1["For_Education"].apply(lambda x: "Education" if x  else np.nan),inplace=True)
All_Data1["Product_Usage"].fillna(All_Data1["For_Entertainment"].apply(lambda x: "Entertainment" if x  else np.nan),inplace=True)
All_Data1["Product_Usage"].fillna(All_Data1["For_Personal"].apply(lambda x: "Personal" if x  else np.nan),inplace=True)
All_Data1["Storage"].fillna(All_Data1["Is_SSD"].apply(lambda x: "SSD" if x  else np.nan),inplace=True)
All_Data1["Storage"].fillna(All_Data1["Is_HDD"].apply(lambda x: "HDD" if x  else np.nan),inplace=True)
All_Data1["Storage"].fillna(All_Data1["Is_SSD_HDD"].apply(lambda x: "Both" if x  else np.nan),inplace=True)
All_Data1["Memory_Type.1"].fillna(All_Data1["Memory_Is_DDR4"].apply(lambda x: "DDR4" if x  else np.nan),inplace=True)
All_Data1["Memory_Type.1"].fillna(All_Data1["Memory_Is_DDR3"].apply(lambda x: "DDR3" if x  else np.nan),inplace=True)
All_Data1["Memory_Type.1"].fillna(All_Data1["Memory_Is_DDR2"].apply(lambda x: "DDR2" if x  else np.nan),inplace=True)
All_Data1["Memory_Type.1"].fillna(All_Data1["Memory_Is_DDR1"].apply(lambda x: "DDR1" if x  else np.nan),inplace=True)
All_Data1["OS_Type"].fillna(All_Data1["Is_OS_32-bit"].apply(lambda x: "32-Bit" if x  else np.nan),inplace=True)
All_Data1["OS_Type"].fillna(All_Data1["Is_OS_64-bit"].apply(lambda x: "64-Bit" if x  else np.nan),inplace=True)


In [20]:
Data = pd.DataFrame()

In [21]:
All_Data1.shape

(31027, 139)

In [22]:
def Get_CPU_Gen(string):
    if string == "1.0":
        return "1st"
    elif string == "2.0":
        return "2nd"
    elif string == "3.0":
        return "3rd"
    elif string == "4.0":
        return "4th"
    elif string == "5.0":
        return "5th"
    elif string == "6.0":
        return "6th"
    elif string == "7.0":
        return "7th"
    elif string == "8.0":
        return "8th"
    elif string == "9.0":
        return "9th"
    elif string == "10.0":
        return "10th"
    elif string == "11.0":
        return "11th"
    elif string == "12.0":
        return "12th"
    elif string == "13.0":
        return "13th"
    else:
        np.nan

In [23]:
def Get_Os(string):
    if "windows 10" in string.lower() or "Windows10" in string.lower():
        if "pro" in string.lower():
            return "Windows 10 Pro"
        else:
            return "Windows 10 Home"
    elif "windows 11" in string.lower():
        if "pro" in string.lower():
            return "Windows 11 Pro"
        else:
            return "Windows Home Pro"
    elif string == "Windows 11 Home":
        return "Windows 11 Home"
    elif "windows 7" in string.lower():
        if "home" in string.lower():
            return "Windows 7 Home"
        else:
            return "Windows 7 Pro"
    elif "windows 10" in string.lower():
        if "home" in string.lower():
            return "Windows 10 Home"
    elif "windows 11" in string.lower():
        if "home" in string.lower():
            return "Windows 11 Home"
    elif "windows 8" in string.lower():
            return "Windows 8 Pro"
    elif "chrome" in string.lower():
            return "Chrome OS"
    elif "macOS" in string.lower() or "mac" in string.lower() or "os" in string.lower():
            return "MacOS"
    elif "xp" in string.lower():
            return "Windows XP"
    elif "freeDOS" in string.lower():
            return "FreeDOS"
    else:
        return "No Operating System"

In [25]:
Data["Title_Product"] = All_Data1["Title_Product"]
Data["Brand"] = All_Data1["Brand"]
Data["Model"] = All_Data1["Model"]
Data["Series"] = All_Data1["Series"]
Data["Color"] = All_Data1["Color"]
Data["Date_First_Year"] = All_Data1["Date_First_Year"]
Data["Product_Type"] = All_Data1["Product_Type"]
Data["Product_Style"] = All_Data1["Product_Style"] 
Data["Product_Usage"] = All_Data1["Product_Usage"]
Data["Weight"] = All_Data1["Weight"]
Data["CPU_Brand"] = All_Data1["CPU_Brand"]
Data["CPU_Model"] = All_Data1["CPU_Model"]
Data["CPU_Series"] = All_Data1["CPU_Series"]
Data["CPU_Gen"] = All_Data1["CPU_Gen"].astype("str").apply(Get_CPU_Gen)
Data["Cores"] = All_Data1["Cores"]
Data["CPU_Speed"] = All_Data1["CPU_Speed"]
Data["Core_Name"] = All_Data1["Core_Name"]
Data["GPU_Brand"] = All_Data1["GPU_Brand"]
Data["GPU_Model"] = All_Data1["GPU_Model"]
Data["Video_Memory"] = All_Data1["Video_Memory"]
Data["Is_SSD"] = All_Data1["Is_SSD"]
Data["Storage_HDD"] = All_Data1["Storage_HDD"]
Data["Storage_SSD"] = All_Data1["Storage_SSD"]
Data["SSD_Type"] = All_Data1["SSD_Type"]
Data["Memory_capacity"] = All_Data1["Memory_capacity"].astype("str").apply(lambda x: pd.to_numeric(x,errors='coerce') if pd.to_numeric(x,errors='coerce')> 1 else np.nan)
Data["Memory_Type"] = All_Data1["Memory_Type.1"]
Data["Memory_Speed"] = All_Data1["Memory_Speed"]
Data["OS_Corporation"] = All_Data1["OS_Corporation"]
Data["OS_Version"] = All_Data1["OS_Version"].astype("str").apply(Get_Os)
Data["Screen_Size"] = All_Data1["Screen_Size"]
Data["X_res"] = All_Data1["X_res"]
Data["Y_res"] = All_Data1["Y_res"]
Data["Res_Type"] = All_Data1["Res_Type"]
Data["Touchscreen"] = All_Data1["Has_Touchscreen"]
Data["WideScreen"] = All_Data1["Has_WideScreen"]
Data["Screen_Tec"] = All_Data1["Screen_Tec"]
Data["WebCam"] = All_Data1["WebCam"]
Data["AC_Power"] = All_Data1["AC_Power"]
Data["Battery_Cell"] = All_Data1["Battery_Cell"]
Data["Power_Supply_W"] = All_Data1["Power_Supply_W"]
Data["Bluetooth"] = All_Data1["Has_Bluetooth"]
Data["Bluetooth_V"] = All_Data1["Bluetooth_V"]
Data["Ethernet"] = All_Data1["Has_Ethernet"]
Data["WiFi_Ver"] = All_Data1["WiFi_Ver"]
Data["Has_Thunderbolt"] = All_Data1["Has_Thunderbolt"]
Data["Type_C_Count"] = All_Data1["Type_C_Count"]
Data["Type_A_Count"] = All_Data1["Type_A_Count"] 
Data["USB_C_Ver"] = All_Data1["USB_C_Ver"]
Data["USB_C_Ver"] = All_Data1["USB_C_Ver"]
Data["Product_Price_US"] = All_Data1["Product_Price_US"]
Data["Product_Price_SR"] = All_Data1["Product_Price_SR"]
Data["Shipping_Price"] = All_Data1["Shipping_Price"]
Data["Total_Price"] = All_Data1["Total_Price"]
Data["Product_URL"] = All_Data1["Product_URL"]
Data["Product_Images"] = All_Data1["Product_Images"]

In [26]:
Data.drop_duplicates(inplace=True)

In [27]:
Data.describe()

Unnamed: 0,Date_First_Year,Weight,Cores,CPU_Speed,Video_Memory,Storage_HDD,Storage_SSD,Memory_capacity,Memory_Speed,Screen_Size,X_res,Y_res,AC_Power,Battery_Cell,Power_Supply_W,Bluetooth_V,Type_C_Count,Type_A_Count,Product_Price_US,Product_Price_SR,Shipping_Price,Total_Price
count,30809.0,27701.0,26304.0,30795.0,16374.0,31010.0,31010.0,30781.0,15773.0,31010.0,30684.0,30684.0,14933.0,20569.0,10287.0,21265.0,30508.0,29976.0,31010.0,31010.0,31010.0,31010.0
mean,2021.352429,3.462597,4.600859,2.447629,5.801515,392.707578,772.235343,18.469289,2573.870982,12.5915,1624.314757,934.458284,99.018416,2.510282,426.574512,4.816633,2.052019,1.917034,1329.505155,6559.116168,88.754777,6647.870027
std,1.079532,4.091575,2.630052,0.769103,3.224825,849.431121,1082.94406,17.648923,679.473443,6.915925,1001.25147,584.325294,73.064989,2.165396,313.527852,0.478267,2.351098,2.096048,1103.921964,4977.766585,216.577676,5021.29095
min,2014.0,0.498952,1.0,0.0,1.0,0.0,0.0,1.171875,1066.0,0.0,0.0,0.0,20.0,0.0,45.0,2.0,0.0,0.0,9.33,277.14,0.0,277.14
25%,2021.0,1.700971,2.0,2.0,4.0,0.0,256.0,8.0,2400.0,12.0,1366.0,768.0,45.0,0.0,180.0,4.2,0.0,0.0,580.347826,3237.954,0.0,3299.3625
50%,2022.0,2.222603,4.0,2.5,6.0,0.0,512.0,16.0,2400.0,15.4,1920.0,1080.0,65.0,3.0,260.0,5.0,2.0,1.0,984.885,4955.37,0.0,5068.915
75%,2022.0,2.812273,6.0,3.0,8.0,500.0,1024.0,20.0,3200.0,15.6,1920.0,1080.0,170.0,3.0,750.0,5.2,2.0,2.0,1727.0,8316.4,0.0,8390.4675
max,2022.0,28.576319,16.0,5.4,32.0,8192.0,8192.0,512.0,6400.0,55.0,5120.0,2880.0,330.0,9.0,1400.0,5.3,9.0,9.0,12608.99,58801.354,1262.78,58801.35


In [28]:
Data.sample(2)

Unnamed: 0,Title_Product,Brand,Model,Series,Color,Date_First_Year,Product_Type,Product_Style,Product_Usage,Weight,CPU_Brand,CPU_Model,CPU_Series,CPU_Gen,Cores,CPU_Speed,Core_Name,GPU_Brand,GPU_Model,Video_Memory,Is_SSD,Storage_HDD,Storage_SSD,SSD_Type,Memory_capacity,Memory_Type,Memory_Speed,OS_Corporation,OS_Version,Screen_Size,X_res,Y_res,Res_Type,Touchscreen,WideScreen,Screen_Tec,WebCam,AC_Power,Battery_Cell,Power_Supply_W,Bluetooth,Bluetooth_V,Ethernet,WiFi_Ver,Has_Thunderbolt,Type_C_Count,Type_A_Count,USB_C_Ver,Product_Price_US,Product_Price_SR,Shipping_Price,Total_Price,Product_URL,Product_Images
26677,"Apple MacBook Pro 14"" Lapt...",APPLE,MLXX3LL/A,MacBook Air,Black,2022.0,Labtop Computers,Thin,Personal,0.920793,Apple,,,8th,2.0,1.2,Broadwell,Apple,M1,,True,256.0,512,,16.0,LPDDR3L,1600.0,Apple,MacOS,14.0,2560.0,1664.0,QHD,True,False,IPS,Yes,,,,True,4.0,False,Wifi 5,True,1.0,1.0,USB C 3.0,1964.95,9838.77,0.0,9838.77,https://www.newegg.com/p/2...,['//c1.neweggimages.com/Ma...
26871,Apple MacBook MacBook Pro ...,APPLE,MD313LL/A,MacBook Pro,Silver,2020.0,Labtop Computers,Thin,,2.041166,Intel,Core,i5,2nd,2.0,2.4,Skylake,Intel,Graphics,,False,500.0,512,M.2,4.0,DDR3,1333.0,Apple,MacOS,13.3,1280.0,800.0,FHD,True,True,IPS,Yes,85.0,,,True,2.1,True,Wifi 6,True,2.0,2.0,USB C 3.1,379.39,2545.194,0.0,2545.19,https://www.newegg.com/sil...,['https://c1.neweggimages....


In [None]:
Data.to_csv("CSV/df_After_Clean.csv")