In [50]:
# imports
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder


# Read the NDTV CSV, taking its first column as the row index.
df = pd.read_csv(
    "data/ndtv_data_final.csv",
    index_col=0,
)
# Print each column's name, non-null count and dtype.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1359 entries, 0 to 1358
Data columns (total 21 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Name                    1359 non-null   object 
 1   Brand                   1359 non-null   object 
 2   Model                   1359 non-null   object 
 3   Battery capacity (mAh)  1359 non-null   int64  
 4   Screen size (inches)    1359 non-null   float64
 5   Touchscreen             1359 non-null   object 
 6   Resolution x            1359 non-null   int64  
 7   Resolution y            1359 non-null   int64  
 8   Processor               1359 non-null   int64  
 9   RAM (MB)                1359 non-null   int64  
 10  Internal storage (GB)   1359 non-null   float64
 11  Rear camera             1359 non-null   float64
 12  Front camera            1359 non-null   float64
 13  Operating system        1359 non-null   object 
 14  Wi-Fi                   1359 non-null   objec

In [51]:
# Columns whose string labels are replaced by integer codes.
categorical = [
    "Brand", "Touchscreen", "Operating system", "Wi-Fi",
    "Bluetooth", "GPS", "3G", "4G/ LTE",
]

INR_TO_USD = 0.012  # INR -> USD conversion rate as of 10/22
MB_PER_GB = 1000    # "RAM (MB)" is divided by this to build the "RAM" column

# This cell consumes the raw column names, so it can only be applied once per
# load. Fail up front, before `df` or `encodings` are touched, if the expected
# raw columns are no longer present.
required = [*categorical, "Price", "RAM (MB)", "Name", "Model"]
missing = [col for col in required if col not in df.columns]
if missing:
    raise KeyError(
        f"df is missing {missing}; re-run the data-loading cell before this one"
    )

label_enc = LabelEncoder()

# Encode each categorical column exactly once, and record its
# label -> code mapping from the same fit that produced the codes.
# LabelEncoder assigns code i to classes_[i].
encodings = {}
encoded = df.copy()
for col in categorical:
    encoded[col] = label_enc.fit_transform(df[col])
    encodings[col] = {label: code for code, label in enumerate(label_enc.classes_)}

print(encodings)

# Shorter column names used from here on; keys absent from the frame are ignored.
col_rename = {
    "Battery capacity (mAh)": "Battery",
    "Internal storage (GB)": "Storage",
    "Operating system": "OS",
    "Wi-Fi": "WiFi",
    "4G/ LTE": "4G",
    "Screen size (inches)": "Screen size",
    "Number of SIMs": "nSIM",
}

# Build the final frame in one chain so `df` is only rebound once every step
# has succeeded.
df = (
    encoded
    .assign(
        # convert INR to USD, rounded to cents
        Price=lambda frame: (frame["Price"] * INR_TO_USD).round(2),
        RAM=lambda frame: frame["RAM (MB)"] / MB_PER_GB,
    )
    .drop(columns=["Name", "Model", "RAM (MB)"])
    .rename(columns=col_rename)
)

{'Brand': {'10.or': 0, 'Acer': 1, 'Alcatel': 2, 'Apple': 3, 'Aqua': 4, 'Asus': 5, 'Billion': 6, 'Black Shark': 7, 'BlackBerry': 8, 'Blu': 9, 'Cat': 10, 'Celkon': 11, 'Comio': 12, 'Coolpad': 13, 'Gionee': 14, 'Google': 15, 'HP': 16, 'HTC': 17, 'Homtom': 18, 'Honor': 19, 'Huawei': 20, 'InFocus': 21, 'Infinix': 22, 'Intex': 23, 'Itel': 24, 'Jio': 25, 'Jivi': 26, 'Karbonn': 27, 'Kult': 28, 'LG': 29, 'Lava': 30, 'LeEco': 31, 'Lenovo': 32, 'Lephone': 33, 'Lyf': 34, 'M-tech': 35, 'Meizu': 36, 'Micromax': 37, 'Microsoft': 38, 'Mobiistar': 39, 'Motorola': 40, 'Nokia': 41, 'Nubia': 42, 'Nuu Mobile': 43, 'OnePlus': 44, 'Onida': 45, 'Oppo': 46, 'Panasonic': 47, 'Phicomm': 48, 'Philips': 49, 'Poco': 50, 'Razer': 51, 'Reach': 52, 'Realme': 53, 'Samsung': 54, 'Sansui': 55, 'Smartron': 56, 'Sony': 57, 'Spice': 58, 'Swipe': 59, 'TCL': 60, 'Tambo': 61, 'Tecno': 62, 'Videocon': 63, 'Vivo': 64, 'Xiaomi': 65, 'Xolo': 66, 'Yu': 67, 'ZTE': 68, 'Zen': 69, 'Ziox': 70, 'Zopo': 71, 'Zuk': 72, 'iBall': 73, 'iVoom

In [52]:
# Preview the first rows of the preprocessed frame.
df.head()

Unnamed: 0,Brand,Battery,Screen size,Touchscreen,Resolution x,Resolution y,Processor,Storage,Rear camera,Front camera,OS,WiFi,Bluetooth,GPS,nSIM,3G,4G,Price,RAM
0,44,4085,6.67,1,1440,3120,8,256.0,48.0,16.0,0,1,1,1,2,1,1,707.98,12.0
1,53,4000,6.5,1,1080,2400,8,64.0,64.0,16.0,0,1,1,1,2,1,1,335.99,6.0
2,3,3969,6.5,1,1242,2688,6,64.0,12.0,12.0,6,1,1,1,2,1,1,1282.8,4.0
3,3,3110,6.1,1,828,1792,6,64.0,12.0,12.0,6,1,1,1,2,1,1,754.8,4.0
4,29,4000,6.4,1,1080,2340,8,128.0,12.0,32.0,0,1,1,1,1,0,0,599.88,6.0


In [53]:
# Per-column summary statistics. Columns such as Brand and OS hold label-encoder
# codes at this point, so their mean/std describe the codes, not a real quantity.
df.describe()

Unnamed: 0,Brand,Battery,Screen size,Touchscreen,Resolution x,Resolution y,Processor,Storage,Rear camera,Front camera,OS,WiFi,Bluetooth,GPS,nSIM,3G,4G,Price,RAM
count,1359.0,1359.0,1359.0,1359.0,1359.0,1359.0,1359.0,1359.0,1359.0,1359.0,1359.0,1359.0,1359.0,1359.0,1359.0,1359.0,1359.0,1359.0,1359.0
mean,37.951435,2938.48933,5.29131,0.987491,811.543046,1490.777778,5.551141,30.654864,12.070199,7.037969,0.178072,0.994113,0.988962,0.92053,1.833701,0.893304,0.744665,137.591001,2.488778
std,19.184224,873.514133,0.671357,0.111184,270.707271,557.78012,2.196562,36.950241,8.948337,6.295448,0.920598,0.076527,0.104517,0.270571,0.374457,0.30884,0.43621,166.28992,1.66444
min,0.0,1010.0,2.4,0.0,240.0,320.0,1.0,0.064,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,5.93,0.064
25%,23.0,2300.0,5.0,1.0,720.0,1280.0,4.0,8.0,8.0,2.0,0.0,1.0,1.0,1.0,2.0,1.0,0.0,57.16,1.0
50%,37.0,3000.0,5.2,1.0,720.0,1280.0,4.0,16.0,12.2,5.0,0.0,1.0,1.0,1.0,2.0,1.0,1.0,83.99,2.0
75%,54.0,3500.0,5.7,1.0,1080.0,1920.0,8.0,32.0,13.0,8.0,0.0,1.0,1.0,1.0,2.0,1.0,1.0,143.99,3.0
max,75.0,6000.0,7.3,1.0,2160.0,3840.0,10.0,512.0,108.0,48.0,6.0,1.0,1.0,1.0,3.0,1.0,1.0,2099.88,12.0
