## Data Loading and Feature Engineering

In [1]:
import re

import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler, MinMaxScaler

# Load the raw dataset, using the first CSV column as the index.
# Forward slashes are accepted on Windows, Linux and macOS alike, whereas a
# backslash-separated path only resolves on Windows. The path is relative to
# the notebook's working directory.
df = pd.read_csv("../data/ndtv_data_final.csv", index_col=0)

# --- Step 0: Data Transformation ---
# Change Price to USD using a fixed conversion rate, rounded to whole dollars.
# NOTE(review): the source currency is presumably INR -- confirm against the dataset.
PRICE_TO_USD_RATE = 0.011
df["Price (USD)"] = (df["Price"] * PRICE_TO_USD_RATE).round().astype(int)
df = df.drop(columns=["Price"])

# Change RAM to GB (decimal convention: 1 GB = 1000 MB), kept to 2 decimals.
MB_PER_GB = 1000
df["RAM (GB)"] = (df["RAM (MB)"] / MB_PER_GB).round(2)
df = df.drop(columns=["RAM (MB)"])

# Binary Variables
# A column is treated as binary when its non-missing values are exactly
# {"Yes", "No"}. Comparing the *set* of values (rather than the frequency-sorted
# list returned by value_counts) keeps the detection independent of which of
# the two answers happens to be more common in a column.
YES_NO_MAP = {"Yes": 1, "No": 0}

binaryVariableList = [
    col for col in df.columns
    if set(df[col].dropna().unique()) == set(YES_NO_MAP)
]

# Encode Yes -> 1 and No -> 0; missing values stay missing.
for col in binaryVariableList:
    df[col] = df[col].map(YES_NO_MAP)

# Pixel density: length of the resolution diagonal (in pixels) divided by the
# screen size in inches, kept to two decimals.
diagonal_px = np.sqrt(df["Resolution x"] ** 2 + df["Resolution y"] ** 2)
df["Pixel per inch (PPI)"] = (diagonal_px / df["Screen size (inches)"]).round(2)

# Brand -> country-of-origin reference table (country: list of brand names).
# Every brand must appear under exactly one country: when this table is
# inverted into a brand -> country lookup, a brand listed twice would silently
# keep only the last country it appears under. "Itel" is therefore listed once,
# under "Hong Kong", which is the value the inverted lookup resolved to.
brands_by_country = {
    "USA": ["Apple", "Google", "HP", "Microsoft", "Razer", "Cat", "Blu", "BlackBerry", "Motorola", "Nuu Mobile"],
    "South Korea": ["Samsung", "LG"],
    "China": ["10.or", "Black Shark", "Coolpad", "Gionee", "Honor", "Huawei", "Lenovo", "Meizu", "Nubia", "OnePlus", "Oppo", "Realme", "Vivo", "Xiaomi", "ZTE", "Zopo", "Phicomm", "Zuk", "LeEco", "Homtom", "Poco", "Sansui", "TCL"],
    "Taiwan": ["Acer", "Asus", "HTC"],
    "Japan": ["Sony", "Panasonic", "Sharp"],
    "India": ["Aqua", "Billion", "Celkon", "Comio", "InFocus", "Intex", "Jio", "Jivi", "Karbonn", "Kult", "Lava", "Lephone", "Lyf", "M-tech", "Micromax", "Mobiistar", "Onida", "Reach", "Smartron", "Spice", "Swipe", "Tambo", "Videocon", "Xolo", "Yu", "Zen", "Ziox", "mPhone", "iBall", "iVoomi"],
    "EU": ["Nokia", "Alcatel", "Gigaset", "Philips"],
    "Hong Kong": ["Infinix", "Tecno", "Itel"],
}

# Brand top price: the highest "Price (USD)" observed for each brand.
# A single groupby pass replaces one boolean-mask scan of the frame per brand.
# NOTE(review): this feature is computed from the price itself over every row,
# and the label ("Price Range") is derived from that same price, so it leaks
# target information into any later train/test split -- consider computing it
# from training rows only.
topPrice = df.groupby("Brand")["Price (USD)"].max().to_dict()

# Rows whose brand is missing get NaN.
df["Brand Top Price"] = df["Brand"].map(topPrice)


# Invert the country -> brands table into a flat brand -> country lookup.
# A brand listed under several countries keeps the last one encountered.
brand_to_country = {
    brand: country
    for country, brands in brands_by_country.items()
    for brand in brands
}

# Attach the country of origin to every row; brands missing from the lookup
# end up as NaN.
df["Brand Origin"] = df["Brand"].map(brand_to_country)

# Setting price ranges
# Bin edges apply to "Price (USD)". Bins are closed on the right, e.g. a price
# of exactly 100 falls in "Ultra Budget" and 101 in "Budget".
# include_lowest=True additionally closes the first bin on the left, so a price
# that rounds to exactly 0 is labelled "Ultra Budget" instead of becoming NaN.
priceRangeLabel = ["Ultra Budget", "Budget", "Mid Range", "Upper Mid", "Premium", "Flagship"]
priceRangeBins = [0, 100, 250, 400, 700, 1000, np.inf]
df["Price Range"] = pd.cut(
    df["Price (USD)"],
    bins=priceRangeBins,
    labels=priceRangeLabel,
    include_lowest=True,
)

# --- Step 1: Encode Operating System ---
def categorize_os(os):
    """Collapse a raw operating-system label into one of four families.

    Parameters
    ----------
    os : object
        Raw operating-system value. Matching is case-insensitive. Anything
        that is not a string (e.g. NaN or None from a missing cell) is
        treated as unknown.

    Returns
    -------
    str
        'Android', 'Windows', 'iOS' or 'Other'.
    """
    # Missing cells arrive as float NaN / None and have no .lower(); bucket
    # them with the unknown systems instead of raising.
    if not isinstance(os, str):
        return 'Other'

    name = os.lower()
    if 'android' in name:
        return 'Android'
    if 'windows' in name:
        return 'Windows'
    # Require "ios" not to be embedded in a longer word, so that names such as
    # "KaiOS" are not mistaken for Apple's iOS ("iOS", "iOS 12", "iOS13" match).
    if re.search(r'(?<![a-z])ios(?![a-z])', name):
        return 'iOS'
    return 'Other'

# Replace each raw operating-system label with its family name.
df['Operating system'] = df['Operating system'].map(categorize_os)

# --- Step 2: One-Hot Encode Nominal Data (Brand Origin & Operating System) ---
# df = pd.get_dummies(df, columns=['Brand Origin', 'Operating system'], drop_first=True) -- Uncomment if needed

# --- Step 3: Convert Price Range into Ordinal Encoding ---
# Tiers listed from cheapest to most expensive; each tier's position in this
# sequence becomes its ordinal code (0 .. 5).
ordered_tiers = ("Ultra Budget", "Budget", "Mid Range", "Upper Mid", "Premium", "Flagship")
price_range_mapping = {tier: rank for rank, tier in enumerate(ordered_tiers)}

df['Price Range'] = df['Price Range'].map(price_range_mapping)

# --- Step 4: Create Interaction Terms ---
# Sum of the rear and front camera values
df['Camera Score'] = df['Rear camera'].add(df['Front camera'])

# RAM counted double, plus internal storage expressed in units of 64 GB
ram_term = df['RAM (GB)'].mul(2)
storage_term = df['Internal storage (GB)'].div(64)
df['Performance Score'] = ram_term.add(storage_term)

# Battery capacity per inch of screen size
df['Battery-to-Screen Ratio'] = df['Battery capacity (mAh)'].div(df['Screen size (inches)'])

# --- Step 5: Standardize Numerical Values ---
# Columns that are rescaled below
numerical_cols = [
    'Brand Top Price',
    'Battery capacity (mAh)',
    'Resolution x',
    'Resolution y',
    'Screen size (inches)',
    'Pixel per inch (PPI)',
    'RAM (GB)',
    'Internal storage (GB)',
    'Rear camera',
    'Front camera',
    'Number of SIMs',
    'Camera Score',
    'Performance Score',
    'Battery-to-Screen Ratio',
]

# Z-score standardization: the scaler is fitted on the listed columns and the
# same columns are then overwritten with their standardized values.
# NOTE(review): the scaler is fitted on every row of df, before any train/test
# split in this notebook, so test-set statistics influence the scaled features.
# Consider fitting on the training rows only (e.g. inside a Pipeline).
scaler = StandardScaler().fit(df[numerical_cols])
df[numerical_cols] = scaler.transform(df[numerical_cols])

## Separate Features and Labels for Machine Learning

In [None]:
# X (features): everything except identifiers and the price-derived columns,
# with the remaining non-numeric columns one-hot encoded.
non_feature_cols = ['Name', 'Brand', 'Model', 'Price (USD)', 'Price Range']
X = pd.get_dummies(df.drop(columns=non_feature_cols))

# y (labels): the ordinal price-range code
y = df["Price Range"]

## Your Model (Split-Train-Test)

In [None]:
# Put your model development procedure here