## Model Loading and Feature Engineering

In [417]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler, MinMaxScaler

df = pd.read_csv("..\\data\\ndtv_data_final.csv", index_col=0) 

# --- Step 0: Data Transformation ---
# Change Price to USD
df["Price (USD)"] = round(df.Price*0.011).astype(int)
df.drop(["Price"], axis=1, inplace=True)

# Change RAM to GB
df["RAM (GB)"] = round(df["RAM (MB)"]/1000, 2)
df.drop(["RAM (MB)"], axis=1, inplace=True)

# Binary Variables
binaryVariableList = list()

for x in df.columns:
    if df[x].value_counts().index.to_list() == ['Yes', 'No']:
        binaryVariableList.append(x)

for x in binaryVariableList:
    df[x]=df[x].map({"Yes": 1, "No":0})

# Pixel
df["Pixel per inch (PPI)"] = np.sqrt((df["Resolution x"]**2 + df["Resolution y"]**2))/df["Screen size (inches)"]
df["Pixel per inch (PPI)"] = df["Pixel per inch (PPI)"].round(2)

# Brand help
brands_by_country = {
    "USA": ["Apple", "Google", "HP", "Microsoft", "Razer", "Cat", "Blu", "BlackBerry", "Motorola", "Nuu Mobile"],
    "South Korea": ["Samsung", "LG"],
    "China": ["10.or", "Black Shark", "Coolpad", "Gionee", "Honor", "Huawei", "Lenovo", "Meizu", "Nubia", "OnePlus", "Oppo", "Realme", "Vivo", "Xiaomi", "ZTE", "Zopo", "Phicomm", "Zuk", "LeEco", "Homtom", "Poco", "Sansui", "TCL"],
    "Taiwan": ["Acer", "Asus", "HTC"],
    "Japan": ["Sony", "Panasonic", "Sharp"],
    "India": ["Aqua", "Billion", "Celkon", "Comio", "InFocus", "Intex", "Itel", "Jio", "Jivi", "Karbonn", "Kult", "Lava", "Lephone", "Lyf", "M-tech", "Micromax", "Mobiistar", "Onida", "Reach", "Smartron", "Spice", "Swipe", "Tambo", "Videocon", "Xolo", "Yu", "Zen", "Ziox", "mPhone", "iBall", "iVoomi"],
    "EU": ["Nokia", "Alcatel","Gigaset", "Philips"],
    "Hong Kong": ["Infinix", "Tecno", "Itel"],
}

# branc top price
topPrice = dict()
for x in df["Brand"].unique():
    topPrice[x] = df[df["Brand"] == x]["Price (USD)"].max()

df["Brand Top Price"] = df["Brand"].map(topPrice)


# Reverse the keys and values in the dictionary
brand_to_country = {}
for country, brands in brands_by_country.items():
    for brand in brands:
        brand_to_country[brand] = country

# Map them to the dataset
df["Brand Origin"] = df["Brand"].map(brand_to_country)

# Setting price ranges
priceRangeLabel = ["Ultra Budget", "Budget", "Mid Range", "Upper Mid", "Premium", "Flagship"]
priceRangeBins = [0, 100, 250, 400, 700, 1000, np.inf]
df["Price Range"] = pd.cut(df["Price (USD)"], bins=priceRangeBins, labels=priceRangeLabel)

# --- Step 1: Encode Operating System ---
def categorize_os(os):
    os = os.lower()
    if 'android' in os:
        return 'Android'
    elif 'windows' in os:
        return 'Windows'
    elif 'ios' in os:
        return 'iOS'
    else:
        return 'Other'

df['Operating system'] = df['Operating system'].apply(categorize_os)

# --- Step 2: One-Hot Encode Nominal Data (Brand Origin & Operating System) --- 
# df = pd.get_dummies(df, columns=['Brand Origin', 'Operating system'], drop_first=True) -- Uncommet if needed

# --- Step 3: Convert Price Range into Ordinal Encoding ---
price_range_mapping = {
    "Ultra Budget": 0, 
    "Budget": 1, 
    "Mid Range": 2, 
    "Upper Mid": 3, 
    "Premium": 4, 
    "Flagship": 5
    }
df['Price Range'] = df['Price Range'].map(price_range_mapping)

# --- Step 4: Create Interaction Terms ---
df['Camera Score'] = df['Rear camera'] + df['Front camera']
df['Performance Score'] = (df['RAM (GB)'] * 2) + (df['Internal storage (GB)'] / 64)
df['Battery-to-Screen Ratio'] = df['Battery capacity (mAh)'] / df['Screen size (inches)']

# --- Step 5: Normalize and Standardize Numerical Values ---
# Identify numerical columns
numerical_cols = [
    'Brand Top Price', 'Battery capacity (mAh)', 'Resolution x', 'Resolution y',
    'Screen size (inches)', 'Pixel per inch (PPI)', 'RAM (GB)', 'Internal storage (GB)',
    'Rear camera', 'Front camera', 'Number of SIMs',
    'Camera Score', 'Performance Score', 'Battery-to-Screen Ratio'
]

## Disabled
# Standardization (Z-score normalization)
# scaler = StandardScaler()
# df[numerical_cols] = scaler.fit_transform(df[numerical_cols])

## Seperate Features and Labels for Machine Learning

In [418]:
# X (features)
X=df.drop(columns=['Name', 'Brand', 'Model','Price Range'], axis=1)

scaler = StandardScaler()
X[numerical_cols] = scaler.fit_transform(X[numerical_cols])
X=pd.get_dummies(X)

# y (labels)
y=df["Price Range"]

## Your Model (Split-Train-Test)

### Recommendation System - Guo

Our recommendation system of mobiles will be initially built on **Content-Based Filtering** since we don't have a user-item interaction data (e.g. Ratings, Pageviews etc.) The idea is to show similar products according to a given product in the database, thus it is suitable to be shown in a specific item page under the banner "Similar Products" or "You may also like ..." 

In [419]:
from sklearn.metrics.pairwise import cosine_similarity

In the following part we'll show some variants of this simple CB Filtering algorithm, therefore we use a decorator to avoid repeating same print codes in different version of "similar_radar" functions.

In [420]:
# Decorator for printing
def print_recommendations(func):
    def wrapper(*args, **kwargs):
        result = func(*args, **kwargs)
        n_rows = len(result)
        if kwargs['print_products'] ==  True:
            print(f"Product Name: {result.loc[0]["Name"]}\n\nYou may also like:")

            for i in range(1, n_rows):
                print(f"{result.loc[i]["Name"]}")
                print(f"Price: ${result.loc[i]["Price (USD)"]}")
                print(f" Operating System: {result.loc[i]["Operating system"]}")
                print(f" Screen Size/Resolution: {result.loc[i]["Screen size (inches)"]} Inches / {result.loc[i]["Resolution x"]:.0f} * {result.loc[i]["Resolution y"]:.0f}")
                print(f" CPU Cores: {result.loc[i]["Processor"]:.0f} Cores")
                print(f" Battery: {result.loc[i]["Battery capacity (mAh)"]:.0f} mAh")
                print(f" ROM/RAM: {result.loc[i]["Internal storage (GB)"]:.1f} GB / {result.loc[i]["RAM (GB)"]:.1f} GB")
                print(f" Rear Camera/Front Camera: {result.loc[i]["Rear camera"]} MP / {result.loc[i]["Front camera"]} MP\n")

        if kwargs['print_products'] == False:
            return result
        
    return wrapper

Below is a basic version of similar radar that provides similar suggestions on a specific item page

In [421]:
@print_recommendations
def similar_radar_v1(data: pd.DataFrame, product_index: int, n_rec: int, print_products=False):

    # Remove unnecessary columns
    X = data.drop(columns=['Name', 'Brand', 'Model','Price Range'], axis=1)
    X = pd.get_dummies(X, dtype=int)

    # Standardize
    numerical_cols = [
    'Brand Top Price', 'Battery capacity (mAh)', 'Resolution x', 'Resolution y', "Processor", 
    'Screen size (inches)', 'Pixel per inch (PPI)', 'RAM (GB)', 'Internal storage (GB)',
    'Rear camera', 'Front camera', 'Number of SIMs',
    'Camera Score', 'Performance Score', 'Battery-to-Screen Ratio'
    ]
    scaler = StandardScaler()
    X[numerical_cols] = scaler.fit_transform(X[numerical_cols])

    # Compute similarity matrix
    similarity_matrix = pd.DataFrame(cosine_similarity(X), index=X.index, columns=X.index)

    # Compute similar products DataFrame
    similar_products = data[2:].reindex(similarity_matrix[product_index].sort_values(ascending=False).index).reset_index(drop=True)

    return similar_products.iloc[0:n_rec+1]

We the test it with Mobile index **No.7**, it will suggest **Top 5** most similar products and print details of these products.

In [422]:
similar_radar_v1(df, 7, 5, print_products=True)

Product Name: Samsung Galaxy Note 10+

You may also like:
Samsung Galaxy Note 10
Price: $770.0
 Operating System: Android
 Screen Size/Resolution: 6.3 Inches / 1080 * 2280
 CPU Cores: 8 Cores
 Battery: 3500 mAh
 ROM/RAM: 256.0 GB / 8.0 GB
 Rear Camera/Front Camera: 12.0 MP / 10.0 MP

Samsung Galaxy S20
Price: $737.0
 Operating System: Android
 Screen Size/Resolution: 6.2 Inches / 1440 * 3200
 CPU Cores: 8 Cores
 Battery: 4000 mAh
 ROM/RAM: 128.0 GB / 8.0 GB
 Rear Camera/Front Camera: 12.0 MP / 10.0 MP

Samsung Galaxy S20+
Price: $814.0
 Operating System: Android
 Screen Size/Resolution: 6.7 Inches / 1440 * 3200
 CPU Cores: 8 Cores
 Battery: 4500 mAh
 ROM/RAM: 128.0 GB / 8.0 GB
 Rear Camera/Front Camera: 12.0 MP / 10.0 MP

Samsung Galaxy Note 9
Price: $627.0
 Operating System: Android
 Screen Size/Resolution: 6.4 Inches / 1440 * 2960
 CPU Cores: 8 Cores
 Battery: 4000 mAh
 ROM/RAM: 128.0 GB / 6.0 GB
 Rear Camera/Front Camera: 12.0 MP / 8.0 MP

HTC U11+
Price: $583.0
 Operating System: A

However, in a real context user might have these needs:

- They only want to see products with the same brand as current one or from all other diffrent brands.

- They only want to see products in the same price range as current one.

In [423]:
@print_recommendations
def similar_radar_v2(data: pd.DataFrame, product_index: int, n_rec: int, print_products: bool = False, suggest_type: str = None, same_range: bool = False):

    # Remove unnecessary columns
    X = data.drop(columns=['Name', 'Brand', 'Model','Price Range'], axis=1)
    X = pd.get_dummies(X, dtype=int)

    # Standardize
    numerical_cols = [
    'Brand Top Price', 'Battery capacity (mAh)', 'Resolution x', 'Resolution y', "Processor", 
    'Screen size (inches)', 'Pixel per inch (PPI)', 'RAM (GB)', 'Internal storage (GB)',
    'Rear camera', 'Front camera', 'Number of SIMs',
    'Camera Score', 'Performance Score', 'Battery-to-Screen Ratio'
    ]
    scaler = StandardScaler()
    X[numerical_cols] = scaler.fit_transform(X[numerical_cols])

    # Compute similarity matrix
    similarity_matrix = pd.DataFrame(cosine_similarity(X), index=X.index, columns=X.index)

    # Compute similar products DataFrame
    similar_products = data[2:].reindex(similarity_matrix[product_index].sort_values(ascending=False).index).reset_index(drop=True)
    
    # Extract current brand
    current_brand = data.loc[product_index]["Brand"]
    current_range = data.loc[product_index]["Price Range"]
    
    if suggest_type == "same":
        similar_products = similar_products[similar_products["Brand"] == current_brand].reset_index(drop=True)

    if suggest_type == "different":
        similar_products = pd.concat([similar_products.iloc[[0]], similar_products[similar_products["Brand"] != current_brand]]).reset_index(drop=True)

    if same_range == True:
        similar_products = similar_products[similar_products["Price Range"] == current_range].reset_index(drop=True)

    return similar_products.iloc[0:n_rec+1]

For the same mobile **No. 7** with `suggest_type` set to `same`, our similar radar should only generate phones from Samsung.

In [424]:
similar_radar_v2(df, 7, 5, print_products=True, suggest_type="same")

Product Name: Samsung Galaxy Note 10+

You may also like:
Samsung Galaxy Note 10
Price: $770.0
 Operating System: Android
 Screen Size/Resolution: 6.3 Inches / 1080 * 2280
 CPU Cores: 8 Cores
 Battery: 3500 mAh
 ROM/RAM: 256.0 GB / 8.0 GB
 Rear Camera/Front Camera: 12.0 MP / 10.0 MP

Samsung Galaxy S20
Price: $737.0
 Operating System: Android
 Screen Size/Resolution: 6.2 Inches / 1440 * 3200
 CPU Cores: 8 Cores
 Battery: 4000 mAh
 ROM/RAM: 128.0 GB / 8.0 GB
 Rear Camera/Front Camera: 12.0 MP / 10.0 MP

Samsung Galaxy S20+
Price: $814.0
 Operating System: Android
 Screen Size/Resolution: 6.7 Inches / 1440 * 3200
 CPU Cores: 8 Cores
 Battery: 4500 mAh
 ROM/RAM: 128.0 GB / 8.0 GB
 Rear Camera/Front Camera: 12.0 MP / 10.0 MP

Samsung Galaxy Note 9
Price: $627.0
 Operating System: Android
 Screen Size/Resolution: 6.4 Inches / 1440 * 2960
 CPU Cores: 8 Cores
 Battery: 4000 mAh
 ROM/RAM: 128.0 GB / 6.0 GB
 Rear Camera/Front Camera: 12.0 MP / 8.0 MP

Samsung Galaxy Fold
Price: $1815.0
 Operati

For the same mobile **No. 7** with `suggest_type` set to `different`, our similar radar should only generate phones not from Samsung

In [425]:
similar_radar_v2(df, 7, 5, print_products=True, suggest_type="different", same_range=True)

Product Name: Samsung Galaxy Note 10+

You may also like:
Asus ROG Phone
Price: $770.0
 Operating System: Android
 Screen Size/Resolution: 6.0 Inches / 1080 * 2160
 CPU Cores: 8 Cores
 Battery: 4000 mAh
 ROM/RAM: 128.0 GB / 8.0 GB
 Rear Camera/Front Camera: 12.0 MP / 8.0 MP

HTC U12+
Price: $825.0
 Operating System: Android
 Screen Size/Resolution: 6.0 Inches / 1440 * 2880
 CPU Cores: 8 Cores
 Battery: 3500 mAh
 ROM/RAM: 64.0 GB / 6.0 GB
 Rear Camera/Front Camera: 12.0 MP / 8.0 MP

Google Pixel 4 XL
Price: $923.0
 Operating System: Android
 Screen Size/Resolution: 6.3 Inches / 1440 * 3040
 CPU Cores: 8 Cores
 Battery: 3700 mAh
 ROM/RAM: 64.0 GB / 6.0 GB
 Rear Camera/Front Camera: 16.0 MP / 8.0 MP

Google Pixel 4
Price: $764.0
 Operating System: Android
 Screen Size/Resolution: 5.7 Inches / 1080 * 2280
 CPU Cores: 8 Cores
 Battery: 2800 mAh
 ROM/RAM: 64.0 GB / 6.0 GB
 Rear Camera/Front Camera: 16.0 MP / 8.0 MP

Huawei Mate 30 Pro
Price: $850.0
 Operating System: Android
 Screen Size/Res

For the same mobile **No. 7** with `same_range` set to `True`, our similar radar should only generate phone within the same price range.

In [426]:
similar_radar_v2(df, 7, 5, print_products=True, same_range="True")

Product Name: Samsung Galaxy Note 10+

You may also like:
Samsung Galaxy Note 10
Price: $770.0
 Operating System: Android
 Screen Size/Resolution: 6.3 Inches / 1080 * 2280
 CPU Cores: 8 Cores
 Battery: 3500 mAh
 ROM/RAM: 256.0 GB / 8.0 GB
 Rear Camera/Front Camera: 12.0 MP / 10.0 MP

Samsung Galaxy S20
Price: $737.0
 Operating System: Android
 Screen Size/Resolution: 6.2 Inches / 1440 * 3200
 CPU Cores: 8 Cores
 Battery: 4000 mAh
 ROM/RAM: 128.0 GB / 8.0 GB
 Rear Camera/Front Camera: 12.0 MP / 10.0 MP

Samsung Galaxy S20+
Price: $814.0
 Operating System: Android
 Screen Size/Resolution: 6.7 Inches / 1440 * 3200
 CPU Cores: 8 Cores
 Battery: 4500 mAh
 ROM/RAM: 128.0 GB / 8.0 GB
 Rear Camera/Front Camera: 12.0 MP / 10.0 MP

Samsung Galaxy Note 9
Price: $627.0
 Operating System: Android
 Screen Size/Resolution: 6.4 Inches / 1440 * 2960
 CPU Cores: 8 Cores
 Battery: 4000 mAh
 ROM/RAM: 128.0 GB / 6.0 GB
 Rear Camera/Front Camera: 12.0 MP / 8.0 MP

HTC U11+
Price: $583.0
 Operating System: A

Furthermore, if a user give a clear indication of what's the characteristic he emphasize to find similar products, we can use it as a weight vector in the computation of similarity matrix. 

We define a `generate_weights` function for simulating a weight vector, and check the difference in suggestions. When the `pref_type` set to `Performance` it simulates a weight vector priorizing on better processor, better RAM and also weakening the influence of cameras and battery. In real context, these weight vectors can be predefined and stored in a dictionary and called by giving users a set of options such as ("Products with similar performance", "Products with similar cameras" etc.)

In [427]:
def generate_weights(ref_data, pref_type):
    weights = pd.Series(np.ones(ref_data.shape[1]), index=ref_data.columns)

    if pref_type == "Performance":
        influnced_columns = ["Battery capacity (mAh)", "Processor", 'Screen size (inches)', "Rear camera", 
                             "Front camera", "Price (USD)", "RAM (GB)"]
        weights[influnced_columns] = [0.3, 5.0, 0.3, 0.6, 0.2, 0.8, 3.0]
        weights = weights / weights.sum()
        
    return weights

@print_recommendations
def similar_radar_v3(data: pd.DataFrame, product_index: int, n_rec: int, print_products: bool = False, 
                     suggest_type: str = None, same_range: bool = False, preference: str = None):

    # Remove unnecessary columns
    X = data.drop(columns=['Name', 'Brand', 'Model','Price Range'], axis=1)
    X = pd.get_dummies(X, dtype=int)
    
    # Standardize
    numerical_cols = [
    'Brand Top Price', 'Battery capacity (mAh)', 'Resolution x', 'Resolution y', "Processor", 
    'Screen size (inches)', 'Pixel per inch (PPI)', 'RAM (GB)', 'Internal storage (GB)',
    'Rear camera', 'Front camera', 'Number of SIMs',
    'Camera Score', 'Performance Score', 'Battery-to-Screen Ratio'
    ]
    scaler = StandardScaler()
    X[numerical_cols] = scaler.fit_transform(X[numerical_cols])
    
    # Apply weights
    X = X * generate_weights(X, preference)

    # Compute similarity matrix
    similarity_matrix = pd.DataFrame(cosine_similarity(X), index=X.index, columns=X.index)

    # Compute similar products DataFrame
    similar_products = data[2:].reindex(similarity_matrix[product_index].sort_values(ascending=False).index).reset_index(drop=True)
    
    # Extract current brand
    current_brand = data.loc[product_index]["Brand"]
    current_range = data.loc[product_index]["Price Range"]
    
    if suggest_type == "same":
        similar_products = similar_products[similar_products["Brand"] == current_brand].reset_index(drop=True)

    if suggest_type == "different":
        similar_products = pd.concat([similar_products.iloc[[0]], similar_products[similar_products["Brand"] != current_brand]]).reset_index(drop=True)

    if same_range == True:
        similar_products = similar_products[similar_products["Price Range"] == current_range].reset_index(drop=True)

    return similar_products.iloc[0:n_rec+1]

Then we do the same for mobile **No. 7** but with `preference` set to "Performance".

In [428]:
similar_radar_v3(df, 7, 5, print_products=True, preference="Performance")

Product Name: Samsung Galaxy Note 10+

You may also like:
Oppo Find X
Price: $650.0
 Operating System: Android
 Screen Size/Resolution: 6.42 Inches / 1080 * 2340
 CPU Cores: 8 Cores
 Battery: 3730 mAh
 ROM/RAM: 256.0 GB / 8.0 GB
 Rear Camera/Front Camera: 16.0 MP / 25.0 MP

Samsung Galaxy Note 10
Price: $770.0
 Operating System: Android
 Screen Size/Resolution: 6.3 Inches / 1080 * 2280
 CPU Cores: 8 Cores
 Battery: 3500 mAh
 ROM/RAM: 256.0 GB / 8.0 GB
 Rear Camera/Front Camera: 12.0 MP / 10.0 MP

Samsung Galaxy S20
Price: $737.0
 Operating System: Android
 Screen Size/Resolution: 6.2 Inches / 1440 * 3200
 CPU Cores: 8 Cores
 Battery: 4000 mAh
 ROM/RAM: 128.0 GB / 8.0 GB
 Rear Camera/Front Camera: 12.0 MP / 10.0 MP

Samsung Galaxy S20+
Price: $814.0
 Operating System: Android
 Screen Size/Resolution: 6.7 Inches / 1440 * 3200
 CPU Cores: 8 Cores
 Battery: 4500 mAh
 ROM/RAM: 128.0 GB / 8.0 GB
 Rear Camera/Front Camera: 12.0 MP / 10.0 MP

Asus ROG Phone
Price: $770.0
 Operating System: And

From the result we can see, the top suggestion become mobile "Oppo Find X", indicating our weight vector indeed influence the computation of similarity.