In [23]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# Column layout: `features` are the attributes compared between phones and
# `target` is the column holding the name reported as the suggestion.
target = 'model'
features = [
    'price', 'brand_name', '5G_or_not', 'processor_brand', 'battery_capacity',
    'ram_capacity', 'internal_memory', 'refresh_rate', 'os', 'primary_camera_rear',
    'fast_charging',
]
categorical_cols = ['brand_name', 'processor_brand', 'os', '5G_or_not']
numeric_cols = ['price', 'battery_capacity', 'ram_capacity', 'internal_memory',
                'refresh_rate', 'primary_camera_rear', 'fast_charging']

# Read the CSV, keep only the columns used below, and discard every row that
# has a missing value in any of them.
df = pd.read_csv('CleanedDataset.csv')[features + [target]].dropna()

# Replace each categorical column with integer codes. The fitted encoder is
# kept per column so the same label -> code mapping can be applied to new input.
label_encoders = {col_name: LabelEncoder() for col_name in categorical_cols}
for col_name, encoder in label_encoders.items():
    df[col_name] = encoder.fit_transform(df[col_name])

# Standardize the numeric columns; the fitted scaler is kept so new input can
# be put on the same scale.
scaler = StandardScaler()
scaled_values = scaler.fit_transform(df[numeric_cols])
df[numeric_cols] = scaled_values

# Example input: one phone specification, using raw (un-encoded, un-scaled)
# values keyed by the same column names as `features`.
input_data = {
    'price': 14000,
    'brand_name': 'Poco',
    '5G_or_not': 'No',  # 'Yes' for 5G
    'processor_brand': 'Snapdragon',
    'battery_capacity': 4500,
    'ram_capacity': 6,
    'internal_memory': 128,
    'refresh_rate': 120,
    'os': 'Android',
    'primary_camera_rear': 64,
    'fast_charging': 25  # example numeric value for fast charging
}

# Preprocess the input data
input_df = pd.DataFrame([input_data])

# Columns that were label-encoded, and the remaining feature columns that were
# standardized. Both are derived from objects fitted earlier so this cell does
# not repeat the column lists; the numeric order follows `features`, which is
# the order the scaler was fitted with.
encoded_cols = list(label_encoders)
scaled_cols = [col for col in features if col not in label_encoders]

# LabelEncoder.transform raises ValueError for a label it was not fitted on.
# Detect such values up front so an unknown brand reaches the "no phones"
# message below instead of crashing, and so any other unknown categorical
# value fails with a message that names the offending column.
unseen_values = {
    col: input_data[col]
    for col in encoded_cols
    if input_data[col] not in set(label_encoders[col].classes_)
}
brand_is_known = 'brand_name' not in unseen_values

if brand_is_known and unseen_values:
    raise ValueError(
        f'Input values not present in the dataset: {unseen_values}'
    )

if brand_is_known:
    for column in encoded_cols:
        input_df[column] = label_encoders[column].transform(input_df[column])

    input_df[scaled_cols] = scaler.transform(input_df[scaled_cols])

    # Filter the dataset based on the selected brand
    filtered_df = df[df['brand_name'] == input_df['brand_name'].iloc[0]]
else:
    # A brand the encoder never saw has no rows in df; use an empty frame so
    # the branch below reports it.
    filtered_df = df.iloc[0:0]

if not filtered_df.empty:
    # Compute similarity on the filtered dataset
    df_features = filtered_df[features]
    input_features = input_df[features]

    # Calculate cosine similarity: one row of scores, one column per candidate.
    # NOTE(review): the label-encoded columns enter the similarity as integer
    # codes alongside the standardized numeric columns - confirm this weighting
    # is intended.
    similarity_scores = cosine_similarity(input_features, df_features)

    # argmax over the single score row gives the candidate's position in
    # filtered_df.
    most_similar_index = similarity_scores.argmax()
    most_similar_phone = filtered_df.iloc[most_similar_index]
    suggested_model = most_similar_phone[target]
    print(f'Suggested Model: {suggested_model}')
else:
    print("No phones found for the selected brand.")


Suggested Model: Poco X3 Pro


In [25]:
import pickle

# Persist the fitted preprocessing objects, one pickle file per object.
artifacts = {
    'label_encoders.pkl': label_encoders,
    'scaler.pkl': scaler,
}
for pickle_path, fitted_obj in artifacts.items():
    with open(pickle_path, 'wb') as out_file:
        pickle.dump(fitted_obj, out_file)

# Write the current contents of df to CSV, omitting the index column.
df.to_csv('processed_dataset.csv', index=False)
