In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import pearsonr
from itertools import product
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder

# Read the smartphone dataset from the project-relative CSV into the working frame.
df = pd.read_csv(filepath_or_buffer="data/smartphone_data.csv")

# Summary statistics of the loaded frame, rendered as this cell's output.
df.describe()

In [None]:
# Price Distribution
# Histogram of the Price column (10 bins) with a KDE curve overlaid, drawn on an
# explicitly created axes instead of the implicit "current" pyplot axes.
price_fig, price_ax = plt.subplots(figsize=(8, 5))
sns.histplot(df["Price"], bins=10, kde=True, color="blue", ax=price_ax)
price_ax.set_title("Price Distribution of Smartphones")
price_ax.set_xlabel("Price (INR)")
price_ax.set_ylabel("Count")
plt.show()

In [None]:
# Correlation Analysis
# Pairwise Pearson correlation over the columns that are numeric at this point
# in the notebook; non-numeric columns are skipped via numeric_only=True.
correlation_matrix = df.corr(method="pearson", numeric_only=True)
correlation_matrix

In [None]:
# Annotated heatmap of the correlation matrix, each cell labelled to two decimals.
heatmap_fig, heatmap_ax = plt.subplots(figsize=(10, 6))
sns.heatmap(
    correlation_matrix,
    annot=True,
    fmt=".2f",
    cmap="coolwarm",
    ax=heatmap_ax,
)
heatmap_ax.set_title("Feature Correlation Heatmap")
plt.show()

In [None]:
# Conjoint Analysis
# Collect the distinct levels observed in the data for each profiled attribute.
attributes = {
    name: df[name].unique()
    for name in ("Brand", "Storage", "RAM")
}

# Full factorial design: one row for every combination of attribute levels.
profiles = [combo for combo in product(*attributes.values())]
profiles_df = pd.DataFrame(profiles, columns=list(attributes))

# NOTE(review): the preference scores are seeded random integers in [1, 100),
# i.e. placeholders generated here rather than values read from the dataset.
np.random.seed(42)
profiles_df["Preference_Score"] = np.random.randint(
    low=1, high=100, size=profiles_df.shape[0]
)
profiles_df.head()

In [None]:
# Feature Importance and Price Prediction
# Fit an ordinary least-squares model of Price on the remaining columns and
# tabulate the fitted coefficient of each feature.
#
# NOTE: this cell overwrites columns of `df` in place (Brand becomes integer
# codes; Storage/RAM/Camera become ints). Cells that need the raw labels must
# run before it on a fresh kernel.

# Integer-code the brand labels. The codes are arbitrary identifiers, so the
# Brand coefficient below should not be read as an ordered effect.
df["Brand"] = LabelEncoder().fit_transform(df["Brand"])

# Strip the unit suffix and convert to int. Casting to str first keeps the cell
# re-runnable: after one run these columns already hold ints, and the `.str`
# accessor raises AttributeError on non-string data. `regex=False` treats the
# suffix as a literal substring.
for column, suffix in {"Storage": "GB", "RAM": "GB", "Camera": "MP"}.items():
    df[column] = (
        df[column].astype(str).str.replace(suffix, "", regex=False).astype(int)
    )

# Every column except the target and the Processor column is used as a feature.
X = df.drop(columns=["Price", "Processor"])
y = df["Price"]
model = LinearRegression()
model.fit(X, y)

# "Importance" is the raw regression coefficient (price change per one unit of
# the feature); features measured on different scales are therefore not
# directly comparable by magnitude.
feature_importance = pd.DataFrame({"Feature": X.columns, "Importance": model.coef_})
feature_importance.sort_values(by="Importance", ascending=False)