In [58]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from kerastuner.tuners import RandomSearch
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.base import BaseEstimator, TransformerMixin
from joblib import dump

# 1. Input Data

In [17]:
data = pd.read_csv("mobile/phone_specifics.csv")

# 2. Exploring Dataset

In [18]:
data.isnull().sum()

battery_power    0
blue             0
clock_speed      0
dual_sim         0
fc               0
four_g           0
int_memory       0
m_dep            0
mobile_wt        0
n_cores          0
pc               0
px_height        0
px_width         0
ram              0
sc_h             0
sc_w             0
talk_time        0
three_g          0
touch_screen     0
wifi             0
price_range      0
dtype: int64

# 3. Data Cleaning

In [25]:
data

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,...,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
0,842,0,2.2,0,1,0,7,0.6,188,2,...,20,756,2549,9,7,19,0,0,1,1
1,1021,1,0.5,1,0,1,53,0.7,136,3,...,905,1988,2631,17,3,7,1,1,0,2
2,563,1,0.5,1,2,1,41,0.9,145,5,...,1263,1716,2603,11,2,9,1,1,0,2
3,615,1,2.5,0,0,0,10,0.8,131,6,...,1216,1786,2769,16,8,11,1,0,0,2
4,1821,1,1.2,0,13,1,44,0.6,141,2,...,1208,1212,1411,8,2,15,1,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,794,1,0.5,1,0,1,2,0.8,106,6,...,1222,1890,668,13,4,19,1,1,0,0
1996,1965,1,2.6,1,0,0,39,0.2,187,4,...,915,1965,2032,11,10,16,1,1,1,2
1997,1911,0,0.9,1,1,1,36,0.7,108,8,...,868,1632,3057,9,1,5,1,1,0,3
1998,1512,0,0.9,0,4,1,46,0.1,145,5,...,336,670,869,18,10,19,1,1,1,0


In [51]:
# Custom transformer for converting target variable to categorical
class ToCategoricalTransformer(BaseEstimator, TransformerMixin):
    def fit(self, X, y=None):
        return self
    
    def transform(self, y):
        return to_categorical(y)

feature_pipeline = Pipeline(steps=[
    ('ohe', ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(), ["blue", "dual_sim", "four_g", "three_g",
                       "touch_screen", "wifi"])
    ],
    remainder='passthrough'  # Keep the remaining columns untouched
)),
    ('scaler', StandardScaler())
])

target_pipeline = Pipeline(steps=[('to_categorical', ToCategoricalTransformer())])

# Step 4. Training the model

In [52]:
# General
X = data.drop(columns=["price_range"])
y = data["price_range"]

# Split the data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

X_train_transformed = feature_pipeline.fit_transform(X_train)
X_test_transformed = feature_pipeline.transform(X_test)
y_train_categorical = target_pipeline.fit_transform(y_train)
y_test_categorical = target_pipeline.transform(y_test)

In [53]:
def create_model(learning_rate=0.001):
    model = Sequential()
    model.add(Dense(64, input_shape=(X_train_transformed.shape[1],), activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(y_train_categorical.shape[1], activation='softmax'))

    optimizer = Adam(learning_rate=learning_rate)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

basic_model = create_model()
basic_model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_6 (Dense)             (None, 64)                1728      
                                                                 
 dense_7 (Dense)             (None, 32)                2080      
                                                                 
 dense_8 (Dense)             (None, 4)                 132       
                                                                 
Total params: 3940 (15.39 KB)
Trainable params: 3940 (15.39 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [54]:
def build_model(hp):
    model = Sequential()
    model.add(Dense(units=hp.Int('units', min_value=32, max_value=256, step=32),
                    activation='relu', input_shape=(X_train_transformed.shape[1],)))
    model.add(Dense(units=hp.Int('units', min_value=32, max_value=256, step=32), activation='relu'))
    model.add(Dense(y_train_categorical.shape[1], activation='softmax'))

    model.compile(optimizer=Adam(hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG')),
                  loss='categorical_crossentropy', metrics=['accuracy'])
    return model

tuner = RandomSearch(build_model, objective='val_accuracy', max_trials=5, executions_per_trial=3, directory='mobile_nl', project_name='mobile')
tuner.search(X_train_transformed, y_train_categorical, epochs=10, validation_split=0.2)

Reloading Tuner from mobile_nl\mobile\tuner0.json


In [55]:
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
model = create_model(learning_rate=best_hps.get('learning_rate'))
history = model.fit(X_train_transformed, y_train_categorical, epochs=50, validation_split=0.2, verbose=1)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [56]:
y_pred = model.predict(X_test_transformed)
y_pred_classes = y_pred.argmax(axis=1)
accuracy = accuracy_score(y_test, y_pred_classes)
print("Accuracy on Test Set:", accuracy)

Accuracy on Test Set: 0.9275


In [60]:
# Save the feature pipeline
dump(feature_pipeline, 'feature_pipeline.joblib')

# Save the target pipeline
dump(target_pipeline, 'target_pipeline.joblib')

# Save the model
model.save('mobile_model.keras')

# 6. Loading the model

In [78]:
import pandas as pd
from joblib import load
from tensorflow.keras.models import load_model

# Load the feature and target pipelines
feature_pipeline = load('feature_pipeline.joblib')

# Load the TensorFlow model
model = load_model('mobile_model.keras')

# New data (example)
new_data = data.iloc[[189]].drop(columns=["price_range"])

# Preprocess the new data using the loaded pipelines
new_data_transformed = feature_pipeline.transform(new_data)

# Make a prediction with the loaded model
prediction = model.predict(new_data_transformed).argmax(axis=1)

# Output the prediction
print("Predicted Label:", prediction)

Predicted Label: [3]


In [75]:
data.iloc[[10]].drop(columns=["price_range"])

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,pc,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi
10,769,1,2.9,1,0,0,9,0.1,182,5,1,248,874,3946,5,2,7,0,0,0
