# **Mobile phone price classification**

**Import libraries + data**

In [None]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn import tree

# Read the unlabelled test set and the labelled training set; both CSV files
# are expected in the current working directory.
test_data, train_data = (
    pd.read_csv(csv_name) for csv_name in ("test.csv", "train.csv")
)


**Inspect data**

In [None]:
# Show both tables, then their column labels and the training summary stats.
display(train_data)
display(test_data)
print(train_data.columns)
print(test_data.columns)
print(train_data.describe())

# test_data is later handed to predict() as-is, so the 'id' column must go.
# errors='ignore' keeps this cell re-runnable: without it a second execution
# raises KeyError because 'id' has already been removed.
test_data = test_data.drop(columns='id', errors='ignore')

**Finding linear relationships**

In [None]:
def linear_regression_model(dataset, x, y):
    """Fit a single-variable linear regression and return its slope.

    Args:
        dataset: Table indexable by a list of column names (the notebook
            passes a pandas DataFrame).
        x: Name of the column used as the explanatory variable.
        y: Name of the column used as the response variable.

    Returns:
        The fitted gradient of ``y`` with respect to ``x``.
    """
    predictors = dataset[[x]]
    response = dataset[[y]]
    fitted = LinearRegression().fit(predictors, response)
    # A 2-D target gives a (1, 1) coefficient matrix; return its only entry.
    return fitted.coef_[0, 0]

In [None]:
column_list = train_data.columns.values.tolist()

# Report Pearson's r between each feature and the target. The previous version
# printed a regression slope under the label "correlation"; a slope depends on
# the units of the columns, so slopes of different features are not comparable
# and cannot be read as strength of association.
for feature in column_list:
    if feature == 'price_range':
        continue  # the target compared with itself carries no information
    correlation = train_data[feature].corr(train_data['price_range'])
    print('The correlation between ' + feature + ' and price range is ' + str(correlation))

# NOTE(review): the earlier conclusion ("no direct link between a single
# variable and price range") was drawn from regression slopes, not correlation
# coefficients - re-check it against the values printed above.


**Selecting prediction targets, features, and defining model**

In [None]:
# Prediction target y
y = train_data.price_range

# Selecting features X: every column except the target.
# Build a new list instead of calling column_list.remove(...), which mutates
# the list in place and raises ValueError when this cell is executed twice.
price_range_features = [name for name in column_list if name != "price_range"]
# As before, column_list ends up as the same object, without the target.
column_list = price_range_features
x = train_data[price_range_features]

# Defining model (random_state fixed so repeated runs build the same tree)
# NOTE(review): the notebook title says "classification" but this is a
# regressor - confirm that a regression tree on price_range is intended.
price_range_model = DecisionTreeRegressor(random_state=1)

# Fitting model
price_range_model.fit(x, y)

# Text representation of model; feature_names makes the dump show the real
# column names instead of generic feature_<index> placeholders.
text_representation = tree.export_text(
    price_range_model, feature_names=price_range_features
)
#print(text_representation)

**Model validation**

In [None]:
# Hold out part of the data for validation, splitting features and target
# together. The split is random; passing a fixed random_state makes it the
# same on every run of this script.
train_X, val_X, train_y, val_y = train_test_split(x, y, random_state=0)

# Refit on the training portion only (fit() returns the estimator itself),
# then predict the held-out rows.
val_predictions = price_range_model.fit(train_X, train_y).predict(val_X)

# Score with mean absolute error and compare it to the 0.3 acceptance cut-off.
MAE = mean_absolute_error(val_y, val_predictions)
print(MAE)
print('This model is suitable' if MAE < 0.3 else 'Model will need improvement')

**Applying to test dataset**

In [None]:
# Pass the model exactly the columns it was trained on, in training order.
# Predicting on test_data directly breaks on a second run of this cell,
# because the 'price_range' column added below would be fed in as a feature.
test_features = test_data[price_range_features]

predicted_phone_prices = price_range_model.predict(test_features).tolist()
print(predicted_phone_prices)

# Attach the predictions to the test table and show the result.
test_data["price_range"] = predicted_phone_prices
display(test_data)