# MOBILE PRICE CLASSIFICATION DRIVER

In [None]:
import numpy as np
import pandas as pd
from sklearn import neural_network
from sklearn.preprocessing import StandardScaler,Imputer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [None]:
# Shared preprocessing / model objects, reused by the helper functions below.

# Column-wise (axis=0) mean imputer that treats the value 0 as "missing".
# NOTE(review): `Imputer` was removed from scikit-learn in 0.22 in favour of
# `sklearn.impute.SimpleImputer` -- confirm the installed version supports it.
imp = Imputer(axis=0, strategy='mean', missing_values=0)

# Feature standardiser; it is fitted later, inside the helper functions.
scaler = StandardScaler()

# Multi-layer perceptron with two hidden layers of 17 units each and a
# logistic activation. warm_start=True means successive fit() calls on this
# object continue from the previously learned weights.
ann = neural_network.MLPClassifier(
    hidden_layer_sizes=(17, 17),
    activation="logistic",
    max_iter=100000,
    tol=0,
    shuffle=True,
    warm_start=True,
    random_state=1000,
    verbose=False,
)

In [None]:
def crossValidation(X, y, clf):
    """Fit ``clf`` on a random train split and print hold-out/train accuracy.

    The data is split with ``train_test_split`` using its default
    proportions and no fixed seed, so results vary between calls. The
    module-level ``imp`` and ``scaler`` are fitted on the training part only
    and then applied to both parts; ``clf`` is fitted on the transformed
    training part.

    Args:
        X: Feature matrix accepted by ``train_test_split``.
        y: Target labels aligned with ``X``.
        clf: Estimator exposing ``fit`` and ``predict``.

    Returns:
        None. Scores and error rates are printed as percentages.
    """
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    # Fit the preprocessing on the training part only, so no statistics of
    # the hold-out part leak into the transformation.
    X_train = scaler.fit_transform(imp.fit_transform(X_train))
    X_test = scaler.transform(imp.transform(X_test))

    clf.fit(X_train, y_train)
    cv_sc = accuracy_score(y_test, clf.predict(X_test))
    tr_sc = accuracy_score(y_train, clf.predict(X_train))

    # The error rate is the complement of the accuracy. The previous
    # ``100 / score - 100`` over-stated it and divided by zero at score 0.
    print("CV score :", 100 * cv_sc)
    print("CV error :", 100 * (1 - cv_sc))
    print("Train score :", 100 * tr_sc)
    print("Train error :", 100 * (1 - tr_sc), "%")

def realTEST(X, y, ID_vec, test, clf):
    """Train ``clf`` on all of ``X``/``y`` and write test predictions to CSV.

    The module-level ``imp`` and ``scaler`` are fitted on the full training
    features, ``clf`` is fitted on the transformed result, and predictions
    for ``test`` are written next to their ids in ``../answer.csv``.

    Args:
        X: Training feature matrix.
        y: Training labels aligned with ``X``.
        ID_vec: Ids of the test rows; it is concatenated column-wise with the
            predictions, so it has to be 2-D with one row per test sample.
        test: Test feature matrix with the same columns as ``X``.
        clf: Estimator exposing ``fit`` and ``predict``.

    Returns:
        None. The result is written to disk.
    """
    # Learn the imputation and scaling parameters from the training data.
    imp.fit(X)
    features = imp.transform(X)
    scaler.fit(features)
    features = scaler.transform(features)

    clf.fit(features, y)

    # Apply the already-fitted preprocessing to the test rows, then predict.
    test_features = scaler.transform(imp.transform(test))
    labels = clf.predict(test_features).round(decimals=2)

    # Turn the predictions into a single column so they can sit beside the ids.
    labels = labels.reshape(labels.shape[0], 1)
    table = np.concatenate([ID_vec, labels], axis=1)

    submission = pd.DataFrame(data=table, columns=['id', 'price_range'])
    submission.to_csv("../answer.csv", index=False, header=True)

In [None]:
# Load the raw training and test tables.
train = pd.read_csv('../data/train.csv')
test = pd.read_csv('../data/test.csv')

# Keep the test ids as a one-column 2-D array for the submission file,
# then remove the id column from the test features.
id_vec = np.array(test.loc[:, test.columns == 'id'])
test = test.drop('id', axis=1)

# No training columns are dropped at the moment.

# Separate the target column from the training features.
y = train['price_range']
X = train.drop('price_range', axis=1)

In [None]:
# Display summary statistics of the training table as the cell output.
train.describe()

In [None]:
# Exploratory scatter plot of one training column against another.
# NOTE(review): `plt` is not imported in the visible cells -- presumably
# matplotlib.pyplot is supplied by a notebook magic or an import outside this
# view; confirm before running.

# Names of the `train` columns to plot; fill these in before running the cell.
X_LABEL = ''
Y_LABEL = ''

fig = plt.figure(figsize=(8, 8), dpi=80, facecolor='w', edgecolor='k')

plt.xlabel(X_LABEL)
plt.ylabel(Y_LABEL)

# scatter() takes the x data first. The previous call passed the columns in
# the opposite order, so the data did not match the axis labels. The scatter
# is skipped while the placeholders are empty, since `train['']` would raise
# a KeyError.
if X_LABEL and Y_LABEL:
    plt.scatter(train[X_LABEL], train[Y_LABEL])

In [None]:
# Fit the network on a random train split and print hold-out / train scores.
crossValidation(X,y,ann)

In [None]:
# Fit on the full training data and write the test predictions to ../answer.csv.
realTEST(X,y,id_vec,test,ann)