#### 1. Import libraries

In [1]:
"""
@author: Jerock Kalala
Week_11 Assignment: Hands-On Predictive Analytics with Python
Building the predictive model object
"""
## imports
import dash
from dash import dcc
from dash import html
from dash.dependencies import Input, Output
from keras.models import load_model
import plotly.graph_objs as go
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
import joblib

#### 2. Loading data & Data transformation

In [2]:
# Read the raw diamonds data set from disk.
diamonds_csv_path = "E:\\Bellevue\\Spring_2023\\DSC410_Predictive Analytics\\Week_11\\diamonds.csv"
diamonds = pd.read_csv(diamonds_csv_path)

# Report the (rows, columns) shape, then preview the top of the frame.
print("The initial dimension of the data set is: ", diamonds.shape)
diamonds.head()

The initial dimension of the data set is:  (53940, 10)


Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
0,0.23,Ideal,E,SI2,61.5,55.0,326,3.95,3.98,2.43
1,0.21,Premium,E,SI1,59.8,61.0,326,3.89,3.84,2.31
2,0.23,Good,E,VS1,56.9,65.0,327,4.05,4.07,2.31
3,0.29,Premium,I,VS2,62.4,58.0,334,4.2,4.23,2.63
4,0.31,Good,J,SI2,63.3,58.0,335,4.34,4.35,2.75


#### *Diamonds-model-training

In [3]:
## Preparing the dataset
# Keep only rows where at least one of x / y is positive. The explicit .copy()
# makes the result an independent frame, so the cell assignments below do not
# write into a filtered view (avoids SettingWithCopyWarning).
diamonds = diamonds.loc[(diamonds['x'] > 0) | (diamonds['y'] > 0)].copy()

# Overwrite x and z of the row labelled 11182 with the respective column medians.
# NOTE(review): presumably this row has unusable x/z measurements -- confirm against the raw data.
diamonds.loc[11182, 'x'] = diamonds['x'].median()
diamonds.loc[11182, 'z'] = diamonds['z'].median()

# Discard rows whose y or z value is larger than 30.
diamonds = diamonds.loc[~((diamonds['y'] > 30) | (diamonds['z'] > 30))]

# One-hot encode each categorical column (first level dropped) and append the
# dummy columns to the frame; the original categorical columns are kept.
# dtype=float keeps every dummy column numeric regardless of the pandas
# version's default dummy dtype, so the feature matrix built later is purely
# floating point.
for categorical_col in ['cut', 'color', 'clarity']:
    dummy_cols = pd.get_dummies(
        diamonds[categorical_col],
        prefix=categorical_col,
        drop_first=True,
        dtype=float,
    )
    diamonds = pd.concat([diamonds, dummy_cols], axis=1)

In [4]:
## Dimensionality reduction
from sklearn.decomposition import PCA

# Fit a one-component PCA on the three dimension columns and store the
# resulting component as `dim_index`.
dimension_cols = ['x', 'y', 'z']
pca = PCA(n_components=1, random_state=123)
diamonds['dim_index'] = pca.fit_transform(diamonds[dimension_cols])

# The original dimension columns are no longer needed once dim_index exists.
diamonds = diamonds.drop(columns=dimension_cols)

#### 3. Producing the object to train the model

In [5]:
## Creating X and y
# Predictors: everything except the raw categorical columns and the target.
non_feature_cols = ['cut', 'color', 'clarity', 'price']
X = diamonds.drop(columns=non_feature_cols)

# Target: natural logarithm of the price.
y = np.log(diamonds['price'])

In [6]:
## Standardization: centering and scaling
from sklearn.preprocessing import StandardScaler

# Only the continuous columns are scaled; the dummy columns are left untouched.
numerical_features = ['carat', 'depth', 'table', 'dim_index']

# Fit the scaler on those columns and write the scaled values back into X.
scaler = StandardScaler()
scaled_values = scaler.fit_transform(X[numerical_features])
X.loc[:, numerical_features] = scaled_values

#### 4. Creation of the neural network

In [7]:
## Building the neural network
# One input per column of X, followed by three hidden layers of shrinking width.
n_input = X.shape[1]
n_hidden1, n_hidden2, n_hidden3 = 32, 16, 8

# Fully connected regression network: three ReLU hidden layers and a single
# output unit without activation.
nn_reg = Sequential([
    Dense(units=n_hidden1, activation='relu', input_shape=(n_input,)),
    Dense(units=n_hidden2, activation='relu'),
    Dense(units=n_hidden3, activation='relu'),
    # output layer
    Dense(units=1, activation=None),
])


#### 5. Training the model

In [8]:
## Training the neural network
# Training hyper-parameters.
n_epochs = 40
batch_size = 32

# Optimize mean absolute error (on the log-price target) with Adam, then fit
# on the full feature matrix.
nn_reg.compile(optimizer='adam', loss='mean_absolute_error')
nn_reg.fit(x=X, y=y, batch_size=batch_size, epochs=n_epochs)


Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<keras.callbacks.History at 0x22e998086a0>

In [9]:
## Serializing:
# PCA -- persist the fitted transformer so the app can apply the same projection.
pca_path = 'E:\\Bellevue\\Spring_2023\\DSC410_Predictive Analytics\\Week_11\\pca.joblib'
joblib.dump(pca, pca_path)

['E:\\Bellevue\\Spring_2023\\DSC410_Predictive Analytics\\Week_11\\pca.joblib']

In [10]:
# Scaler -- persist the fitted scaler so the app can apply the same scaling.
scaler_path = 'E:\\Bellevue\\Spring_2023\\DSC410_Predictive Analytics\\Week_11\\scaler.joblib'
joblib.dump(scaler, scaler_path)

['E:\\Bellevue\\Spring_2023\\DSC410_Predictive Analytics\\Week_11\\scaler.joblib']

In [11]:
# Trained model -- save the fitted network to an HDF5 file.
model_path = "E:\\Bellevue\\Spring_2023\\DSC410_Predictive Analytics\\Week_11\\diamond-prices-model.h5"
nn_reg.save(model_path)

### *Predict-diamond-prices

In [19]:
# Create the Dash application. The stylesheet is registered through the
# `external_stylesheets` constructor argument, which is the documented way to
# attach external CSS in current Dash (it replaces the legacy
# `app.css.append_css` call). The "row" / "four columns" class names used by
# the layout Divs rely on this stylesheet.
app = dash.Dash(
    __name__,
    external_stylesheets=['https://codepen.io/chriddyp/pen/bWLwgP.css'],
)

# Reload the artifacts written by the training section: the trained network,
# the fitted PCA and the fitted scaler.
# NOTE: joblib files are pickle-based -- only load files you produced yourself.
model = load_model('E:\\Bellevue\\Spring_2023\\DSC410_Predictive Analytics\\Week_11\\diamond-prices-model.h5')
pca = joblib.load('E:\\Bellevue\\Spring_2023\\DSC410_Predictive Analytics\\Week_11\\pca.joblib')
scaler = joblib.load('E:\\Bellevue\\Spring_2023\\DSC410_Predictive Analytics\\Week_11\\scaler.joblib')

In [13]:
## Div for carat
# type='number' is the dcc.Input type for numeric fields ('numeric' is not one
# of the supported input types); with it the callback receives a number
# instead of free text.
input_carat = dcc.Input(
    id='carat',
    type='number',
    value=0.7)

# Heading plus input, laid out as one column of a row.
div_carat = html.Div(
        children=[html.H3('Carat:'), input_carat],
        className="four columns"
        )


## Div for depth
input_depth = dcc.Input(
    id='depth',
    placeholder='',
    type='number',
    value=60)

div_depth = html.Div(
        children=[html.H3('Depth:'), input_depth],
        className="four columns"
        )


## Div for table
input_table = dcc.Input(
    id='table',
    placeholder='',
    type='number',
    value=60)

div_table = html.Div(
        children=[html.H3('Table:'), input_table],
        className="four columns"
        )

In [14]:
## Div for x
# type='number' is the dcc.Input type for numeric fields ('numeric' is not one
# of the supported input types); with it the callback receives a number
# instead of free text.
input_x = dcc.Input(
    id='x',
    placeholder='',
    type='number',
    value=5)

# Heading plus input, laid out as one column of a row.
div_x = html.Div(
        children=[html.H3('x value:'), input_x],
        className="four columns"
        )

## Div for y
input_y = dcc.Input(
    id='y',
    placeholder='',
    type='number',
    value=5)

div_y = html.Div(
        children=[html.H3('y value:'), input_y],
        className="four columns"
        )

## Div for z
input_z = dcc.Input(
    id='z',
    placeholder='',
    type='number',
    value=3)

div_z = html.Div(
        children=[html.H3('z value: '), input_z],
        className="four columns"
        )

The three inputs for the categorical values

In [15]:
## Div for cut
# Available cut levels; each level is used as both the label and the value.
cut_values = ['Fair', 'Good', 'Ideal', 'Premium', 'Very Good']
cut_options = [dict(label=level, value=level) for level in cut_values]
input_cut = dcc.Dropdown(id='cut', options=cut_options, value='Ideal')

div_cut = html.Div(
    className="four columns",
    children=[html.H3('Cut:'), input_cut],
)


## Div for color
# Available color levels.
color_values = ['D', 'E', 'F', 'G', 'H', 'I', 'J']
color_options = [dict(label=level, value=level) for level in color_values]
input_color = dcc.Dropdown(id='color', options=color_options, value='G')

div_color = html.Div(
    className="four columns",
    children=[html.H3('Color:'), input_color],
)


## Div for clarity
# Available clarity levels.
clarity_values = ['I1', 'IF', 'SI1', 'SI2', 'VS1', 'VS2', 'VVS1', 'VVS2']
clarity_options = [dict(label=level, value=level) for level in clarity_values]
input_clarity = dcc.Dropdown(id='clarity', options=clarity_options, value='SI1')

div_clarity = html.Div(
    className="four columns",
    children=[html.H3('Clarity:'), input_clarity],
)

In [16]:
## Div for numerical characteristics
# Row holding the carat, depth and table inputs.
numerical_children = [div_carat, div_depth, div_table]
div_numerical = html.Div(className="row", children=numerical_children)


## Div for dimensions
# Row holding the x, y and z inputs.
dimension_children = [div_x, div_y, div_z]
div_dimensions = html.Div(className="row", children=dimension_children)


## Div for categorical
# Row holding the cut, color and clarity dropdowns.
categorical_children = [div_cut, div_color, div_clarity]
div_categorical = html.Div(className="row", children=categorical_children)

Function that takes the values entered by the user and produces the price prediction

In [17]:
def get_prediction(carat, depth, table, x, y, z, cut, color, clarity):
    '''Predict the price of a single diamond from the values entered by the user.

    Builds a one-row feature frame with the columns listed in `cols`, reduces
    x/y/z to `dim_index` with the module-level `pca`, one-hot encodes the
    categorical values, scales the numerical columns with the module-level
    `scaler`, runs the module-level `model` and exponentiates its output.

    Parameters
    ----------
    carat, depth, table : number or numeric string
        Numerical characteristics of the diamond.
    x, y, z : number or numeric string
        Dimension values, combined into a single `dim_index` feature.
    cut, color, clarity : str
        Levels taken from `cut_values`, `color_values` and `clarity_values`.
        'Fair', 'D' and 'I1' have no dummy column; they are represented by
        all-zero dummies.

    Returns
    -------
    int
        exp(model output), truncated to an integer.

    Raises
    ------
    TypeError, ValueError
        If a numerical input cannot be converted to float (e.g. None or
        non-numeric text).
    KeyError
        If a categorical value is not one of the known levels.
    '''

    cols = ['carat', 'depth', 'table',
            'cut_Good', 'cut_Ideal', 'cut_Premium', 'cut_Very Good',
            'color_E', 'color_F', 'color_G', 'color_H', 'color_I', 'color_J',
            'clarity_IF','clarity_SI1', 'clarity_SI2', 'clarity_VS1', 'clarity_VS2','clarity_VVS1', 'clarity_VVS2',
            'dim_index']

    ## Map every level except the first one of each list to its dummy column.
    ## The loop variable is named `level` so it cannot be confused with the
    ## `x` dimension parameter.
    cut_dict = {level: 'cut_' + level for level in cut_values[1:]}
    color_dict = {level: 'color_' + level for level in color_values[1:]}
    clarity_dict = {level: 'clarity_' + level for level in clarity_values[1:]}

    ## Form fields can deliver numbers as text; coerce them up front so the
    ## feature frame stays purely numeric.
    carat, depth, table = float(carat), float(depth), float(table)
    x, y, z = float(x), float(y), float(z)

    ## produce a dataframe with a single row of zeros
    df = pd.DataFrame(data = np.zeros((1,len(cols))), columns = cols)

    ## get the numeric characteristics
    df.loc[0,'carat'] = carat
    df.loc[0,'depth'] = depth
    df.loc[0,'table'] = table

    ## transform dimensions into a single dim_index using PCA
    dims_df = pd.DataFrame(data=[[x, y, z]], columns=['x','y','z'])
    df.loc[0,'dim_index'] = pca.transform(dims_df).flatten()[0]

    ## Use the one-hot encoding for the categorical features
    if cut != 'Fair':
        df.loc[0, cut_dict[cut]] = 1

    if color != 'D':
        df.loc[0, color_dict[color]] = 1

    if clarity != 'I1':
        df.loc[0, clarity_dict[clarity]] = 1

    ## Scale the numerical features using the trained scaler
    numerical_features = ['carat', 'depth', 'table', 'dim_index']
    df.loc[:,numerical_features] = scaler.transform(df.loc[:,numerical_features])

    ## Get the predictions using our trained neural network
    prediction = model.predict(df.values).flatten()[0]

    ## Transform the log-prices to prices
    prediction = np.exp(prediction)

    return int(prediction)

In [None]:
## App layout
# Title, instructions, the three rows of inputs, and a heading that the
# callback below fills with the prediction text.
app.layout = html.Div([
        html.H1('IDR Predict diamond prices'),

        html.H2('Enter the diamond characteristics to get the predicted price'),

        html.Div(
                children=[div_numerical, div_dimensions, div_categorical]
                ),
        # Dash style dictionaries use camelCased CSS property names.
        html.H1(id='output',
                style={'marginTop': '50px', 'textAlign': 'center'})
        ])

# Component ids whose `value` feeds the callback, in the order of the
# callback's parameters.
predictors = ['carat', 'depth', 'table', 'x', 'y', 'z', 'cut', 'color', 'clarity']
@app.callback(
        Output('output', 'children'),
        [Input(component_id, 'value') for component_id in predictors])
def show_prediction(carat, depth, table, x, y, z, cut, color, clarity):
    """Return the text shown in the 'output' heading for the current inputs.

    When any field is empty (None or an empty string, e.g. after the user
    clears an input or a dropdown) a prompt is returned instead of calling
    the model, so the callback does not raise on incomplete input.
    """
    current_values = (carat, depth, table, x, y, z, cut, color, clarity)
    if any(value is None or value == '' for value in current_values):
        return "Please fill in every diamond characteristic to get a prediction"

    pred = get_prediction(carat, depth, table, x, y, z, cut, color, clarity)
    return "Predicted Price: {:,}".format(pred)


if __name__ == '__main__':
    # The reloader is disabled so the server can be started from within the notebook process.
    app.run_server(debug=True, use_reloader=False)

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app '__main__'
 * Debug mode: on
