In [1]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')
from sklearn import metrics
import numpy as np
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
import os

In [2]:
# Select TensorFlow as the Keras backend and set TensorFlow's C++ log level
# to '1' so that INFO messages are suppressed. Both variables are written in
# a single call.
os.environ.update(
    {
        'KERAS_BACKEND': 'tensorflow',
        'TF_CPP_MIN_LOG_LEVEL': '1',
    }
)

In [3]:
# The backend must be set before importing keras, not after: this import has
# to stay below the cell that assigns os.environ['KERAS_BACKEND'].
import keras
# Fix the random seed (812) so that repeated runs are comparable.
# NOTE(review): presumably this also covers weight initialisation and the
# shuffling done during fit() -- confirm against the Keras documentation.
keras.utils.set_random_seed(812)

In [5]:
# Load the diamonds table from a CSV in the current working directory and
# echo it so the available columns can be inspected.
# NOTE(review): the printed frame shows an 'Unnamed: 0' column that mirrors
# the row index; it looks like a saved index and could probably be consumed
# with index_col=0 -- confirm before changing, the column is unused below.
diamonds = pd.read_csv('Diamonds.csv')
print (diamonds)

       Unnamed: 0  carat        cut color clarity  depth  table  price     x  \
0               0   0.23      Ideal     E     SI2   61.5   55.0    326  3.95   
1               1   0.21    Premium     E     SI1   59.8   61.0    326  3.89   
2               2   0.23       Good     E     VS1   56.9   65.0    327  4.05   
3               3   0.29    Premium     I     VS2   62.4   58.0    334  4.20   
4               4   0.31       Good     J     SI2   63.3   58.0    335  4.34   
...           ...    ...        ...   ...     ...    ...    ...    ...   ...   
53935       53935   0.72      Ideal     D     SI1   60.8   57.0   2757  5.75   
53936       53936   0.72       Good     D     SI1   63.1   55.0   2757  5.69   
53937       53937   0.70  Very Good     D     SI1   62.8   60.0   2757  5.66   
53938       53938   0.86    Premium     H     SI2   61.0   58.0   2757  6.15   
53939       53939   0.75      Ideal     D     SI2   62.2   55.0   2757  5.83   

          y     z  
0      3.98  2.43  

In [6]:
def Cut(series):
    """Translate a cut label into its integer rank.

    Despite the parameter name, this is called once per cell value through
    ``Series.apply``, so ``series`` is a single label, not a whole Series.

    Mapping: "Ideal" -> 5, "Premium" -> 4, "Very Good" -> 3, "Good" -> 2,
    "Fair" -> 1. The comparison is exact (case- and whitespace-sensitive).

    Returns:
        The integer rank, or None when the label matches none of the above.
    """
    # Pairs are listed in the order the labels are tested.
    rank_by_label = (
        ("Ideal", 5),
        ("Premium", 4),
        ("Very Good", 3),
        ("Good", 2),
        ("Fair", 1),
    )
    return next((rank for label, rank in rank_by_label if series == label), None)
    

# Add the integer cut rank as a new column. Cut returns None for any label it
# does not recognise, so such rows would be left without a rank.
diamonds['cutR'] = diamonds['cut'].apply(Cut)


# ### Recoding Color to ColorR

# In[66]:


def Color(series):
    """Translate a colour letter into its integer rank.

    Called once per cell value through ``Series.apply``; ``series`` is a
    single letter, not a whole Series.

    Mapping: "D" -> 7, "E" -> 6, "F" -> 5, "G" -> 4, "H" -> 3, "I" -> 2,
    "J" -> 1. The comparison is exact (case-sensitive).

    Returns:
        The integer rank, or None when the letter is not one of D-J.
    """
    ranked_letters = (
        ("D", 7),
        ("E", 6),
        ("F", 5),
        ("G", 4),
        ("H", 3),
        ("I", 2),
        ("J", 1),
    )
    for letter, score in ranked_letters:
        if series == letter:
            return score
    # No letter matched.
    return None
    

# Add the integer colour rank as a new column. Color returns None for any
# letter outside D-J, so such rows would be left without a rank.
diamonds['colorR'] = diamonds['color'].apply(Color)


# ### Recoding Clarity to ClarityR

# In[67]:


def Clarity(series):
    """Translate a clarity grade into its integer rank.

    Called once per cell value through ``Series.apply``; ``series`` is a
    single grade, not a whole Series.

    Mapping: "I1" -> 1, "SI2" -> 2, "SI1" -> 3, "VS2" -> 4, "VS1" -> 5,
    "VVS2" -> 6, "VVS1" -> 7, "IF" -> 8. The comparison is exact.

    Returns:
        The integer rank, or None when the grade is not in the scale.
    """
    # The position in this tuple (counted from 1) is the rank.
    scale = ("I1", "SI2", "SI1", "VS2", "VS1", "VVS2", "VVS1", "IF")
    for position, grade in enumerate(scale, start=1):
        if series == grade:
            return position
    return None
    

# Add the integer clarity rank as a new column. Clarity returns None for any
# grade it does not recognise, so such rows would be left without a rank.
diamonds['clarityR'] = diamonds['clarity'].apply(Clarity)


# ### Dataset with new columns: cutR, colorR, clarityR 

# In[68]:


# Echo the frame again to confirm the cutR / colorR / clarityR columns exist.
print(diamonds)


       Unnamed: 0  carat        cut color clarity  depth  table  price     x  \
0               0   0.23      Ideal     E     SI2   61.5   55.0    326  3.95   
1               1   0.21    Premium     E     SI1   59.8   61.0    326  3.89   
2               2   0.23       Good     E     VS1   56.9   65.0    327  4.05   
3               3   0.29    Premium     I     VS2   62.4   58.0    334  4.20   
4               4   0.31       Good     J     SI2   63.3   58.0    335  4.34   
...           ...    ...        ...   ...     ...    ...    ...    ...   ...   
53935       53935   0.72      Ideal     D     SI1   60.8   57.0   2757  5.75   
53936       53936   0.72       Good     D     SI1   63.1   55.0   2757  5.69   
53937       53937   0.70  Very Good     D     SI1   62.8   60.0   2757  5.66   
53938       53938   0.86    Premium     H     SI2   61.0   58.0   2757  6.15   
53939       53939   0.75      Ideal     D     SI2   62.2   55.0   2757  5.83   

          y     z  cutR  colorR  clarit

In [7]:
# Features: carat plus the three integer rank columns; target: price.
x = diamonds[['carat', 'cutR', 'colorR', 'clarityR']]
y = diamonds['price']

In [8]:
# Hold out 40% of the rows for testing; random_state is fixed so the same
# split is produced on every run.
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size = .4, random_state=101)

In [9]:
# Sanity-check the split: feature and target shapes for train, then test.
print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)

(32364, 4) (32364,)
(21576, 4) (21576,)


In [10]:
# Fully connected regression network: 4 input features -> 1 predicted value.
# NOTE(review): only the first hidden layer is non-linear; the layers after it
# all use a linear activation, so together they compose to one linear map.
# Confirm this is intended.
model = keras.Sequential(
    [
        # Input layer: one value per feature column
        keras.layers.Input(shape=(4, )),
        # Hidden layer 1: 256 nodes, ReLU activation
        keras.layers.Dense(256, activation='relu'),
        # Hidden layer 2: 128 nodes, linear activation
        keras.layers.Dense(128, activation='linear'),
        # Hidden layer 3: 64 nodes, linear activation
        keras.layers.Dense(64, activation='linear'),
        # Output layer: 1 node, linear activation
        keras.layers.Dense(1, activation='linear'),
    ]
)

In [11]:
# summary() prints the layer table itself and returns None, so wrapping it in
# print() only adds a stray "None" line to the cell output.
model.summary()

None


In [12]:
# Configure training: Adam optimizer minimising mean squared error.
model.compile(
    optimizer='Adam',  # Optimizer
    # Loss function to minimize
    loss='MeanSquaredError',
    # List of metrics to monitor.
    # 'mse' is the same quantity as the loss, so the loss and mse columns in
    # the training log carry identical values.
    metrics=['mse'],
)

In [13]:
# Train the model with validation: 200 epochs in batches of 64, with 10% of
# the training rows held out and scored after each epoch (the val_loss and
# val_mse columns in the log). fit()'s return value is kept in `training`.
training = model.fit(x_train, y_train, batch_size=64, epochs=200, validation_split=0.1)

Epoch 1/200
[1m456/456[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 18931080.0000 - mse: 18931080.0000 - val_loss: 2652204.0000 - val_mse: 2652204.0000
Epoch 2/200
[1m456/456[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 841us/step - loss: 2185940.7500 - mse: 2185940.7500 - val_loss: 1264799.3750 - val_mse: 1264799.3750
Epoch 3/200
[1m456/456[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 794us/step - loss: 1151149.2500 - mse: 1151149.2500 - val_loss: 892037.8125 - val_mse: 892037.8125
Epoch 4/200
[1m456/456[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 797us/step - loss: 868508.6875 - mse: 868508.6875 - val_loss: 721810.8125 - val_mse: 721810.8125
Epoch 5/200
[1m456/456[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 818us/step - loss: 734117.4375 - mse: 734117.4375 - val_loss: 614185.1250 - val_mse: 614185.1250
Epoch 6/200
[1m456/456[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 856us/step - loss: 644981.1250 - mse:

In [16]:
# Spot-check the first three held-out rows (selected by position): the model
# output rounded to 3 decimals, followed by the true prices.
predictions = model.predict(x_test.iloc[:3], verbose=0)
print('Predictions:', predictions.round(3))
print('Actual values:', y_test.iloc[:3])

Predictions: [[1449.386]
 [4155.343]
 [ 542.688]]
Actual values: 46519    1781
8639     4452
23029     631
Name: price, dtype: int64


In [17]:
# Evaluate the model on the testing data.
# evaluate() reports the loss followed by the compiled metrics; the only
# metric compiled for this model is 'mse', so the second number is a mean
# squared error, not an accuracy.
results = model.evaluate(x_test, y_test, batch_size=64)
print('Test loss, MSE', results)

[1m338/338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 498us/step - loss: 357741.7188 - mse: 357741.7188
Test loss, accuracy [358109.25, 358109.25]


In [20]:
# NOTE(review): this import sits in the middle of the notebook; it would
# normally live in the first import cell (sklearn.metrics is already imported
# there as `metrics`).
from sklearn.metrics import r2_score
# Predict every test row. This rebinds `predictions`, which previously held
# only the 3-row spot check.
predictions = model.predict(x_test)
# Coefficient of determination of the predictions against the true prices.
r2 = r2_score(y_test, predictions)
print(f'R-squared score: {r2:.2f}')

[1m675/675[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 482us/step
R-squared score: 0.98
