<a href="https://colab.research.google.com/github/everestso/Fall24Spring25/blob/main/pyTorch_DeepNetworks_with_DiamondsInColab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Deep Learning w/ Diamonds

https://www.kaggle.com/datasets/shivam2503/diamonds

In [1]:
import numpy as np
import pandas as pd

import time

In [2]:
!gdown 1-OzEC9axnc2fTMMcHCXnBkQzypluJLLG

file_name = 'diamonds.csv'

df = pd.read_csv(file_name)

df.head()
print (df.shape)

Downloading...
From: https://drive.google.com/uc?id=1-OzEC9axnc2fTMMcHCXnBkQzypluJLLG
To: /content/diamonds.csv
  0% 0.00/2.57M [00:00<?, ?B/s]100% 2.57M/2.57M [00:00<00:00, 267MB/s]
(53940, 10)


In [38]:
# Work on a full copy of the data set.
# (For a quick smoke test, swap in: df.sample(30, random_state=12))
diamond_sample = df.copy()
print(f"{diamond_sample.shape=}")

# Feature frame X: drop every column listed below, which leaves only
# carat, cut and color as model inputs.
drop_columns = ['depth', 'table', 'x', 'y', 'z', 'clarity', 'price']
X = diamond_sample.drop(columns=drop_columns)
print(f"{X.columns=}, {X.shape=}")
print(f"{X[:3]}")


# Target frame y: a single-column DataFrame holding price.
y = diamond_sample.loc[:, ['price']]
print(f"{y.columns}")
print(f"{y.iloc[:3]=}")

diamond_sample.shape=(53940, 10)
X.columns=Index(['carat', 'cut', 'color'], dtype='object'), X.shape=(53940, 3)
   carat      cut color
0   0.23    Ideal     E
1   0.21  Premium     E
2   0.23     Good     E
Index(['price'], dtype='object')
y.iloc[:3]=   price
0    326
1    326
2    327


## Transforms

In [39]:
from sklearn.model_selection import train_test_split
# Define a standardization scaler to transform values
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OrdinalEncoder

# Split data into train and test sets (30% held out); the fixed random_state
# keeps the split repeatable between runs.
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.3, random_state=123)

# Take explicit copies before overwriting columns: assigning into the frames
# returned by train_test_split can raise SettingWithCopyWarning on some pandas
# versions, and copying also guarantees X itself is never modified.
Xtrain = Xtrain.copy()
Xtest = Xtest.copy()

# Define a dictionary for quality mapping (Fair=1 ... Ideal=5)
cut_quality = {'Fair': 1, 'Good': 2, 'Very Good': 3, 'Premium': 4, 'Ideal': 5}

# Apply the mapping to Xtrain and Xtest
Xtrain['cut'] = Xtrain['cut'].map(cut_quality)
Xtest['cut'] = Xtest['cut'].map(cut_quality)

# Series.map produces NaN for any label missing from the dictionary; fail loudly
# here instead of letting NaNs flow silently into the scaler and the network.
assert Xtrain['cut'].notna().all() and Xtest['cut'].notna().all(), "unmapped 'cut' category"

# Create an OrdinalEncoder for the 'color' column
enc = OrdinalEncoder(categories=[['J', 'I', 'H', 'G', 'F', 'E', 'D']]) # Define the order of categories (J -> 0 ... D -> 6)

# Fit the encoder on the training data and transform both training and testing data
Xtrain['color'] = enc.fit_transform(Xtrain[['color']])
Xtest['color'] = enc.transform(Xtest[['color']])

Transform = StandardScaler()

# Fit the scaler on the training split only, then reuse the same statistics
# for the test split so no test information leaks into the transform.
Xtrain_scaled = Transform.fit_transform(Xtrain)
Xtest_scaled = Transform.transform(Xtest)

print  (Xtrain_scaled)

[[ 0.55116139  0.97923439 -0.82538596]
 [-0.84330851  0.97923439  0.34923047]
 [-0.82218018  0.08287491  0.93653868]
 ...
 [-1.05459183  0.97923439  0.93653868]
 [ 0.4243914   0.97923439 -0.23807775]
 [-0.18833022 -0.81348456 -1.41269417]]


In [40]:
# prompt: Print the transform formula details

# Show the per-column statistics the fitted StandardScaler stored.
print(f"Transform.mean_ = {Transform.mean_}")
print(f"Transform.scale_ = {Transform.scale_}")

# For the first column, compare the scaler's stored mean/scale with np.std computed directly on Xtrain.
print (f"{Xtrain.columns[0]=}, {Transform.mean_[0]=}, {Transform.scale_[0]=}, {np.std(Xtrain[Xtrain.columns[0]])=}")
# Recompute the first scaled value by hand as (x - mean_) / scale_ and print it next to Xtrain_scaled[0][0].
print (f"{Xtrain.iloc[0,0]=}, {(Xtrain.iloc[0,0]-Transform.mean_[0])/Transform.scale_[0]=}, {Xtrain_scaled[0][0]=}")
# Column-wise mean and standard deviation of the (encoded, unscaled) training frame, for comparison with mean_ / scale_.
print (f"{np.mean(Xtrain, axis=0)=}")
print (f"{np.std(Xtrain, axis=0)=}")

Transform.mean_ = [0.79913634 3.90754277 3.40537105]
Transform.scale_ = [0.47329813 1.11562384 1.70268349]
Xtrain.columns[0]='carat', Transform.mean_[0]=0.7991363419672652, Transform.scale_[0]=0.47329813202512444, np.std(Xtrain[Xtrain.columns[0]])=0.47329813202512444
Xtrain.iloc[0,0]=1.06, (Xtrain.iloc[0,0]-Transform.mean_[0])/Transform.scale_[0]=0.5511613935946977, Xtrain_scaled[0][0]=0.5511613935946977
np.mean(Xtrain, axis=0)=carat    0.799136
cut      3.907543
color    3.405371
dtype: float64
np.std(Xtrain, axis=0)=carat    0.473298
cut      1.115624
color    1.702683
dtype: float64


# Now PyTorch

https://aibyhand.substack.com/p/6-can-you-code-a-multi-layer-perceptron

In [41]:
import torch
import torch.nn as nn
import torch.optim as optim
status = 10  # progress is reported every `status` epochs

# Define the model structure using nn.Sequential:
# 3 inputs -> 4 -> 2 -> 5 -> 1 output, with ReLU between the linear layers.
model = nn.Sequential(
    nn.Linear(3, 4, bias=True),
    nn.ReLU(),
    nn.Linear(4, 2, bias = False),
    nn.ReLU(),
    nn.Linear(2, 5, bias=True),
    nn.ReLU(),
    nn.Linear(5, 1)
)

# Define the optimizer and loss function
optimizer = optim.Adam(model.parameters())
criterion = nn.MSELoss()

# Convert data to PyTorch tensors (features are the standardized training matrix)
Xtrain_tensor = torch.tensor(Xtrain_scaled, dtype=torch.float32)
ytrain_tensor = torch.tensor(ytrain.values, dtype=torch.float32)

# Train the model
epochs = 130
batch_size = 100

start_time = time.time()
for epoch in range(epochs):
  # Walk the training set in consecutive mini-batches (no shuffling)
  for i in range(0, len(Xtrain_tensor), batch_size):
    # Get batch of data
    Xbatch = Xtrain_tensor[i:i+batch_size]
    ybatch = ytrain_tensor[i:i+batch_size]

    # Forward pass
    outputs = model(Xbatch)
    loss = criterion(outputs, ybatch)

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

  if ((epoch+1)%status==0):
    end_time = time.time()
    elapsed_time = end_time - start_time
    # NOTE: the loss shown is that of the LAST mini-batch of the epoch, not an epoch average
    print(f"Training time ({epoch+1}): {elapsed_time=} seconds, Loss: {loss.item():.4f}") # Print loss with training time

end_time = time.time()
elapsed_time = end_time - start_time
print(f"Training time ({epoch+1}): {elapsed_time=} seconds")

# Convert test data to PyTorch tensors.
# The network was trained on standardized features, so it must be fed the
# standardized test matrix (Xtest_scaled), not the raw Xtest values.
Xtest_tensor = torch.tensor(Xtest_scaled, dtype=torch.float32)

# Make predictions for the first three test rows; no gradients are needed for inference
with torch.no_grad():
  predictions = model(Xtest_tensor[:3])

# Convert predictions to numpy array
predictions = predictions.numpy()

print('Predictions:', predictions.round(3))
print('Actual values:', ytest[:3])

Training time (10): elapsed_time=4.3379435539245605 seconds, Loss: 961474.9375
Training time (20): elapsed_time=8.670960187911987 seconds, Loss: 872019.0625
Training time (30): elapsed_time=13.118133068084717 seconds, Loss: 845679.5000
Training time (40): elapsed_time=17.429007053375244 seconds, Loss: 840067.0625
Training time (50): elapsed_time=21.805064916610718 seconds, Loss: 830470.2500
Training time (60): elapsed_time=26.454598665237427 seconds, Loss: 822546.6875
Training time (70): elapsed_time=30.799639463424683 seconds, Loss: 817568.1250
Training time (80): elapsed_time=35.28777742385864 seconds, Loss: 816712.6875
Training time (90): elapsed_time=39.657697677612305 seconds, Loss: 813717.4375
Training time (100): elapsed_time=43.94639849662781 seconds, Loss: 811830.5000
Training time (110): elapsed_time=48.30076551437378 seconds, Loss: 810532.0625
Training time (120): elapsed_time=52.560551404953 seconds, Loss: 809776.5000
Training time (130): elapsed_time=56.88023924827576 seco

In [42]:
from sklearn.metrics import r2_score

# Score the trained PyTorch `model` with R-squared on both splits,
# always feeding it the standardized feature matrices.
print (f"{Xtrain.shape=}")

# Training split: forward pass without autograd tracking, straight to numpy
Xtrain_tensor = torch.tensor(Xtrain_scaled, dtype=torch.float32)
with torch.no_grad():
  y_predicted = model(Xtrain_tensor).numpy()
r2 = r2_score(ytrain, y_predicted)
print("R-squared Train:", r2)

# Test split: same procedure on the held-out rows
Xtest_tensor = torch.tensor(Xtest_scaled, dtype=torch.float32)
with torch.no_grad():
  y_predicted = model(Xtest_tensor).numpy()
r2 = r2_score(ytest, y_predicted)
print("R-squared Test:", r2)

Xtrain.shape=(37758, 3)
R-squared Train: 0.8859268504506055
R-squared Test: 0.8853695330034252


# Try the 4 C's (carat, cut, color, clarity)


In [47]:
# Work on a full copy of the data set.
# (For a quick smoke test, swap in: df.sample(30, random_state=12))
diamond_sample = df.copy()
print(f"{diamond_sample.shape=}")

# Feature frame X: drop every column listed below, which leaves only
# carat, cut, color and clarity as model inputs.
drop_columns = ['depth', 'table', 'x', 'y', 'z', 'price']
X = diamond_sample.drop(columns=drop_columns)
print(f"{X.columns=}, {X.shape=}")
print(f"{X[:3]}")


# Target frame y: a single-column DataFrame holding price.
y = diamond_sample.loc[:, ['price']]
print(f"{y.columns}")
print(f"{y.iloc[:3]=}")

diamond_sample.shape=(53940, 10)
X.columns=Index(['carat', 'cut', 'color', 'clarity'], dtype='object'), X.shape=(53940, 4)
   carat      cut color clarity
0   0.23    Ideal     E     SI2
1   0.21  Premium     E     SI1
2   0.23     Good     E     VS1
Index(['price'], dtype='object')
y.iloc[:3]=   price
0    326
1    326
2    327


In [48]:
# Split data into train and test sets (30% held out); the fixed random_state
# keeps the split repeatable between runs.
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.3, random_state=123)

# Take explicit copies before overwriting columns: assigning into the frames
# returned by train_test_split can raise SettingWithCopyWarning on some pandas
# versions, and copying also guarantees X itself is never modified.
Xtrain = Xtrain.copy()
Xtest = Xtest.copy()

# Define a dictionary for quality mapping (Fair=1 ... Ideal=5)
cut_quality = {'Fair': 1, 'Good': 2, 'Very Good': 3, 'Premium': 4, 'Ideal': 5}

# Apply the mapping to Xtrain and Xtest
Xtrain['cut'] = Xtrain['cut'].map(cut_quality)
Xtest['cut'] = Xtest['cut'].map(cut_quality)

# Series.map produces NaN for any label missing from the dictionary; fail loudly
# here instead of letting NaNs flow silently into the scaler and the network.
assert Xtrain['cut'].notna().all() and Xtest['cut'].notna().all(), "unmapped 'cut' category"

# Create an OrdinalEncoder for the 'color' column
enc = OrdinalEncoder(categories=[['J', 'I', 'H', 'G', 'F', 'E', 'D']]) # Define the order of categories (J -> 0 ... D -> 6)

# Fit the encoder on the training data and transform both training and testing data
Xtrain['color'] = enc.fit_transform(Xtrain[['color']])
Xtest['color'] = enc.transform(Xtest[['color']])

# Create an OrdinalEncoder for the 'clarity' column (I1 -> 0 ... IF -> 7)
clarity_enc = OrdinalEncoder(categories=[['I1', 'SI2', 'SI1', 'VS2', 'VS1', 'VVS2', 'VVS1', 'IF']])

# Fit the encoder on the training data and transform both training and testing data
Xtrain['clarity'] = clarity_enc.fit_transform(Xtrain[['clarity']])
Xtest['clarity'] = clarity_enc.transform(Xtest[['clarity']])

Transform = StandardScaler()

# Fit the scaler on the training split only, then reuse the same statistics
# for the test split so no test information leaks into the transform.
Xtrain_scaled = Transform.fit_transform(Xtrain)
Xtest_scaled = Transform.transform(Xtest)

print  (Xtrain_scaled)

[[ 0.55116139  0.97923439 -0.82538596 -0.02809222]
 [-0.84330851  0.97923439  0.34923047  1.18761651]
 [-0.82218018  0.08287491  0.93653868 -0.63594658]
 ...
 [-1.05459183  0.97923439  0.93653868 -0.02809222]
 [ 0.4243914   0.97923439 -0.23807775  0.57976215]
 [-0.18833022 -0.81348456 -1.41269417  1.79547088]]


In [49]:
import torch
import torch.nn as nn
import torch.optim as optim
status = 10  # progress is reported every `status` epochs

# Define the model structure using nn.Sequential:
# 4 inputs -> 4 -> 2 -> 5 -> 1 output, with ReLU between the linear layers.
model = nn.Sequential(
    nn.Linear(4, 4, bias=True),
    nn.ReLU(),
    nn.Linear(4, 2, bias = False),
    nn.ReLU(),
    nn.Linear(2, 5, bias=True),
    nn.ReLU(),
    nn.Linear(5, 1)
)

# Define the optimizer and loss function
optimizer = optim.Adam(model.parameters())
criterion = nn.MSELoss()

# Convert data to PyTorch tensors (features are the standardized training matrix)
Xtrain_tensor = torch.tensor(Xtrain_scaled, dtype=torch.float32)
ytrain_tensor = torch.tensor(ytrain.values, dtype=torch.float32)

# Train the model
epochs = 130
batch_size = 100

start_time = time.time()
for epoch in range(epochs):
  # Walk the training set in consecutive mini-batches (no shuffling)
  for i in range(0, len(Xtrain_tensor), batch_size):
    # Get batch of data
    Xbatch = Xtrain_tensor[i:i+batch_size]
    ybatch = ytrain_tensor[i:i+batch_size]

    # Forward pass
    outputs = model(Xbatch)
    loss = criterion(outputs, ybatch)

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

  if ((epoch+1)%status==0):
    end_time = time.time()
    elapsed_time = end_time - start_time
    # NOTE: the loss shown is that of the LAST mini-batch of the epoch, not an epoch average
    print(f"Training time ({epoch+1}): {elapsed_time=} seconds, Loss: {loss.item():.4f}") # Print loss with training time

end_time = time.time()
elapsed_time = end_time - start_time
print(f"Training time ({epoch+1}): {elapsed_time=} seconds")

# Convert test data to PyTorch tensors.
# The network was trained on standardized features, so it must be fed the
# standardized test matrix (Xtest_scaled), not the raw Xtest values.
Xtest_tensor = torch.tensor(Xtest_scaled, dtype=torch.float32)

# Make predictions for the first three test rows; no gradients are needed for inference
with torch.no_grad():
  predictions = model(Xtest_tensor[:3])

# Convert predictions to numpy array
predictions = predictions.numpy()

print('Predictions:', predictions.round(3))
print('Actual values:', ytest[:3])

Training time (10): elapsed_time=4.6828391551971436 seconds, Loss: 623406.7500
Training time (20): elapsed_time=9.145548343658447 seconds, Loss: 518124.0000
Training time (30): elapsed_time=13.662511825561523 seconds, Loss: 489427.9375
Training time (40): elapsed_time=18.270651817321777 seconds, Loss: 512963.6562
Training time (50): elapsed_time=22.954044342041016 seconds, Loss: 515371.4688
Training time (60): elapsed_time=27.534481525421143 seconds, Loss: 506927.0938
Training time (70): elapsed_time=31.873916149139404 seconds, Loss: 504640.6875
Training time (80): elapsed_time=36.25572609901428 seconds, Loss: 503994.8750
Training time (90): elapsed_time=40.5290789604187 seconds, Loss: 503789.7188
Training time (100): elapsed_time=44.806037187576294 seconds, Loss: 503736.5312
Training time (110): elapsed_time=49.23063039779663 seconds, Loss: 503801.0938
Training time (120): elapsed_time=53.63481664657593 seconds, Loss: 503903.7188
Training time (130): elapsed_time=58.04401445388794 sec

In [50]:
from sklearn.metrics import r2_score

# Score the trained PyTorch `model` with R-squared on both splits,
# always feeding it the standardized feature matrices.
print (f"{Xtrain_scaled.shape=}")

# Training split: forward pass without autograd tracking, straight to numpy
Xtrain_tensor = torch.tensor(Xtrain_scaled, dtype=torch.float32)
with torch.no_grad():
  y_predicted = model(Xtrain_tensor).numpy()
r2 = r2_score(ytrain, y_predicted)
print("R-squared Train:", r2)

# Test split: same procedure on the held-out rows
Xtest_tensor = torch.tensor(Xtest_scaled, dtype=torch.float32)
with torch.no_grad():
  y_predicted = model(Xtest_tensor).numpy()
r2 = r2_score(ytest, y_predicted)
print("R-squared Test:", r2)

Xtrain_scaled.shape=(37758, 4)
R-squared Train: 0.9436782179350288
R-squared Test: 0.9421347493127947


# Try more features


In [43]:
# Work on a full copy of the data set.
# (For a quick smoke test, swap in: df.sample(30, random_state=12))
diamond_sample = df.copy()
print(f"{diamond_sample.shape=}")

# Feature frame X: drop only the target column, so every other column
# of the data set is kept as a model input.
drop_columns = ['price']
X = diamond_sample.drop(columns=drop_columns)
print(f"{X.columns=}, {X.shape=}")
print(f"{X[:3]}")


# Target frame y: a single-column DataFrame holding price.
y = diamond_sample.loc[:, ['price']]
print(f"{y.columns}")
print(f"{y.iloc[:3]=}")

diamond_sample.shape=(53940, 10)
X.columns=Index(['carat', 'cut', 'color', 'clarity', 'depth', 'table', 'x', 'y', 'z'], dtype='object'), X.shape=(53940, 9)
   carat      cut color clarity  depth  table     x     y     z
0   0.23    Ideal     E     SI2   61.5   55.0  3.95  3.98  2.43
1   0.21  Premium     E     SI1   59.8   61.0  3.89  3.84  2.31
2   0.23     Good     E     VS1   56.9   65.0  4.05  4.07  2.31
Index(['price'], dtype='object')
y.iloc[:3]=   price
0    326
1    326
2    327


In [44]:
# Split data into train and test sets (30% held out); the fixed random_state
# keeps the split repeatable between runs.
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.3, random_state=123)

# Take explicit copies before overwriting columns: assigning into the frames
# returned by train_test_split can raise SettingWithCopyWarning on some pandas
# versions, and copying also guarantees X itself is never modified.
Xtrain = Xtrain.copy()
Xtest = Xtest.copy()

# Define a dictionary for quality mapping (Fair=1 ... Ideal=5)
cut_quality = {'Fair': 1, 'Good': 2, 'Very Good': 3, 'Premium': 4, 'Ideal': 5}

# Apply the mapping to Xtrain and Xtest
Xtrain['cut'] = Xtrain['cut'].map(cut_quality)
Xtest['cut'] = Xtest['cut'].map(cut_quality)

# Series.map produces NaN for any label missing from the dictionary; fail loudly
# here instead of letting NaNs flow silently into the scaler and the network.
assert Xtrain['cut'].notna().all() and Xtest['cut'].notna().all(), "unmapped 'cut' category"

# Create an OrdinalEncoder for the 'color' column
enc = OrdinalEncoder(categories=[['J', 'I', 'H', 'G', 'F', 'E', 'D']]) # Define the order of categories (J -> 0 ... D -> 6)

# Fit the encoder on the training data and transform both training and testing data
Xtrain['color'] = enc.fit_transform(Xtrain[['color']])
Xtest['color'] = enc.transform(Xtest[['color']])

# Create an OrdinalEncoder for the 'clarity' column (I1 -> 0 ... IF -> 7)
clarity_enc = OrdinalEncoder(categories=[['I1', 'SI2', 'SI1', 'VS2', 'VS1', 'VVS2', 'VVS1', 'IF']])

# Fit the encoder on the training data and transform both training and testing data
Xtrain['clarity'] = clarity_enc.fit_transform(Xtrain[['clarity']])
Xtest['clarity'] = clarity_enc.transform(Xtest[['clarity']])

Transform = StandardScaler()

# Fit the scaler on the training split only, then reuse the same statistics
# for the test split so no test information leaks into the transform.
Xtrain_scaled = Transform.fit_transform(Xtrain)
Xtest_scaled = Transform.transform(Xtest)

print  (Xtrain_scaled)

[[ 0.55116139  0.97923439 -0.82538596 ...  0.79886224  0.74694202
   0.64630735]
 [-0.84330851  0.97923439  0.34923047 ... -0.87788733 -0.83947701
  -0.81830926]
 [-0.82218018  0.08287491  0.93653868 ... -0.77086076 -0.77012536
  -0.95913778]
 ...
 [-1.05459183  0.97923439  0.93653868 ... -1.27923696 -1.22091109
  -1.21262911]
 [ 0.4243914   0.97923439 -0.23807775 ...  0.55805246  0.50421124
   0.6322245 ]
 [-0.18833022 -0.81348456 -1.41269417 ... -0.07518807 -0.04193302
   0.06891042]]


In [45]:
import torch
import torch.nn as nn
import torch.optim as optim
status = 10  # progress is reported every `status` epochs

# Define the model structure using nn.Sequential:
# 9 inputs -> 4 -> 2 -> 5 -> 1 output, with ReLU between the linear layers.
model = nn.Sequential(
    nn.Linear(9, 4, bias=True),
    nn.ReLU(),
    nn.Linear(4, 2, bias = False),
    nn.ReLU(),
    nn.Linear(2, 5, bias=True),
    nn.ReLU(),
    nn.Linear(5, 1)
)

# Define the optimizer and loss function
optimizer = optim.Adam(model.parameters())
criterion = nn.MSELoss()

# Convert data to PyTorch tensors (features are the standardized training matrix)
Xtrain_tensor = torch.tensor(Xtrain_scaled, dtype=torch.float32)
ytrain_tensor = torch.tensor(ytrain.values, dtype=torch.float32)

# Train the model
epochs = 130
batch_size = 100

start_time = time.time()
for epoch in range(epochs):
  # Walk the training set in consecutive mini-batches (no shuffling)
  for i in range(0, len(Xtrain_tensor), batch_size):
    # Get batch of data
    Xbatch = Xtrain_tensor[i:i+batch_size]
    ybatch = ytrain_tensor[i:i+batch_size]

    # Forward pass
    outputs = model(Xbatch)
    loss = criterion(outputs, ybatch)

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

  if ((epoch+1)%status==0):
    end_time = time.time()
    elapsed_time = end_time - start_time
    # NOTE: the loss shown is that of the LAST mini-batch of the epoch, not an epoch average
    print(f"Training time ({epoch+1}): {elapsed_time=} seconds, Loss: {loss.item():.4f}") # Print loss with training time

end_time = time.time()
elapsed_time = end_time - start_time
print(f"Training time ({epoch+1}): {elapsed_time=} seconds")

# Convert test data to PyTorch tensors.
# The network was trained on standardized features, so it must be fed the
# standardized test matrix (Xtest_scaled), not the raw Xtest values.
Xtest_tensor = torch.tensor(Xtest_scaled, dtype=torch.float32)

# Make predictions for the first three test rows; no gradients are needed for inference
with torch.no_grad():
  predictions = model(Xtest_tensor[:3])

# Convert predictions to numpy array
predictions = predictions.numpy()

print('Predictions:', predictions.round(3))
print('Actual values:', ytest[:3])

Training time (10): elapsed_time=4.277407646179199 seconds, Loss: 562195.3750
Training time (20): elapsed_time=8.632644891738892 seconds, Loss: 639430.6875
Training time (30): elapsed_time=12.931382656097412 seconds, Loss: 623241.6250
Training time (40): elapsed_time=17.280531883239746 seconds, Loss: 583849.2500
Training time (50): elapsed_time=21.61224365234375 seconds, Loss: 525784.4375
Training time (60): elapsed_time=25.880529642105103 seconds, Loss: 500532.4688
Training time (70): elapsed_time=30.201783895492554 seconds, Loss: 496160.7500
Training time (80): elapsed_time=34.451969385147095 seconds, Loss: 494125.3750
Training time (90): elapsed_time=38.69642782211304 seconds, Loss: 503100.6250
Training time (100): elapsed_time=42.98260140419006 seconds, Loss: 517033.3750
Training time (110): elapsed_time=47.37083554267883 seconds, Loss: 526923.3125
Training time (120): elapsed_time=51.731287717819214 seconds, Loss: 526414.5000
Training time (130): elapsed_time=56.03997850418091 sec

In [46]:
from sklearn.metrics import r2_score

# Score the trained PyTorch `model` with R-squared on both splits,
# always feeding it the standardized feature matrices.
print (f"{Xtrain_scaled.shape=}")

# Training split: forward pass without autograd tracking, straight to numpy
Xtrain_tensor = torch.tensor(Xtrain_scaled, dtype=torch.float32)
with torch.no_grad():
  y_predicted = model(Xtrain_tensor).numpy()
r2 = r2_score(ytrain, y_predicted)
print("R-squared Train:", r2)

# Test split: same procedure on the held-out rows
Xtest_tensor = torch.tensor(Xtest_scaled, dtype=torch.float32)
with torch.no_grad():
  y_predicted = model(Xtest_tensor).numpy()
r2 = r2_score(ytest, y_predicted)
print("R-squared Test:", r2)

Xtrain_scaled.shape=(37758, 9)
R-squared Train: 0.9524080304932849
R-squared Test: 0.9534701339094086
