In [132]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, log_loss, r2_score
from torchsummary import summary
import matplotlib.pyplot as plt

In [92]:
cancer = pd.read_csv("./Cases/Wisconsin/BreastCancer.csv", index_col=0)
cancer.head()

Unnamed: 0_level_0,Clump,UniCell_Size,Uni_CellShape,MargAdh,SEpith,BareN,BChromatin,NoemN,Mitoses,Class
Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
61634,5,4,3,1,2,2,2,3,1,Benign
63375,9,1,2,6,4,10,7,7,2,Malignant
76389,10,4,7,2,2,8,6,1,1,Malignant
95719,6,10,10,10,8,10,7,10,7,Malignant
128059,1,1,1,1,2,5,5,1,1,Benign


In [93]:
lbl = LabelEncoder()
X = cancer.drop("Class", axis=1)
y = lbl.fit_transform(cancer['Class'])
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=24, stratify=y)

In [94]:
scaler = MinMaxScaler()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state=24,stratify=y)
X_scl_trn = scaler.fit_transform(X_train) 
X_scl_tst = scaler.transform(X_test) 



In [95]:
X_torch = torch.from_numpy(X_scl_trn)
y_torch = torch.from_numpy(y_train)


print(X_torch.size())
print(y_torch.size())

torch.Size([489, 9])
torch.Size([489])


# Model Definition

In [96]:
torch.manual_seed(24)
model = nn.Sequential(nn.Linear(in_features=X_scl_trn.shape[1], out_features=1))

summary(model, (1,X_scl_trn.shape[1]))

Layer (type:depth-idx)                   Output Shape              Param #
├─Linear: 1-1                            [-1, 1, 1]                10
Total params: 10
Trainable params: 10
Non-trainable params: 0
Total mult-adds (M): 0.00
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00


Layer (type:depth-idx)                   Output Shape              Param #
├─Linear: 1-1                            [-1, 1, 1]                10
Total params: 10
Trainable params: 10
Non-trainable params: 0
Total mult-adds (M): 0.00
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00

In [97]:
criterion = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.SGD(model.parameters(), lr = 0.9)
optimizer

SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    fused: None
    lr: 0.9
    maximize: False
    momentum: 0
    nesterov: False
    weight_decay: 0
)

In [98]:
for p in model.parameters():
    print(p)



Parameter containing:
tensor([[ 0.1763, -0.0833, -0.2833,  0.0205,  0.3107, -0.1487, -0.1085,  0.2607,
         -0.0464]], requires_grad=True)
Parameter containing:
tensor([-0.1273], requires_grad=True)


In [99]:
y_pred = model(X_torch.float())
y_pred[:3]

tensor([[-0.0459],
        [-0.2071],
        [-0.0973]], grad_fn=<SliceBackward0>)

In [100]:
y_torch = y_torch.unsqueeze(1)
print(y_torch.shape)
print(y_pred.shape)

torch.Size([489, 1])
torch.Size([489, 1])


# Initial Log Loss

In [101]:
for epoch in np.arange(0,1000):
       # Forward pass: Compute predicted y by passing x to the model
       y_pred_prob = model(X_torch.float())

       # Compute and print loss
       loss = criterion(y_pred_prob, y_torch.float())
       if epoch%100 == 0:
          print('epoch: ', epoch+1,' loss: ', loss.item())

       # Zero gradients, perform a backward pass, and update the weights.
       optimizer.zero_grad()

       # perform a backward pass (backpropagation)
       loss.backward()

       # Update the parameters
       optimizer.step()
#print('epoch: ', epoch+1,' loss: ', loss.item())

epoch:  1  loss:  0.6887467503547668
epoch:  101  loss:  0.12147819250822067
epoch:  201  loss:  0.10393854230642319
epoch:  301  loss:  0.09798214584589005
epoch:  401  loss:  0.09485926479101181
epoch:  501  loss:  0.09286675602197647
epoch:  601  loss:  0.09145435690879822
epoch:  701  loss:  0.09038924425840378
epoch:  801  loss:  0.0895538181066513
epoch:  901  loss:  0.08888058364391327


In [102]:
X_torch_test = torch.from_numpy(X_scl_tst)

In [103]:
### Inferencing on test set
lin_output = model(X_torch_test.float()) # Equivalent predict_proba / predict

np_out = lin_output.detach().numpy()
y_pred_prob = 1 / (1 + np.exp(-np_out))

In [104]:
y_pred_prob = y_pred_prob.reshape(y_test.shape[0],) 

y_pred = np.where(y_pred_prob >= 0.5,1,0)

# Test set Accuracy score

In [105]:

print(accuracy_score(y_test, y_pred))

0.9714285714285714


# Test Set Log Loss

In [None]:

log_loss(y_test, y_pred_prob)

0.08602577466759474

## Sonar Dataset

In [107]:
sonar = pd.read_csv("./Cases/Sonar/Sonar.csv")
sonar.head()

Unnamed: 0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,...,V52,V53,V54,V55,V56,V57,V58,V59,V60,Class
0,0.02,0.0371,0.0428,0.0207,0.0954,0.0986,0.1539,0.1601,0.3109,0.2111,...,0.0027,0.0065,0.0159,0.0072,0.0167,0.018,0.0084,0.009,0.0032,R
1,0.0453,0.0523,0.0843,0.0689,0.1183,0.2583,0.2156,0.3481,0.3337,0.2872,...,0.0084,0.0089,0.0048,0.0094,0.0191,0.014,0.0049,0.0052,0.0044,R
2,0.0262,0.0582,0.1099,0.1083,0.0974,0.228,0.2431,0.3771,0.5598,0.6194,...,0.0232,0.0166,0.0095,0.018,0.0244,0.0316,0.0164,0.0095,0.0078,R
3,0.01,0.0171,0.0623,0.0205,0.0205,0.0368,0.1098,0.1276,0.0598,0.1264,...,0.0121,0.0036,0.015,0.0085,0.0073,0.005,0.0044,0.004,0.0117,R
4,0.0762,0.0666,0.0481,0.0394,0.059,0.0649,0.1209,0.2467,0.3564,0.4459,...,0.0031,0.0054,0.0105,0.011,0.0015,0.0072,0.0048,0.0107,0.0094,R


In [108]:
lbl = LabelEncoder()
X = sonar.drop("Class", axis=1)
y = lbl.fit_transform(sonar['Class'])
X_train, X_test, y_train, y_test = train_test_split(X.values, y, test_size = 0.3, random_state=24,stratify=y)


In [109]:
X_torch = torch.from_numpy(X_train)
y_torch = torch.from_numpy(y_train)


print(X_torch.size())
print(y_torch.size())

torch.Size([145, 60])
torch.Size([145])


In [110]:
torch.manual_seed(24)
model = nn.Sequential(nn.Linear(in_features=X_train.shape[1], out_features=1))

summary(model, (1,X_train.shape[1]))

Layer (type:depth-idx)                   Output Shape              Param #
├─Linear: 1-1                            [-1, 1, 1]                61
Total params: 61
Trainable params: 61
Non-trainable params: 0
Total mult-adds (M): 0.00
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00


Layer (type:depth-idx)                   Output Shape              Param #
├─Linear: 1-1                            [-1, 1, 1]                61
Total params: 61
Trainable params: 61
Non-trainable params: 0
Total mult-adds (M): 0.00
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00

In [111]:
criterion = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.SGD(model.parameters(), lr = 0.1)
optimizer

SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    fused: None
    lr: 0.1
    maximize: False
    momentum: 0
    nesterov: False
    weight_decay: 0
)

In [112]:
for p in model.parameters():
    print(p)

Parameter containing:
tensor([[ 0.0683, -0.0323, -0.1097,  0.0079,  0.1203, -0.0576, -0.0420,  0.1010,
         -0.0180, -0.0493, -0.0260,  0.0047, -0.1052, -0.0369,  0.1252, -0.0277,
          0.1176, -0.0504, -0.0519, -0.0385, -0.1154, -0.0778,  0.0780, -0.0969,
          0.0441,  0.1215, -0.0164,  0.0565,  0.0603, -0.0922, -0.0498, -0.0404,
          0.1246, -0.1152,  0.0781,  0.1213,  0.1021,  0.0550,  0.0575, -0.0069,
          0.0650,  0.1177, -0.0415,  0.1247,  0.0205, -0.0905, -0.0927,  0.0455,
         -0.0182, -0.0569,  0.0654, -0.0681, -0.0617,  0.0241, -0.0233, -0.1089,
         -0.0388, -0.0454, -0.0550,  0.1005]], requires_grad=True)
Parameter containing:
tensor([-0.0196], requires_grad=True)


In [113]:
y_pred = model(X_torch.float())
y_pred[:3]

tensor([[0.1534],
        [0.3342],
        [0.0447]], grad_fn=<SliceBackward0>)

In [114]:
y_torch = y_torch.unsqueeze(1)
print(y_torch.shape)
print(y_pred.shape)

torch.Size([145, 1])
torch.Size([145, 1])


In [115]:
for epoch in np.arange(0,1000):
       # Forward pass: Compute predicted y by passing x to the model
       y_pred_prob = model(X_torch.float())

       # Compute and print loss
       loss = criterion(y_pred_prob, y_torch.float())
       if epoch%100 == 0:
          print('epoch: ', epoch+1,' loss: ', loss.item())

       # Zero gradients, perform a backward pass, and update the weights.
       optimizer.zero_grad()

       # perform a backward pass (backpropagation)
       loss.backward()

       # Update the parameters
       optimizer.step()
#print('epoch: ', epoch+1,' loss: ', loss.item())

epoch:  1  loss:  0.6839818954467773
epoch:  101  loss:  0.6076671481132507
epoch:  201  loss:  0.5660024881362915
epoch:  301  loss:  0.5375736355781555
epoch:  401  loss:  0.5169585943222046
epoch:  501  loss:  0.50129634141922
epoch:  601  loss:  0.4889477491378784
epoch:  701  loss:  0.47891560196876526
epoch:  801  loss:  0.4705631136894226
epoch:  901  loss:  0.4634658694267273


In [116]:
X_torch_test = torch.from_numpy(X_test)

In [117]:
### Inferencing on test set
lin_output = model(X_torch_test.float()) # Equivalent predict_proba / predict

np_out = lin_output.detach().numpy()
y_pred_prob = 1 / (1 + np.exp(-np_out))

In [118]:
y_pred_prob = y_pred_prob.reshape(y_test.shape[0],) 

y_pred = np.where(y_pred_prob >= 0.5,1,0)

In [119]:
print(accuracy_score(y_test, y_pred))

0.7301587301587301


In [120]:
log_loss(y_test, y_pred_prob)

0.48607733017880606

## Concrete Dataset

In [121]:
df = pd.read_csv("./Cases/Concrete Strength/Concrete_Data.csv")
X = df.drop('Strength', axis=1).values
y = df['Strength'].values

In [122]:
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3, random_state=24)
X_scl_trn = scaler_X.fit_transform(X_train)
X_scl_tst = scaler_X.transform(X_test)
y_scl_trn = scaler_y.fit_transform(y_train.reshape(-1,1))
y_scl_tst = scaler_y.transform(y_test.reshape(-1,1))

In [123]:
X_torch = torch.from_numpy(X_scl_trn)
y_torch = torch.from_numpy(y_scl_trn)


print(X_torch.size())
print(y_torch.size())

torch.Size([721, 8])
torch.Size([721, 1])


In [125]:
torch.manual_seed(24)
model = nn.Sequential(nn.Linear(in_features=X_scl_trn.shape[1], out_features=1))

summary(model, (1,X_scl_trn.shape[1]))
criterion = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.SGD(model.parameters(), lr = 0.9)
optimizer

Layer (type:depth-idx)                   Output Shape              Param #
├─Linear: 1-1                            [-1, 1, 1]                9
Total params: 9
Trainable params: 9
Non-trainable params: 0
Total mult-adds (M): 0.00
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00


SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    fused: None
    lr: 0.9
    maximize: False
    momentum: 0
    nesterov: False
    weight_decay: 0
)

In [126]:
for p in model.parameters():
    print(p)

Parameter containing:
tensor([[ 0.1870, -0.0884, -0.3005,  0.0218,  0.3295, -0.1577, -0.1151,  0.2765]],
       requires_grad=True)
Parameter containing:
tensor([-0.0492], requires_grad=True)


In [127]:
y_pred = model(X_torch.float())
y_pred[:3]

tensor([[ 0.0328],
        [-0.2980],
        [-0.1653]], grad_fn=<SliceBackward0>)

In [128]:
for epoch in np.arange(0,1000):
       # Forward pass: Compute predicted y by passing x to the model
       y_pred_prob = model(X_torch.float())

       # Compute and print loss
       loss = criterion(y_pred_prob, y_torch.float())
       if epoch%100 == 0:
          print('epoch: ', epoch+1,' loss: ', loss.item())

       # Zero gradients, perform a backward pass, and update the weights.
       optimizer.zero_grad()

       # perform a backward pass (backpropagation)
       loss.backward()

       # Update the parameters
       optimizer.step()
#print('epoch: ', epoch+1,' loss: ', loss.item())

epoch:  1  loss:  0.6728695631027222
epoch:  101  loss:  0.6345343589782715
epoch:  201  loss:  0.627467930316925
epoch:  301  loss:  0.624991238117218
epoch:  401  loss:  0.623862624168396
epoch:  501  loss:  0.6232430338859558
epoch:  601  loss:  0.6228533387184143
epoch:  701  loss:  0.6225841045379639
epoch:  801  loss:  0.6223867535591125
epoch:  901  loss:  0.6222362518310547


In [129]:
X_torch_test = torch.from_numpy(X_scl_tst)

In [130]:
### Inferencing on test set
lin_output = model(X_torch_test.float()) # Equivalent predict_proba / predict

np_out = lin_output.detach().numpy()
y_pred_prob = 1 / (1 + np.exp(-np_out))

In [131]:
y_pred_prob = y_pred_prob.reshape(y_test.shape[0],) 

y_pred = np.where(y_pred_prob >= 0.5,1,0)

In [134]:
print(r2_score(y_test, y_pred))


-4.685996918163576


## Glass Dataset

In [None]:
import eng_to_ipa
from difflib import ndiff

def get_ipa(text):
    # Convert the string to IPA using eng_to_ipa library
    ipa = eng_to_ipa.convert(text)
    return ipa

def compare_ipa(text1, text2):
    ipa1 = get_ipa(text1)
    ipa2 = get_ipa(text2)
    
    # Use ndiff to get the differences between two IPA strings
    diff = list(ndiff(ipa1, ipa2))
    
    return diff

# Example usage
text1 = "hello how are you today"
text2 = "hallo and how are you today?"

diff = compare_ipa(text1, text2)

# Print the IPA transcriptions and differences
print(f"IPA1: {get_ipa(text1)}")
print(f"IPA2: {get_ipa(text2)}")
print("Differences:")
print("\n".join(diff))


IPA1: hɛˈloʊ haʊ ər ju təˈdeɪ
IPA2: hallo* ənd haʊ ər ju təˈdeɪ?
Differences:
  h
- ɛ
- ˈ
+ a
+ l
  l
  o
- ʊ
+ *
+  
+ ə
+ n
+ d
   
  h
  a
  ʊ
   
  ə
  r
   
  j
  u
   
  t
  ə
  ˈ
  d
  e
  ɪ
+ ?
