In [1]:
import torch
from sklearn.datasets import make_classification

In [2]:
# create a synthetic classification using sklearn
X , y = make_classification(
    n_samples=10, # No of samples
    n_features=2, # No. of features
    n_informative=2, # No. of informative features
    n_redundant=0, # No. of redundant features
    n_classes=2, # No. of classes
    random_state=42 # For reproducibility
)

In [3]:
X

array([[ 1.06833894, -0.97007347],
       [-1.14021544, -0.83879234],
       [-2.8953973 ,  1.97686236],
       [-0.72063436, -0.96059253],
       [-1.96287438, -0.99225135],
       [-0.9382051 , -0.54304815],
       [ 1.72725924, -1.18582677],
       [ 1.77736657,  1.51157598],
       [ 1.89969252,  0.83444483],
       [-0.58723065, -1.97171753]])

In [4]:
print(X.shape)
print(y)
print(y.shape)

(10, 2)
[1 0 0 0 0 1 1 1 1 0]
(10,)


In [8]:
# convert the data in pytorch tensor
X_torch_tensor = torch.tensor(X, dtype = torch.float32)
y_torch_tensor = torch.tensor(y, dtype = torch.long)

In [9]:
from torch.utils.data import DataLoader, Dataset

In [10]:
class CustomDataset(Dataset):
    def __init__(self,features,labels):
        self.features = features
        self.labels = labels

        # super().__init__()
    
    def __getitem__(self, index):
        return self.features[index],self.labels[index]
    
    def __len__(self):
        return self.features.shape[0]

In [14]:
dataset = CustomDataset(X,y)
len(dataset)
dataset[0]

(array([ 1.06833894, -0.97007347]), np.int64(1))

In [16]:
dataloader = DataLoader(dataset,batch_size=2,shuffle=True)



In [18]:
for batch_features,batch_label in dataloader:
    print(batch_features)
    print(batch_label)
    print("-"*50)

tensor([[-0.7206, -0.9606],
        [-2.8954,  1.9769]], dtype=torch.float64)
tensor([0, 0])
--------------------------------------------------
tensor([[ 1.0683, -0.9701],
        [-0.9382, -0.5430]], dtype=torch.float64)
tensor([1, 1])
--------------------------------------------------
tensor([[-1.9629, -0.9923],
        [ 1.7273, -1.1858]], dtype=torch.float64)
tensor([0, 1])
--------------------------------------------------
tensor([[-1.1402, -0.8388],
        [ 1.7774,  1.5116]], dtype=torch.float64)
tensor([0, 1])
--------------------------------------------------
tensor([[ 1.8997,  0.8344],
        [-0.5872, -1.9717]], dtype=torch.float64)
tensor([1, 0])
--------------------------------------------------


In [19]:
# Pytorch training pipeline using pytorch
import numpy as np
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

In [20]:
df = pd.read_csv('breast-cancer.csv')
df.head()

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,842302,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,84358402,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [21]:
df.shape
# Remove the unnescessary column 
df.drop(columns=['id'],inplace = True)

In [22]:
# Train Test Split
x_train,x_test,y_train,y_test = train_test_split(df.iloc[:,1:],df.iloc[:,0],test_size = 0.3,random_state = 19)


In [23]:
scaler = StandardScaler()
X_train = scaler.fit_transform(x_train)
X_test = scaler.transform(x_test)

In [24]:
X_train

array([[ 1.79425731,  0.09221347,  1.80388116, ...,  1.56645559,
         0.1557361 , -0.21827903],
       [-0.61143809, -1.0368692 , -0.55073179, ..., -0.81968672,
        -0.31471993,  0.99611375],
       [ 0.62918019, -0.70899653,  0.64116153, ...,  0.16409505,
         0.34237603,  0.25206565],
       ...,
       [-0.14656941, -1.48480793, -0.23024398, ..., -1.40253331,
        -1.02426017, -1.21902372],
       [-0.1349477 , -1.10382911, -0.15540712, ...,  0.22037199,
         0.07089977, -0.65195278],
       [-0.83225071, -1.17078903, -0.8412003 , ..., -1.14213838,
        -0.54763423, -0.78003534]])

In [25]:
X_test

array([[-0.28312459, -0.16639034, -0.36046858, ..., -0.97619745,
        -1.45615423, -1.23124737],
       [ 1.68094557,  1.38061455,  1.66012673, ...,  0.89569529,
         0.57683281, -0.01100849],
       [ 0.07133778,  0.69485268,  0.22342808, ...,  0.9763082 ,
        -0.13424991,  1.50631815],
       ...,
       [-0.62015438,  0.55400597, -0.60485111, ..., -0.40780035,
        -0.96564598,  0.77927687],
       [-0.53008607,  0.31849179, -0.58836163, ..., -0.60842004,
        -0.36562173, -0.85444015],
       [-0.33542231,  0.5147536 , -0.35962297, ..., -0.88676275,
        -0.68337236, -0.17788785]])

In [26]:
encoder = LabelEncoder()
y_train = encoder.fit_transform(y_train)
y_test = encoder.transform(y_test)

In [27]:
y_train

array([1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
       0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0,
       0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0,
       0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0,
       1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0,
       0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1,
       1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1,
       1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1,
       1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1,
       0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,

In [28]:
y_test

array([0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1,
       1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1,
       0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1,
       1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1,
       1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0])

In [29]:
x_train_tensor = torch.from_numpy(X_train)
x_test_tensor = torch.from_numpy(X_test)
y_train_tensor = torch.from_numpy(y_train)
y_test_tensor = torch.from_numpy(y_test)

In [30]:
x_train_tensor.shape
x_train_tensor

tensor([[ 1.7943,  0.0922,  1.8039,  ...,  1.5665,  0.1557, -0.2183],
        [-0.6114, -1.0369, -0.5507,  ..., -0.8197, -0.3147,  0.9961],
        [ 0.6292, -0.7090,  0.6412,  ...,  0.1641,  0.3424,  0.2521],
        ...,
        [-0.1466, -1.4848, -0.2302,  ..., -1.4025, -1.0243, -1.2190],
        [-0.1349, -1.1038, -0.1554,  ...,  0.2204,  0.0709, -0.6520],
        [-0.8323, -1.1708, -0.8412,  ..., -1.1421, -0.5476, -0.7800]],
       dtype=torch.float64)

In [31]:
y_train_tensor.shape

torch.Size([398])

In [32]:
from torch.utils.data import DataLoader,Dataset

In [33]:
class CustomDataset(Dataset):
    def __init__(self,features,labels):
        self.features = features
        self.labels = labels

        # super().__init__()
    
    def __getitem__(self, index):
        return self.features[index],self.labels[index]
    
    def __len__(self):
        return self.features.shape[0]

In [None]:
train_dataset = CustomDataset(x_train_tensor,y_train_tensor)
test_dataset = CustomDataset(x_test_tensor,y_test_tensor)

In [35]:
train_dataset[10]

(tensor([ 0.0713,  0.5956,  0.1055, -0.0270,  0.1480,  0.1091,  0.5753,  0.2130,
          0.2830, -0.2461, -0.4113, -0.2923, -0.0681, -0.3298, -0.3572,  0.2810,
          0.5015, -0.0085, -0.0539,  0.0780, -0.0493,  0.7996,  0.3012, -0.1156,
          0.5363,  1.0235,  1.1330,  0.4622,  1.0180,  0.8792],
        dtype=torch.float64),
 tensor(1))

In [37]:
train_loader = DataLoader(train_dataset,batch_size=32,shuffle=True)
test_loader = DataLoader(test_dataset,batch_size=32,shuffle=True)
import torch.nn as nn

In [None]:
class Model(nn.Module):

    def __init__(self, num_features):
        super().__init__()
        self.linear = nn.Linear(num_features.torch.float64,1)
        self.sigmoid = nn.Sigmoid()
        
    
    def forward(self, features):
        out = self.linear(features)
        out = self.sigmoid(out)
        return out

In [39]:
learning_rate = 0.2
epochs = 25

In [40]:
loss_function =nn.BCELoss()

In [43]:
model = Model(x_train_tensor.shape[1])

# define optimizer
optimizer = torch.optim.SGD(model.parameters(),lr=learning_rate)
for epoch in range(epochs):

    # forward pass
    y_pred = model(x_train_tensor)

    # loss calculation
    loss = loss_function(y_pred,y_train_tensor.reshape(-1,1))

    # backward pass
    loss.backward()

    # # parameters update
    # with torch.no_grad():
    #     model.linear.weight -= learning_rate * model.linear.weight.grad
    #     model.linear.bias -= learning_rate * model.linear.bias.grad
    
    # # zero gradients
    # model.linear.weight.grad.zero_()
    # model.linear.bias.grad.zero_()
    
    # parameter update
    optimizer.step()
    optimizer.zero_grad()


    # print loss in each epoch
    print(f"Epoch :{epoch+1} , Loss: {loss.item()}")


RuntimeError: mat1 and mat2 must have the same dtype, but got Double and Float