In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import typing

class AlexNet(nn.Module):
    """AlexNet-style convolutional classifier for images of a fixed size.

    The network is five convolutional stages (``features``) followed by three
    fully connected layers (``classifier``). The width of the first fully
    connected layer is measured at construction time by running one dummy
    sample of shape ``input_shape`` through ``features``.

    Args:
        input_shape: Shape of a single input sample as ``(C, H, W)``.
        num_classes: Number of output classes.

    Note:
        ``classifier`` ends with ``nn.Softmax``, so ``forward`` returns
        probabilities, not logits. ``nn.CrossEntropyLoss`` expects raw logits,
        so this output should not be passed to it directly.
    """

    def __init__(self, input_shape: typing.Tuple[int, int, int], num_classes: int = 1000):
        super().__init__()

        # Only the channel count of (C, H, W) is needed to build the first convolution.
        in_channels = input_shape[0]

        # Convolutional layers
        self.features = nn.Sequential(
            # Layer 1
            nn.Conv2d(in_channels=in_channels, out_channels=96, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.LocalResponseNorm(size=5, alpha=1e-4, beta=0.75, k=2.0),

            # Layer 2
            nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, stride=1, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.LocalResponseNorm(size=5, alpha=1e-4, beta=0.75, k=2.0),

            # Layer 3
            nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),

            # Layer 4
            nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),

            # Layer 5
            nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.LocalResponseNorm(size=5, alpha=1e-4, beta=0.75, k=2.0),
        )

        # Measure the per-sample feature count that feeds the first Linear layer.
        # The probe only needs shapes, so gradient tracking is switched off.
        self._to_linear = None
        with torch.no_grad():
            self.convs(torch.randn(1, *input_shape))

        # Fully connected layers
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(self._to_linear, 4096),
            nn.ReLU(inplace=True),

            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),

            nn.Linear(4096, num_classes),
            nn.Softmax(dim=1),  # Turns the final scores into per-class probabilities
        )

    def convs(self, x):
        """Run ``x`` through the convolutional stack and return the feature map.

        On the first call the per-sample feature count is cached in
        ``self._to_linear``. The batch dimension is excluded so the cached
        value is correct whatever batch size the first call uses.
        """
        x = self.features(x)
        if self._to_linear is None:
            self._to_linear = x.flatten(1).size(1)
        return x

    def forward(self, x):
        """Return class probabilities of shape ``(batch, num_classes)``.

        The feature map is flattened per sample. An input whose spatial size
        differs from ``input_shape`` therefore fails with a shape error in the
        first Linear layer instead of being silently reshaped into extra
        batch rows.
        """
        x = self.convs(x)
        x = torch.flatten(x, 1)  # keep the batch dimension, flatten the rest
        x = self.classifier(x)
        return x

# Example usage: build the network for 227x227 RGB inputs.
input_shape = (3, 227, 227)  # (channels, height, width)
num_classes = 1000
model = AlexNet(input_shape, num_classes)

# Smoke test: push one random sample through the network and inspect the result.
x = torch.randn((1, *input_shape))
output = model(x)
print(output.shape)  # Expected: torch.Size([1, 1000])


In [None]:
import torch
import torch.nn as nn
import typing

class AlexNet(nn.Module):
    """AlexNet-style classifier that can report tensor shapes as data flows through it.

    Args:
        input_shape: Shape of a single input sample as ``(C, H, W)``.
        num_classes: Number of output classes.
        verbose: When True (the default) ``forward`` prints the tensor shape
            after each stage. Pass False to silence the output, e.g. inside a
            training loop.

    Note:
        ``classifier`` ends with ``nn.Softmax``, so ``forward`` returns
        probabilities, not logits.
    """

    def __init__(self, input_shape: typing.Tuple[int, int, int], num_classes: int = 1000,
                 verbose: bool = True):
        super().__init__()

        self.verbose = verbose
        in_channels = input_shape[0]

        # Convolutional layers
        self.features = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=96, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.LocalResponseNorm(size=5, alpha=1e-4, beta=0.75, k=2.0),

            nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, stride=1, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.LocalResponseNorm(size=5, alpha=1e-4, beta=0.75, k=2.0),

            nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),

            nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),

            nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.LocalResponseNorm(size=5, alpha=1e-4, beta=0.75, k=2.0),
        )

        # Calculate the input size for the fully connected layers
        self._to_linear = self._get_conv_output_size(input_shape)

        # Fully connected layers
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(self._to_linear, 4096),
            nn.ReLU(inplace=True),

            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),

            nn.Linear(4096, num_classes),
            nn.Softmax(dim=1),
        )

    def _get_conv_output_size(self, shape):
        """Return the number of features one sample of ``shape`` yields after ``features``."""
        # Only shapes matter for this probe, so gradient tracking is switched off.
        with torch.no_grad():
            probe = self.features(torch.randn(1, *shape))
        # Flatten everything except the batch dimension to get the per-sample size.
        return int(probe.flatten(1).size(1))

    def _log(self, message):
        """Print ``message`` only when shape reporting is enabled."""
        if self.verbose:
            print(message)

    def forward(self, x):
        """Return class probabilities of shape ``(batch, num_classes)``, reporting shapes if verbose."""
        self._log(f"Input: {x.shape}")

        # Convolutional layers
        x = self.features(x)
        self._log(f"After Convolutional Layers: {x.shape}")

        # Flattening (batch dimension is preserved)
        x = x.view(x.size(0), -1)
        self._log(f"After flattening: {x.shape}")

        # Fully connected layers
        x = self.classifier(x)
        self._log(f"After Fully Connected Layers: {x.shape}")

        return x


# Example usage: instantiate the shape-reporting network for 227x227 RGB images.
input_shape = (3, 227, 227)  # (channels, height, width)
num_classes = 1000
model = AlexNet(input_shape, num_classes)

# Feed one random sample through the model; forward() prints the shapes on the way.
x = torch.randn((1, *input_shape))
output = model(x)


In [None]:
import torch
import torch.nn as nn
import typing

class AlexNet(nn.Module):
    """AlexNet-style classifier that can report the tensor shape after every single layer.

    Args:
        input_shape: Shape of a single input sample as ``(C, H, W)``.
        num_classes: Number of output classes.
        verbose: When True (the default) ``forward`` prints the shape produced
            by each layer. Pass False to silence the output, e.g. inside a
            training loop.

    Note:
        ``classifier`` ends with ``nn.Softmax``, so ``forward`` returns
        probabilities, not logits.
    """

    def __init__(self, input_shape: typing.Tuple[int, int, int], num_classes: int = 1000,
                 verbose: bool = True):
        super().__init__()

        self.verbose = verbose
        in_channels = input_shape[0]

        # Convolutional layers
        self.features = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=96, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.LocalResponseNorm(size=5, alpha=1e-4, beta=0.75, k=2.0),

            nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, stride=1, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.LocalResponseNorm(size=5, alpha=1e-4, beta=0.75, k=2.0),

            nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),

            nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),

            nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.LocalResponseNorm(size=5, alpha=1e-4, beta=0.75, k=2.0),
        )

        # Calculate the input size for the fully connected layers
        self._to_linear = self._get_conv_output_size(input_shape)

        # Fully connected layers
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(self._to_linear, 4096),
            nn.ReLU(inplace=True),

            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),

            nn.Linear(4096, num_classes),
            nn.Softmax(dim=1),
        )

    def _get_conv_output_size(self, shape):
        """Return the number of features one sample of ``shape`` yields after ``features``."""
        # Only shapes matter for this probe, so gradient tracking is switched off.
        with torch.no_grad():
            probe = self.features(torch.randn(1, *shape))
        # Flatten everything except the batch dimension to get the per-sample size.
        return int(probe.flatten(1).size(1))

    def _log(self, message):
        """Print ``message`` only when shape reporting is enabled."""
        if self.verbose:
            print(message)

    def forward(self, x):
        """Return class probabilities of shape ``(batch, num_classes)``.

        Layers are applied one at a time so each intermediate shape can be
        reported. Numbering is continuous: the convolutional layers come
        first and the classifier layers continue from there.
        """
        self._log(f"Input shape: {x.shape}")

        # Convolutional layers
        for index, layer in enumerate(self.features, start=1):
            x = layer(x)
            self._log(f"After Layer {index} ({layer.__class__.__name__}): {x.shape}")

        # Flattening (batch dimension is preserved)
        x = x.view(x.size(0), -1)
        self._log(f"After flattening: {x.shape}")

        # Fully connected layers, numbered after the last convolutional layer
        offset = len(self.features)
        for index, layer in enumerate(self.classifier, start=offset + 1):
            x = layer(x)
            self._log(f"After Layer {index} ({layer.__class__.__name__}): {x.shape}")

        return x


# Example usage: instantiate the per-layer reporting network for 227x227 RGB images.
input_shape = (3, 227, 227)  # (channels, height, width)
num_classes = 1000
model = AlexNet(input_shape, num_classes)

# One random sample is enough to trace the shape after every layer.
x = torch.randn((1, *input_shape))
output = model(x)


## Otra forma

In [None]:
import torch
import torch.nn as nn
import typing

class AlexNet(nn.Module):
    """AlexNet-style classifier that can print the output shape of each convolution.

    Args:
        input_shape: Shape of a single input sample as ``(C, H, W)``.
        num_classes: Number of output classes.
        verbose: When True (the default) every pass through ``convs`` prints
            the shape after each ``nn.Conv2d``. This includes the dummy pass
            made during construction. Pass False to silence the output.

    Note:
        ``classifier`` ends with ``nn.Softmax``, so ``forward`` returns
        probabilities, not logits.
    """

    def __init__(self, input_shape: typing.Tuple[int, int, int], num_classes: int = 1000,
                 verbose: bool = True):
        super().__init__()

        # Must be set before the dummy pass below, which already consults it.
        self.verbose = verbose

        # Only the channel count of (C, H, W) is needed to build the first convolution.
        in_channels = input_shape[0]

        # Convolutional layers
        self.features = nn.Sequential(
            # Layer 1
            nn.Conv2d(in_channels=in_channels, out_channels=96, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.LocalResponseNorm(size=5, alpha=1e-4, beta=0.75, k=2.0),

            # Layer 2
            nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, stride=1, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.LocalResponseNorm(size=5, alpha=1e-4, beta=0.75, k=2.0),

            # Layer 3
            nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),

            # Layer 4
            nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),

            # Layer 5
            nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.LocalResponseNorm(size=5, alpha=1e-4, beta=0.75, k=2.0),
        )

        # Measure the per-sample feature count that feeds the first Linear layer.
        # The probe only needs shapes, so gradient tracking is switched off.
        self._to_linear = None
        with torch.no_grad():
            self.convs(torch.randn(1, *input_shape))

        # Fully connected layers
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(self._to_linear, 4096),
            nn.ReLU(inplace=True),

            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),

            nn.Linear(4096, num_classes),
            nn.Softmax(dim=1),  # Turns the final scores into per-class probabilities
        )

    def convs(self, x):
        """Run ``x`` through the convolutional stack, reporting the shape after each conv.

        Convolutions are numbered by counting them as they are met. The stages
        hold different numbers of layers, so a number derived from the position
        in ``features`` would label two layers "3" and never reach 5.

        On the first call the per-sample feature count is cached in
        ``self._to_linear``, with the batch dimension excluded.
        """
        conv_number = 0
        for layer in self.features:
            x = layer(x)
            if isinstance(layer, nn.Conv2d):
                conv_number += 1
                if self.verbose:
                    print(f"Shape after Conv Layer {conv_number}: {x.shape}")
        if self._to_linear is None:
            self._to_linear = x.flatten(1).size(1)
        return x

    def forward(self, x):
        """Return class probabilities of shape ``(batch, num_classes)``.

        The feature map is flattened per sample. An input whose spatial size
        differs from ``input_shape`` therefore fails with a shape error in the
        first Linear layer instead of being silently reshaped into extra
        batch rows.
        """
        x = self.convs(x)
        x = torch.flatten(x, 1)  # keep the batch dimension, flatten the rest
        x = self.classifier(x)
        return x

# Example usage: constructing the model already prints the conv shapes for one dummy sample.
input_shape = (3, 227, 227)  # (channels, height, width)
num_classes = 1000
model = AlexNet(input_shape, num_classes)

# Smoke test: push one random sample through the network and inspect the result.
x = torch.randn((1, *input_shape))
output = model(x)
print(output.shape)  # Expected: torch.Size([1, 1000])


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader

# Preprocessing applied to every CIFAR-10 image: resize to 224x224, convert to a
# tensor, then normalize each of the three channels with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# CIFAR-10 train and test splits stored under ./data (download requested).
train_dataset = CIFAR10('./data', train=True, transform=transform, download=True)
test_dataset = CIFAR10('./data', train=False, transform=transform, download=True)

# Mini-batch loaders: training batches are shuffled, test batches keep dataset order.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

# Define AlexNet architecture
class AlexNet(nn.Module):
    """AlexNet variant with an adaptive pooling stage in front of the classifier.

    ``features`` holds five convolutions with interleaved ReLU / max-pooling.
    ``avgpool`` resizes whatever feature map comes out to 6x6, and
    ``classifier`` maps the flattened 256 * 6 * 6 features to ``num_classes``
    raw scores. No softmax is applied.

    Args:
        num_classes: Number of output scores per sample.
    """

    def __init__(self, num_classes=10):
        super().__init__()

        # Convolutional feature extractor, listed in execution order.
        conv_stack = [
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        self.features = nn.Sequential(*conv_stack)

        # Fixed 6x6 spatial output, so the classifier input width is constant.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))

        flat_features = 256 * 6 * 6
        head = [
            nn.Dropout(),
            nn.Linear(flat_features, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Return raw class scores of shape ``(batch, num_classes)``."""
        pooled = self.avgpool(self.features(x))
        flat = torch.flatten(pooled, 1)  # keep the batch dimension
        return self.classifier(flat)

# Initialize the model on the GPU when one is available, otherwise on the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AlexNet().to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# Train the model
num_epochs = 10
model.train()  # make sure dropout is active while fitting
for epoch in range(num_epochs):
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data[0].to(device), data[1].to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if i % 100 == 99:  # Print the mean loss of the last 100 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0

print('Finished Training')

# Test the model
# Switch to evaluation mode first: otherwise the Dropout layers keep randomly
# zeroing activations and the measured accuracy is noisy and pessimistic.
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for data in test_loader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = model(images)
        # Predicted class = index of the largest score in each row.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))


##------
# Save the trained weights
torch.save(model.state_dict(), 'alexnet_cifar10.pth')

# Re-create the network on the same device the rest of the notebook uses
model = AlexNet().to(device)

# Load the saved model state dictionary. map_location remaps the stored tensors
# onto `device`, so a checkpoint written from a GPU run can still be loaded on
# a CPU-only machine.
model.load_state_dict(torch.load('alexnet_cifar10.pth', map_location=device))

# Set the model to evaluation mode
model.eval()





In [None]:
# Rebuild the network on the GPU when one is available, otherwise on the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AlexNet().to(device)

# Load the saved model state dictionary. map_location remaps the stored tensors
# onto `device`, so a checkpoint written from a GPU run can still be loaded on
# a CPU-only machine.
model.load_state_dict(torch.load('alexnet_cifar10.pth', map_location=device))

# Set the model to evaluation mode
model.eval()


In [None]:
from PIL import Image

# Open the image to classify. convert('RGB') guarantees three channels, which the
# 3-channel Normalize transform and the network's first convolution require.
# Grayscale, palette or RGBA files would otherwise fail further down.
img = Image.open('dog1.jpg').convert('RGB')


In [None]:
# Inference preprocessing: resize to 224x224, convert to a tensor, then normalize
# each of the three channels with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# Apply the pipeline to the opened image.
img_tensor = transform(img)




In [None]:

# Add the batch dimension and move to device.
# The guard keeps this cell safe to re-run: without it every execution would
# stack one more leading dimension onto img_tensor and the model call would fail.
if img_tensor.dim() == 3:
    img_tensor = img_tensor.unsqueeze(0)
img_tensor = img_tensor.to(device)

# Forward pass (no gradients are needed for inference)
with torch.no_grad():
    output = model(img_tensor)

In [None]:
# Index of the largest score in each row of `output`.
# argmax is used instead of `_, predicted = torch.max(...)` because assigning to
# `_` shadows IPython's "last output" variable for the rest of the session.
predicted = output.argmax(dim=1)
print(predicted)

In [None]:
# CIFAR-10 class labels, ordered so that a predicted index maps to its name
class_names = [
    'airplane',
    'automobile',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck',
]


In [None]:
# Translate the single predicted class index into its label.
class_names[int(predicted.item())]