<a href="https://colab.research.google.com/github/eshal26/Bank-Costumer-Churn-Prediction/blob/main/ResNet50.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!unzip Segmented.zip

In [3]:
import torch
import torchvision
import torchvision.transforms as transforms
from torchvision import datasets
from torch.utils.data import DataLoader
import os
import sklearn
from sklearn.model_selection import train_test_split

In [4]:
from sklearn.model_selection import train_test_split
import shutil
import os

# Define paths to your original dataset directory
original_data_dir = 'Segmented'

# Define paths to create directories for the new dataset splits
train_dir = 'train_dataset'
val_dir = 'validation_dataset'
test_dir = 'test_dataset'

# Create directories for the new dataset splits
os.makedirs(train_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

# List of categories (classes)
categories = ['Benign', 'Early', 'Pre', 'Pro']

# Iterate over each category to split the dataset
for category in categories:
    category_dir = os.path.join(original_data_dir, category)
    # Get list of all image filenames in the current category
    image_files = [os.path.join(category_dir, img) for img in os.listdir(category_dir)]
    # Split image filenames into training and testing sets
    train_files, test_val_files = train_test_split(image_files, test_size=0.2, random_state=42)
    val_files, test_files = train_test_split(test_val_files, test_size=0.5, random_state=42)

    # Create directories for the current category in each split
    train_category_dir = os.path.join(train_dir, category)
    val_category_dir = os.path.join(val_dir, category)
    test_category_dir = os.path.join(test_dir, category)
    os.makedirs(train_category_dir, exist_ok=True)
    os.makedirs(val_category_dir, exist_ok=True)
    os.makedirs(test_category_dir, exist_ok=True)

    # Copy files to the corresponding split directories
    for file in train_files:
        shutil.copy(file, train_category_dir)
    for file in val_files:
        shutil.copy(file, val_category_dir)
    for file in test_files:
        shutil.copy(file, test_category_dir)

# Print out the number of images in each split and category
def count_images(directory):
    return sum(len(files) for _, _, files in os.walk(directory))

for category in categories:
    print(f"Category: {category}")
    print(f"Number of training images: {count_images(os.path.join(train_dir, category))}")
    print(f"Number of validation images: {count_images(os.path.join(val_dir, category))}")
    print(f"Number of test images: {count_images(os.path.join(test_dir, category))}")
    print("--------------------")


Category: Benign
Number of training images: 403
Number of validation images: 50
Number of test images: 51
--------------------
Category: Early
Number of training images: 788
Number of validation images: 98
Number of test images: 99
--------------------
Category: Pre
Number of training images: 770
Number of validation images: 96
Number of test images: 97
--------------------
Category: Pro
Number of training images: 643
Number of validation images: 80
Number of test images: 81
--------------------


In [5]:
import torch
from torchvision import transforms, datasets

# Define transformations for preprocessing
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load training, validation, and test datasets using ImageFolder
train_dataset = datasets.ImageFolder('train_dataset', transform=transform)
val_dataset = datasets.ImageFolder('validation_dataset', transform=transform)
test_dataset = datasets.ImageFolder('test_dataset', transform=transform)

# Create data loaders for batching and shuffling data
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False)


In [6]:
import torchvision.models as models

# Load pre-trained ResNet-50 model
model = models.resnet50(pretrained=True)

# Modify the final fully connected layer for your specific task
num_classes = len(train_dataset.classes)
model.fc = torch.nn.Linear(model.fc.in_features, num_classes)

# Optionally, move model to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 166MB/s]


In [7]:
import torch.optim as optim

# Define loss function and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f"Epoch {epoch + 1}, Training Loss: {running_loss / len(train_loader)}")

# Optionally, evaluate the model on the validation set
# (calculate validation accuracy, adjust hyperparameters, etc.)


Epoch 1, Training Loss: 0.3188421130180359
Epoch 2, Training Loss: 0.16942309178184808
Epoch 3, Training Loss: 0.17775561728673736
Epoch 4, Training Loss: 0.11890113095911901
Epoch 5, Training Loss: 0.09921097834651335
Epoch 6, Training Loss: 0.09129482621281612
Epoch 7, Training Loss: 0.07412011129994558
Epoch 8, Training Loss: 0.1680719896969272
Epoch 9, Training Loss: 0.07923168439713375
Epoch 10, Training Loss: 0.03560149782300336


In [8]:
# Evaluate the model on the test set
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

test_accuracy = correct / total
print(f"Test Accuracy: {test_accuracy}")


Test Accuracy: 0.9817073170731707
