In [1]:
import json
import os
import shutil

import numpy as np
import torch # Keep torch for later tensor conversion if needed
from tqdm import tqdm

# --- Ensure the memory-mapping load function is defined ---
import zipfile
def load(scenario=1, block_size='4k', subset='train'):
    """Load one split of a scenario as ``(x, y, labels_list)``.

    ``y`` is read fully into memory from
    ``./dataset/<block_size>_<scenario>/<subset>.npz``. ``x`` is extracted once
    from the same archive to ``<subset>_x.npy`` next to it and is then opened
    read-only as a memory map, so it is never loaded into RAM as a whole.

    Args:
        scenario (int): Scenario number, 1 through 6.
        block_size (str): ``'512'`` or ``'4k'``.
        subset (str): ``'train'``, ``'val'`` or ``'test'``.

    Returns:
        tuple: ``(x, y, labels_list)``. ``x`` is a read-only ``np.memmap`` of
        shape ``(len(y), 4096)`` for ``'4k'`` or ``(len(y), 512)`` for
        ``'512'``; ``y`` is the label array stored in the archive;
        ``labels_list`` is the ``classes.json`` entry for this scenario.

    Raises:
        ValueError: If an argument is outside the accepted values.
        FileNotFoundError: If ``classes.json`` is not in the current directory.
        RuntimeError: If ``y`` cannot be read, ``x`` cannot be extracted, or
            the memory map cannot be opened with the expected shape.
    """
    # Validate parameters
    if block_size not in ['512', '4k']:
        raise ValueError('Invalid block size!')
    if scenario not in range(1, 7):
        raise ValueError('Invalid scenario!')
    if subset not in ['train', 'val', 'test']:
        raise ValueError('Invalid subset!')

    # Build data directory and NPZ path
    data_dir = os.path.join('./dataset', '{:s}_{:1d}'.format(block_size, scenario))
    npz_path = os.path.join(data_dir, '{}.npz'.format(subset))
    print(f"Processing NPZ file: {npz_path}")

    # Load classes information from classes.json
    if os.path.isfile('classes.json'):
        with open('classes.json') as json_file:
            classes = json.load(json_file)
            labels_list = classes[str(scenario)] # Get list of labels for the scenario
    else:
        raise FileNotFoundError('Please download classes.json to the current directory!')

    # Load y from the NPZ (this is small)
    # NOTE(review): allow_pickle=True executes pickled objects from the archive;
    # only use it with archives from a trusted source.
    try:
        with np.load(npz_path, allow_pickle=True) as data_zip:
            y = data_zip['y']
        print(f"Loaded 'y' array, shape: {y.shape}")
    except Exception as e:
        raise RuntimeError(f"Failed to load 'y' from {npz_path}: {e}") from e

    num_samples = y.shape[0]

    # Determine sample length based on block_size
    sample_len = 4096 if block_size == '4k' else 512
    desired_shape = (num_samples, sample_len)

    # Define path for the npy file that will be used for memmapping x
    memmap_path = os.path.join(data_dir, '{}_x.npy'.format(subset))

    # If memmap file does not exist, extract it from the NPZ
    if not os.path.exists(memmap_path):
        print(f"Extracting 'x' data to memmap file: {memmap_path}")
        # Bound before the try block so the cleanup below can never hit an
        # unassigned name when the archive fails to open.
        temp_extract_path = memmap_path + '.part'
        try:
            with zipfile.ZipFile(npz_path, 'r') as z:
                members = z.namelist()
                name = next(
                    (m for m in members if m.startswith('x') and m.endswith('.npy')),
                    None,
                )
                if name is None:
                    # Fallback if 'x.npy' isn't found directly (might be just 'x'?)
                    if 'x' in members:
                        name = 'x'
                    else:
                        raise KeyError("No file/key for 'x' found in the NPZ archive.")

                print(f"Found '{name}' in archive, extracting...")

                # Ensure the target directory exists
                os.makedirs(data_dir, exist_ok=True)

                # Stream the member to disk in chunks instead of reading the
                # whole array into memory first.
                with z.open(name) as f_in, open(temp_extract_path, 'wb') as f_out:
                    shutil.copyfileobj(f_in, f_out, 16 * 1024 * 1024)
                print(f"Extracted to temporary path: {temp_extract_path}")

            # Publish the file under its final name only once it is complete
            os.replace(temp_extract_path, memmap_path)
            print(f"Renamed to memmap path: {memmap_path}")

        except Exception as e:
            # Clean up a partially extracted file on error
            if os.path.exists(temp_extract_path):
                os.remove(temp_extract_path)
            raise RuntimeError(f"Failed to extract 'x' from {npz_path}: {e}") from e
    else:
        print(f"Found existing memmap file at: {memmap_path}")

    # Open a memmap for x
    try:
        print(f"Opening memmap for 'x': {memmap_path} with shape {desired_shape}")
        with open(memmap_path, 'rb') as fh:
            has_npy_header = fh.read(6) == b'\x93NUMPY'
        if has_npy_header:
            # A member extracted from an NPZ is a full .npy file. np.load skips
            # its header; a raw np.memmap at offset 0 would read the header as
            # data and shift every row against y.
            x = np.load(memmap_path, mmap_mode='r')
            if x.shape != desired_shape:
                raise ValueError(
                    f"'x' has shape {x.shape} but {desired_shape} was expected from 'y'"
                )
        else:
            # Header-less raw dump: map the bytes directly
            x = np.memmap(memmap_path, mode='r', dtype=np.uint8, shape=desired_shape)
        print("Memmap for 'x' opened successfully.")
    except Exception as e:
        raise RuntimeError(f"Failed to create memmap for {memmap_path} with shape {desired_shape}: {e}") from e

    return x, y, labels_list # Return the list of labels too

# --- Define Target Office Labels ---
office_labels_specialist = ['doc', 'docx', 'key', 'ppt', 'pptx', 'xls', 'xlsx']
print(f"Target Office labels: {office_labels_specialist}")

# --- Load Scenario 1 Labels to find indices ---
# load() reports a missing classes.json with FileNotFoundError and an
# unreadable NPZ / memmap with RuntimeError. Raising here stops this cell (and
# "Run All") with a clear message, whereas exit() would end the whole session.
try:
    _, _, s1_labels_list = load(scenario=1, block_size='4k', subset='train') # Load just to get labels
except (FileNotFoundError, RuntimeError) as e:
    raise RuntimeError(f"Error: Cannot find Scenario 1 data or classes.json: {e}") from e

# --- Find S1 indices for Office types ---
s1_office_indices = []
specialist_label_map = {} # S1 index -> Specialist index (0-6)
new_specialist_labels = [] # Store the labels in the new order

for new_idx, label in enumerate(office_labels_specialist):
    if label not in s1_labels_list:
        print(f"Warning: Label '{label}' not found in Scenario 1 labels!")
        continue
    s1_idx = s1_labels_list.index(label)
    s1_office_indices.append(s1_idx)
    specialist_label_map[s1_idx] = new_idx
    new_specialist_labels.append(label)
    print(f"Found '{label}' at Scenario 1 index: {s1_idx} -> Specialist index: {new_idx}")

print(f"\nScenario 1 indices for Office types: {s1_office_indices}")
print(f"Mapping from S1 index to Specialist index: {specialist_label_map}")
print(f"Final Specialist Labels: {new_specialist_labels}") # Should match office_labels_specialist

# Every later cell assumes a complete 0..N-1 mapping, so stop if a label is missing
if len(s1_office_indices) != len(office_labels_specialist):
    raise ValueError("Error: Not all Office labels were found in Scenario 1. Cannot proceed.")

Target Office labels: ['doc', 'docx', 'key', 'ppt', 'pptx', 'xls', 'xlsx']
Processing NPZ file: ./dataset\4k_1\train.npz
Loaded 'y' array, shape: (6144000,)
Found existing memmap file at: ./dataset\4k_1\train_x.npy
Opening memmap for 'x': ./dataset\4k_1\train_x.npy with shape (6144000, 4096)
Memmap for 'x' opened successfully.
Found 'doc' at Scenario 1 index: 44 -> Specialist index: 0
Found 'docx' at Scenario 1 index: 45 -> Specialist index: 1
Found 'key' at Scenario 1 index: 46 -> Specialist index: 2
Found 'ppt' at Scenario 1 index: 47 -> Specialist index: 3
Found 'pptx' at Scenario 1 index: 48 -> Specialist index: 4
Found 'xls' at Scenario 1 index: 49 -> Specialist index: 5
Found 'xlsx' at Scenario 1 index: 50 -> Specialist index: 6

Scenario 1 indices for Office types: [44, 45, 46, 47, 48, 49, 50]
Mapping from S1 index to Specialist index: {44: 0, 45: 1, 46: 2, 47: 3, 48: 4, 49: 5, 50: 6}
Final Specialist Labels: ['doc', 'docx', 'key', 'ppt', 'pptx', 'xls', 'xlsx']


In [2]:
def create_specialist_dataset(source_scenario, source_block_size, subset,
                              target_indices_s1, target_label_map_s1,
                              output_dir, output_prefix="office_specialist",
                              direct_index_threshold_gb=4.0, chunk_size=10000):
    """
    Creates a specialist dataset by filtering Scenario 1 data.

    Rows whose label is in ``target_indices_s1`` are kept, their labels are
    remapped through ``target_label_map_s1`` and the result is written to
    ``<output_dir>/<output_prefix>_<subset>_x.npy`` and ``..._y.npy``. Both
    files are regular ``.npy`` files (with header), so they can be read back
    with ``np.load``, including ``mmap_mode='r'``.

    Errors while loading the source or writing the output are printed and the
    function returns ``None`` without raising.

    Args:
        source_scenario (int): Scenario number of the source data (should be 1).
        source_block_size (str): Block size ('4k' or '512').
        subset (str): Data split ('train', 'val', or 'test').
        target_indices_s1 (list): List of indices in the source scenario's labels
                                   that correspond to the specialist classes.
        target_label_map_s1 (dict): Mapping from source index to new specialist index (0 to N-1).
        output_dir (str): Directory to save the new dataset files.
        output_prefix (str): Prefix for the output filenames.
        direct_index_threshold_gb (float): If the filtered 'x' is estimated to be
            smaller than this many GiB it is filtered in RAM in one step;
            otherwise it is copied to disk chunk by chunk.
        chunk_size (int): Number of rows copied per step in the chunked path.

    Raises:
        KeyError: If a kept label has no entry in ``target_label_map_s1``.
    """
    print(f"\n--- Creating Specialist Dataset for Subset: {subset} ---")
    os.makedirs(output_dir, exist_ok=True)

    # Load Scenario 1 data (x is memmapped, y is in memory)
    try:
        x_s1_mm, y_s1, _ = load(source_scenario, source_block_size, subset)
    except Exception as e:
        print(f"Error loading source data for {subset}: {e}")
        return

    # Create a boolean mask to find rows matching target indices
    print("Creating filter mask...")
    mask = np.isin(y_s1, target_indices_s1)
    num_target_samples = int(np.sum(mask))
    print(f"Found {num_target_samples} samples for the specialist dataset in '{subset}'.")

    if num_target_samples == 0:
        print("Warning: No samples found for this subset. Skipping save.")
        return

    # Filter the 'y' array and remap labels
    print("Filtering and remapping labels ('y')...")
    y_specialist = y_s1[mask]
    # Look each distinct source label up once, then broadcast through the
    # inverse index instead of doing one dict lookup per sample.
    source_labels, inverse = np.unique(y_specialist, return_inverse=True)
    remapped_labels = np.array(
        [target_label_map_s1[int(old_idx)] for old_idx in source_labels],
        dtype=np.uint8,
    )
    y_specialist_remapped = remapped_labels[inverse.reshape(y_specialist.shape)]
    print(f"Shape of new 'y': {y_specialist_remapped.shape}")

    # Check memory requirements before trying to filter 'x'
    bytes_needed = num_target_samples * x_s1_mm.shape[1] * x_s1_mm.dtype.itemsize
    gb_needed = bytes_needed / (1024**3)
    print(f"Estimated memory needed to load filtered 'x' into RAM: {gb_needed:.2f} GiB")

    # Define output paths
    output_x_path = os.path.join(output_dir, f"{output_prefix}_{subset}_x.npy")
    output_y_path = os.path.join(output_dir, f"{output_prefix}_{subset}_y.npy")

    try:
        if gb_needed < direct_index_threshold_gb:
            print("Attempting direct indexing to filter 'x' into memory...")
            # Boolean indexing already materialises a new in-memory array, so
            # no extra .copy() (which would double peak memory) is needed.
            x_specialist = x_s1_mm[mask]
            print("Direct indexing successful.")

            print(f"Saving filtered 'x' to: {output_x_path}")
            np.save(output_x_path, x_specialist)

        else:
            print("Estimated memory too large for direct indexing. Using iterative copy...")
            # open_memmap writes a valid .npy header; a bare np.memmap would
            # produce a raw file that np.load cannot read.
            x_specialist_shape = (num_target_samples, x_s1_mm.shape[1])
            x_specialist_mm_out = np.lib.format.open_memmap(
                output_x_path, mode='w+', dtype=x_s1_mm.dtype, shape=x_specialist_shape
            )

            # Find the indices we need to copy
            target_row_indices = np.flatnonzero(mask)

            # Output row i corresponds to the i-th selected source row, so the
            # same [start:stop] window addresses both sides.
            for start in tqdm(range(0, num_target_samples, chunk_size), desc="Copying chunks"):
                stop = min(start + chunk_size, num_target_samples)
                x_specialist_mm_out[start:stop] = x_s1_mm[target_row_indices[start:stop]]

            # Ensure data is written to disk
            x_specialist_mm_out.flush()
            del x_specialist_mm_out
            print("Iterative copy complete.")

        # Save the corresponding 'y' array
        print(f"Saving remapped 'y' to: {output_y_path}")
        np.save(output_y_path, y_specialist_remapped)

        print(f"Successfully created specialist dataset files for '{subset}' in '{output_dir}'.")

    except MemoryError as e:
        print(f"\nMemoryError occurred during filtering/saving 'x': {e}")
        print("Even the chosen filtering method failed. Try reducing chunk_size if using iterative copy, or ensure more system RAM/PageFile.")
    except Exception as e:
        print(f"\nAn error occurred during filtering/saving: {e}")

In [3]:
# Define output directory for the specialist dataset
output_specialist_dir = './dataset/office_specialist_4k'

# Build the train / val / test splits with identical settings
for split_name in ('train', 'val', 'test'):
    create_specialist_dataset(
        source_scenario=1,
        source_block_size='4k',
        subset=split_name,
        target_indices_s1=s1_office_indices,
        target_label_map_s1=specialist_label_map,
        output_dir=output_specialist_dir,
    )

# Store the class names next to the arrays, in specialist-index order
output_labels_path = os.path.join(output_specialist_dir, 'labels.json')
with open(output_labels_path, 'w') as labels_file:
    json.dump(new_specialist_labels, labels_file)
print(f"\nSaved specialist labels to {output_labels_path}")


--- Creating Specialist Dataset for Subset: train ---
Processing NPZ file: ./dataset\4k_1\train.npz
Loaded 'y' array, shape: (6144000,)
Found existing memmap file at: ./dataset\4k_1\train_x.npy
Opening memmap for 'x': ./dataset\4k_1\train_x.npy with shape (6144000, 4096)
Memmap for 'x' opened successfully.
Creating filter mask...
Found 573528 samples for the specialist dataset in 'train'.
Filtering and remapping labels ('y')...
Shape of new 'y': (573528,)
Estimated memory needed to load filtered 'x' into RAM: 2.19 GiB
Attempting direct indexing to filter 'x' into memory...
Direct indexing successful.
Saving filtered 'x' to: ./dataset/office_specialist_4k\office_specialist_train_x.npy
Saving remapped 'y' to: ./dataset/office_specialist_4k\office_specialist_train_y.npy
Successfully created specialist dataset files for 'train' in './dataset/office_specialist_4k'.

--- Creating Specialist Dataset for Subset: val ---
Processing NPZ file: ./dataset\4k_1\val.npz
Loaded 'y' array, shape: (768

In [4]:
def load_specialist(dataset_dir, subset='train', prefix="office_specialist"):
    """Loads one split of a specialist dataset from ``dataset_dir``.

    Args:
        dataset_dir (str): Directory holding ``<prefix>_<subset>_x.npy``,
            ``<prefix>_<subset>_y.npy`` and ``labels.json``.
        subset (str): Split name used in the file names (e.g. 'train').
        prefix (str): File name prefix the dataset was written with.

    Returns:
        tuple: ``(x, y, labels)`` where ``x`` is opened read-only as a memory
        map, ``y`` is loaded into memory and ``labels`` is the decoded content
        of ``labels.json``.

    Raises:
        FileNotFoundError: If the x/y arrays or ``labels.json`` are missing.
        ValueError: If ``x`` and ``y`` do not have the same number of rows.
    """
    x_path = os.path.join(dataset_dir, f"{prefix}_{subset}_x.npy")
    y_path = os.path.join(dataset_dir, f"{prefix}_{subset}_y.npy")
    labels_path = os.path.join(dataset_dir, 'labels.json')

    if not (os.path.exists(x_path) and os.path.exists(y_path)):
        raise FileNotFoundError(f"Dataset files not found in {dataset_dir} for subset {subset}")
    # Check up front so a missing labels file fails before any array is opened
    if not os.path.exists(labels_path):
        raise FileNotFoundError(f"labels.json not found in {dataset_dir}")

    print(f"Loading specialist data: {x_path}, {y_path}")
    # Use np.load with mmap_mode to correctly read the header and shape
    x = np.load(x_path, mmap_mode='r')
    y = np.load(y_path)
    if x.shape[0] != y.shape[0]:
        raise ValueError(
            f"x and y disagree on sample count for subset {subset}: "
            f"{x.shape[0]} vs {y.shape[0]}"
        )
    # Load labels
    with open(labels_path, 'r') as f:
        labels = json.load(f)

    print(f"Loaded specialist {subset} data: x_shape={x.shape}, y_shape={y.shape}")
    return x, y, labels

# Example usage: load the train split written to output_specialist_dir.
# x is memory-mapped read-only by load_specialist; y and labels are in memory.
x, y, labels = load_specialist(output_specialist_dir, 'train')

Loading specialist data: ./dataset/office_specialist_4k\office_specialist_train_x.npy, ./dataset/office_specialist_4k\office_specialist_train_y.npy
Loaded specialist train data: x_shape=(573528, 4096), y_shape=(573528,)


In [5]:
# Sanity check: class names as read back from labels.json
print(labels)

['doc', 'docx', 'key', 'ppt', 'pptx', 'xls', 'xlsx']


In [6]:
# Build the shuffled training DataLoader from the arrays loaded above.
# Each intermediate name is dropped as soon as the next object holds the data,
# so only `train_loader` remains in the notebook namespace afterwards.
x_tensor = torch.tensor(x, dtype=torch.uint8)  # copies x into an in-memory uint8 tensor
del x
y_tensor = torch.tensor(y, dtype=torch.uint8)
del y

train_dataset = torch.utils.data.TensorDataset(x_tensor, y_tensor)
del x_tensor, y_tensor

train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=16, shuffle=True, pin_memory=True, num_workers=6
)
del train_dataset

In [7]:
# Load the test split; this rebinds x, y and labels, which the next cell consumes
x, y, labels = load_specialist(output_specialist_dir, 'test')

Loading specialist data: ./dataset/office_specialist_4k\office_specialist_test_x.npy, ./dataset/office_specialist_4k\office_specialist_test_y.npy
Loaded specialist test data: x_shape=(71718, 4096), y_shape=(71718,)


In [8]:
# Create the PyTorch Dataset & DataLoader for the test split
x_tensor = torch.tensor(x, dtype=torch.uint8)  # copies x into an in-memory uint8 tensor
del x
y_tensor = torch.tensor(y, dtype=torch.uint8)
del y
test_dataset = torch.utils.data.TensorDataset(x_tensor, y_tensor)
del x_tensor
del y_tensor
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=16,
    # Evaluation gains nothing from a random order; a fixed order keeps
    # batch composition, and any per-batch metric, identical between runs.
    shuffle=False,
    pin_memory=True,
    num_workers=6,
)

del test_dataset

In [9]:
# Load the validation split; this rebinds x, y and labels, which the next cell consumes
x, y, labels = load_specialist(output_specialist_dir, 'val')

Loading specialist data: ./dataset/office_specialist_4k\office_specialist_val_x.npy, ./dataset/office_specialist_4k\office_specialist_val_y.npy
Loaded specialist val data: x_shape=(71554, 4096), y_shape=(71554,)


In [10]:
# Create the PyTorch Dataset & DataLoader for the validation split
x_tensor = torch.tensor(x, dtype=torch.uint8)  # copies x into an in-memory uint8 tensor
del x
y_tensor = torch.tensor(y, dtype=torch.uint8)
del y
val_dataset = torch.utils.data.TensorDataset(x_tensor, y_tensor)
del x_tensor
del y_tensor
val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=16,
    # Evaluation gains nothing from a random order; a fixed order keeps
    # batch composition, and any per-batch metric, identical between runs.
    shuffle=False,
    pin_memory=True,
    num_workers=6,
)

del val_dataset

In [11]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Attention(nn.Module):
    """Attention pooling over the time axis of a sequence.

    A single linear layer scores every time step, the scores are normalised
    with a softmax along the sequence dimension, and the sequence is reduced
    to the resulting weighted sum.
    """

    def __init__(self, hidden_dim):
        super().__init__()
        # One scalar score per time step
        self.attn = nn.Linear(hidden_dim, 1)

    def forward(self, gru_output):
        """Reduce ``gru_output`` of shape (B, L, H) to a context of shape (B, H)."""
        scores = self.attn(gru_output)                  # (B, L, 1)
        weights = torch.softmax(scores, dim=1)          # normalised over L
        return (weights * gru_output).sum(dim=1)        # (B, H)

class CNN_GRU_Attn_Classifier(nn.Module):
    """Byte-sequence classifier: embedding -> 3 conv blocks -> BiGRU -> attention -> MLP.

    Each conv block is Conv1d(kernel 3, padding 1) + BatchNorm1d + GELU followed
    by a stride-2 max-pool, so the sequence length is halved three times before
    the bidirectional GRU. The GRU outputs are pooled by ``Attention`` and
    mapped to ``num_classes`` logits by two linear layers.

    Args:
        num_classes (int): Number of output logits.
    """

    def __init__(self, num_classes):
        super().__init__()

        self.embedding_dim = 64
        self.vocab_size = 257  # 0–255 + 1 for PAD

        # Token embedding; index 256 is the padding row
        self.embedding = nn.Embedding(
            num_embeddings=self.vocab_size,
            embedding_dim=self.embedding_dim,
            padding_idx=256,
        )

        # Convolutional feature extractor: 64 -> 64 -> 128 -> 256 channels
        self.conv1 = nn.Conv1d(self.embedding_dim, 64, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm1d(64)

        self.conv2 = nn.Conv1d(64, 128, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm1d(128)

        self.conv3 = nn.Conv1d(128, 256, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm1d(256)

        self.pool = nn.MaxPool1d(kernel_size=2, stride=2)
        self.dropout = nn.Dropout(0.3)

        # Recurrent encoder over the pooled feature sequence
        self.gru = nn.GRU(input_size=256, hidden_size=128, num_layers=1,
                          batch_first=True, bidirectional=True)

        # Attention pooling over the 2 * 128 bidirectional GRU features
        self.attention = Attention(hidden_dim=128 * 2)

        # Classification head
        self.fc1 = nn.Linear(128 * 2, 256)
        self.fc2 = nn.Linear(256, num_classes)

    def forward(self, x):
        """Map integer token ids of shape (B, L) to logits of shape (B, num_classes)."""
        # (B, L) -> (B, L, D) -> (B, D, L) for Conv1d
        features = self.embedding(x).transpose(1, 2)

        conv_blocks = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
        )
        for conv, norm in conv_blocks:
            features = self.pool(F.gelu(norm(conv(features))))  # halves L each time

        sequence = features.transpose(1, 2)        # (B, L_out, 256)
        gru_out, _ = self.gru(sequence)            # (B, L_out, 2*128)

        pooled = self.attention(gru_out)           # (B, 2*128)

        hidden = self.dropout(F.gelu(self.fc1(pooled)))
        return self.fc2(hidden)




In [12]:
import torch
# Use the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [13]:
# Display the selected device
device

device(type='cuda')

In [14]:
# Size the output layer from the label list loaded with the dataset instead of
# a hard-coded 7, so the model follows the data if the class set changes.
model = CNN_GRU_Attn_Classifier(num_classes=len(labels)).to(device)



In [15]:
# Loss for multi-class classification on raw logits
criterion = nn.CrossEntropyLoss()
# AdamW with a small learning rate and weight decay
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-5)
# Halve the learning rate when the monitored value (stepped with the evaluation
# loss in test_model) has not improved for more than `patience` epochs
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=1, factor=0.5)



In [16]:
import requests
from pathlib import Path 

# Download helper functions from Learn PyTorch repo (if not already downloaded)
# NOTE(review): this fetches and later imports unpinned code from the `main`
# branch; consider pinning a commit hash or vendoring the file.
if Path("helper_functions.py").is_file():
  print("helper_functions.py already exists, skipping download")
else:
  print("Downloading helper_functions.py")
  # Note: you need the "raw" GitHub URL for this to work
  request = requests.get(
      "https://raw.githubusercontent.com/mrdbourke/pytorch-deep-learning/main/helper_functions.py",
      timeout=30,  # do not hang the notebook on a stalled connection
  )
  # Fail here on an HTTP error instead of saving the error page as a module
  request.raise_for_status()
  with open("helper_functions.py", "wb") as f:
    f.write(request.content)
from helper_functions import accuracy_fn

helper_functions.py already exists, skipping download


In [17]:
from tqdm import tqdm
import torch

def train_model(model, train_loader, criterion, optimizer, num_epochs=10, device=device):
    """Train ``model`` for ``num_epochs`` passes over ``train_loader``.

    Each batch is moved to ``device`` and cast to ``long``; gradients are
    clipped to a global norm of 1.0 before every optimizer step. The progress
    bar shows the running mean loss of the current epoch.

    Args:
        model: Module to train; it is switched to training mode.
        train_loader: Iterable yielding ``(X, y)`` batches.
        criterion: Loss called as ``criterion(model(X), y)``.
        optimizer: Optimizer over ``model``'s parameters.
        num_epochs (int): Number of passes over ``train_loader``.
        device: Device the batches are moved to.

    Returns:
        list: Mean training loss of each epoch, in order.
    """
    model.train()
    epoch_losses = []

    for epoch in range(num_epochs):
        total_loss = 0.0
        num_batches = 0

        # Wrap your DataLoader in tqdm
        batch_iter = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", unit="batch")
        for X, y in batch_iter:
            # send to device and cast to long only per-batch
            X = X.to(device).long()
            y = y.to(device).long()

            # forward / backward
            y_pred = model(X)
            loss = criterion(y_pred, y)
            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

            total_loss += loss.item()
            num_batches += 1

            # Count batches ourselves: tqdm refreshes its `.n` attribute only
            # at display intervals, so dividing by it inflates the running mean.
            batch_iter.set_postfix(loss=total_loss / num_batches)

        # Guard against an empty loader instead of dividing by zero
        avg_loss = total_loss / num_batches if num_batches else float("nan")
        epoch_losses.append(avg_loss)
        print(f"→ Epoch {epoch+1} complete. Avg Loss: {avg_loss:.4f}")

    return epoch_losses


In [18]:
from tqdm import tqdm
import torch

# NOTE(review): this cell redefines train_model, which the previous cell
# already defines; one of the two cells can be removed.
def train_model(model, train_loader, criterion, optimizer, num_epochs=10, device=device):
    """Train ``model`` for ``num_epochs`` passes over ``train_loader``.

    Each batch is moved to ``device`` and cast to ``long``; gradients are
    clipped to a global norm of 1.0 before every optimizer step. The progress
    bar shows the running mean loss of the current epoch.

    Args:
        model: Module to train; it is switched to training mode.
        train_loader: Iterable yielding ``(X, y)`` batches.
        criterion: Loss called as ``criterion(model(X), y)``.
        optimizer: Optimizer over ``model``'s parameters.
        num_epochs (int): Number of passes over ``train_loader``.
        device: Device the batches are moved to.

    Returns:
        list: Mean training loss of each epoch, in order.
    """
    model.train()
    epoch_losses = []

    for epoch in range(num_epochs):
        total_loss = 0.0
        num_batches = 0

        # Wrap your DataLoader in tqdm
        batch_iter = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", unit="batch")
        for X, y in batch_iter:
            # send to device and cast to long only per-batch
            X = X.to(device).long()
            y = y.to(device).long()

            # forward / backward
            y_pred = model(X)
            loss = criterion(y_pred, y)
            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

            total_loss += loss.item()
            num_batches += 1

            # Count batches ourselves: tqdm refreshes its `.n` attribute only
            # at display intervals, so dividing by it inflates the running mean.
            batch_iter.set_postfix(loss=total_loss / num_batches)

        # Guard against an empty loader instead of dividing by zero
        avg_loss = total_loss / num_batches if num_batches else float("nan")
        epoch_losses.append(avg_loss)
        print(f"→ Epoch {epoch+1} complete. Avg Loss: {avg_loss:.4f}")

    return epoch_losses


In [19]:
def test_model(model, test_loader, criterion,scheduler ,device=device):
    """Evaluate ``model`` on ``test_loader``, print the metrics and step the scheduler.

    Loss and accuracy are averaged over samples, so a shorter final batch does
    not get the same weight as a full one.

    Args:
        model: Module to evaluate; it is left in evaluation mode.
        test_loader: Iterable yielding ``(X, y)`` batches.
        criterion: Loss called as ``criterion(model(X), y)``.
        scheduler: Scheduler stepped with the mean loss (e.g.
            ``ReduceLROnPlateau``); pass ``None`` to skip the step.
        device: Device the batches are moved to.
    """
    model.eval()  # Set model to evaluation mode
    loss_sum, num_correct, num_samples = 0.0, 0, 0

    with torch.inference_mode():
        for X, y in test_loader:
            # Transfer the compact tensors first, then widen to long on the
            # target device (same order as train_model).
            X = X.to(device).long()
            y = y.to(device).long()

            test_pred = model(X)  # Forward pass
            batch_size = y.size(0)
            # assumes criterion returns the batch mean (the CrossEntropyLoss
            # default), so it is re-weighted by the batch size - TODO confirm
            # if a different reduction is ever used
            loss_sum += criterion(test_pred, y).item() * batch_size
            num_correct += (test_pred.argmax(dim=1) == y).sum().item()
            num_samples += batch_size

    if num_samples == 0:
        print("Warning: evaluation loader is empty; no metrics computed.")
        return

    test_loss = loss_sum / num_samples
    test_acc = 100.0 * num_correct / num_samples

    print(f"Test Loss: {test_loss:.4f}, Accuracy: {test_acc:.2f}%")
    if scheduler is not None:
        scheduler.step(test_loss)



In [21]:


# One training epoch followed by a validation pass; test_model also steps the
# LR scheduler with the validation loss.
train_model(model, train_loader, criterion, optimizer, num_epochs=1)
test_model(model, val_loader,criterion,scheduler)


 

Epoch 1/1: 100%|██████████| 35846/35846 [30:50<00:00, 19.37batch/s, loss=0.267]


→ Epoch 1 complete. Avg Loss: 0.2665
Test Loss: 0.3463, Accuracy: 86.98%


In [22]:
# torch.save does not create missing directories
# NOTE(review): the file name says 10 epochs - confirm it matches the training actually run
os.makedirs('./models', exist_ok=True)
torch.save(model.state_dict(),'./models/FFToffice-10epocs.pth')

In [None]:


# One more training epoch followed by a validation pass; test_model also steps
# the LR scheduler with the validation loss.
train_model(model, train_loader, criterion, optimizer, num_epochs=1)
test_model(model, val_loader,criterion,scheduler)

 

In [None]:
# torch.save does not create missing directories
# NOTE(review): the file name says 10 epochs - confirm it matches the training actually run
os.makedirs('./models', exist_ok=True)
torch.save(model.state_dict(),'./models/FFToffice-10epocs.pth')

In [20]:
# map_location remaps the stored tensors to the selected device, so a
# checkpoint saved on a GPU also loads on a CPU-only machine.
model.load_state_dict(torch.load('./models/FFToffice-9epocs.pth', map_location=device, weights_only=True))

<All keys matched successfully>