In [1]:
# This cell imports all necessary libraries and mounts Google Drive.

import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import gc
import os

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader, random_split

from sklearn.metrics import f1_score, classification_report, confusion_matrix
from google.colab import drive

# Mount Google Drive at /content/drive so files stored under it (e.g. the
# MyDrive/Wafer/ data directory read by the next cell) become visible on the
# local filesystem. The two prints bracket the mount call so the cell output
# shows when mounting started and when it finished.
print("Setting up environment and mounting Google Drive...")
drive.mount('/content/drive')
print("Setup complete.")

Setting up environment and mounting Google Drive...
Mounted at /content/drive
Setup complete.


In [2]:
# Load the four training CSVs, stack them into one DataFrame, and release the
# per-file DataFrames as soon as the combined frame exists to keep RAM usage down.

DATA_PATH = '/content/drive/MyDrive/Wafer/'


def load_training_data(data_path):
    """Read training_1..4.csv from ``data_path`` and concatenate them.

    ``training_1.csv`` and ``training_4.csv`` are parsed with their first line
    as the header row. ``training_2.csv`` and ``training_3.csv`` are parsed
    with ``header=None``: their first line is skipped and the column names of
    ``training_1.csv`` are imposed instead.

    Args:
        data_path: Directory containing the four CSV files. A trailing path
            separator is optional.

    Returns:
        A ``(combined, column_names)`` tuple: the row-wise concatenation of
        the files in the order 1, 2, 3, 4 with a fresh 0..n-1 index, and the
        list of column names taken from ``training_1.csv``.

    Raises:
        FileNotFoundError: If ``data_path`` or any of the four files is missing.
    """
    if not os.path.exists(data_path):
        raise FileNotFoundError(f"Data directory not found: '{data_path}'")

    def read_semicolon_csv(file_name, **overrides):
        # os.path.join keeps the path valid whether or not data_path ends in '/'.
        return pd.read_csv(os.path.join(data_path, file_name),
                           sep=';', quotechar='"', **overrides)

    # training_1 defines the schema that is imposed on the files read without
    # a header row of their own.
    frames = [read_semicolon_csv('training_1.csv')]
    column_names = frames[0].columns.tolist()

    # NOTE(review): skiprows=1 discards the first line of training_2/3. That is
    # right if the line is a header we want to replace, but it drops one data
    # row per file if these files truly have no header -- confirm against the
    # raw files.
    imposed_header = dict(header=None, names=column_names, skiprows=1)
    frames.append(read_semicolon_csv('training_2.csv', **imposed_header))
    frames.append(read_semicolon_csv('training_3.csv', **imposed_header))
    frames.append(read_semicolon_csv('training_4.csv'))

    print("Combining datasets...")
    combined = pd.concat(frames, ignore_index=True)

    # --- Memory Management ---
    # The per-file frames are only referenced from this list, so dropping it
    # (also implicitly on an exception) lets their memory be reclaimed.
    print("Freeing up memory by deleting original dataframes...")
    del frames
    gc.collect()

    return combined, column_names


print("\nLoading data...")

try:
    combined_data, column_names = load_training_data(DATA_PATH)
    print(f"Combined data loaded with shape: {combined_data.shape}")

except FileNotFoundError as err:
    print(f"FATAL ERROR: Could not find data files at '{DATA_PATH}'.")
    print("Please make sure the path is correct and that the files are present.")
    # Surface which directory/file was actually missing.
    print(f"Details: {err}")
    # Downstream cells must check for None before using combined_data.
    combined_data = None


Loading data...
Combining datasets...
Freeing up memory by deleting original dataframes...
Combined data loaded with shape: (14007200, 14)
