Flow for a machine learning project

1. Load configuration
2. Initialize data processor
3. Load and preprocess housing data
4. For each edge connectivity method:
   a. Build graph with specific connectivity
   b. For each model type (Simple/Multi-layer):
      - Initialize model
      - Train model
      - Evaluate on validation set
      - Save results
5. Compare all results
6. Get test results and generate the final report

Imports

In [None]:
# Set up imports and reload modules that may change during development

# Clear GPU memory
import torch

# The CUDA housekeeping calls need an initialised CUDA runtime and fail on a
# CPU-only machine, so run them only when a GPU is actually available (the
# hardware-check cell of this notebook explicitly supports running on CPU).
if torch.cuda.is_available():
    torch.cuda.empty_cache()  # release unoccupied cached allocator memory
    torch.cuda.ipc_collect()  # collect GPU memory released by CUDA IPC
    torch.cuda.reset_peak_memory_stats()  # reset the allocator's peak statistics

# Base imports
import numpy as np
import pandas as pd
from tqdm import tqdm
import time
import sys
import importlib

# Reloadable modules
import config
import data_processor
import models
import trainer
import graph_builder
import experiment_runner

# Re-execute each project module so that code changes made during development
# are picked up by this session.
reloadable_modules = (
    config,
    data_processor,
    models,
    trainer,
    graph_builder,
    experiment_runner,
)
for module in reloadable_modules:
    importlib.reload(module)

# Bind the class names only now, i.e. after the modules have been reloaded.
from config import Config
from data_processor import DataProcessor
from models import SimpleGCN, SimpleGAT, MultiLayerGCN
from trainer import Trainer
from experiment_runner import ExperimentRunner


print("GNN Real Estate Price Prediction Setup")


Load and set up data

In [None]:
# Create the configuration object and the data processor.
# NOTE: from this line on, the name `data_processor` refers to a DataProcessor
# instance rather than the imported `data_processor` module; the imports cell
# re-imports the module before reloading it.
cfg = Config()
data_processor = DataProcessor()

# force_recreate=False is passed explicitly - presumably this allows already
# processed data to be reused; confirm in DataProcessor.create_processed_data.
data_processor.create_processed_data(cfg, force_recreate=False)

# Print a short summary of the settings read from the configuration.
feature_count = len(cfg.engineered_embedding_features)
print(f"   Features to use: {feature_count}")
epoch_count = cfg.default_epochs
print(f"   Epochs: {epoch_count}")
learning_rate = cfg.default_learning_rate
print(f"   Learning rate: {learning_rate}")

Hardware check

In [None]:
# Hardware check: report whether CUDA is available, which GPU is in use (if
# any), and the number of CPU threads PyTorch is configured with.
print("\nHardware Check:")
cuda_available = torch.cuda.is_available()
print(f"   CUDA available: {cuda_available}")
if cuda_available:
    # Query the name and the memory from the same device. Mixing the current
    # device (for the name) with a hard-coded index 0 (for the memory) could
    # describe two different GPUs on a multi-GPU machine.
    device_index = torch.cuda.current_device()
    print(f"   GPU: {torch.cuda.get_device_name(device_index)}")
    total_memory_gb = torch.cuda.get_device_properties(device_index).total_memory / 1e9
    print(f"   GPU memory: {total_memory_gb:.1f} GB")
else:
    print("   Using CPU")
print(f"   CPU threads: {torch.get_num_threads()}")

Running experiments with different edge connectivity and models

In [None]:
print("running experiments with different edge connectivity methods and models...")

# Create different configurations for different edge connectivity methods and models.


# List of configurations to run; it starts out empty.
cfg_array = []

# Create the configurations here, each based on the `cfg` object
# (presumably they are then added to cfg_array - confirm before running).

# NOTE: this rebinds the name `experiment_runner` from the imported module to an
# ExperimentRunner instance.
experiment_runner = ExperimentRunner()

In [None]:
# run the experiments with different configurations

In [None]:
# Hand the list of configurations to the runner.
# NOTE(review): cfg_array is created empty in the setup cell and nothing visible
# here fills it - confirm it has been populated before running this cell.
experiment_runner.run_experiment_list(cfg_array)