In [None]:
import os
import sys
import subprocess

def setup_environment(
    github_repo="dcrew44/GeoStateNet",
    branch="master",
    mount_drive=True
):
    """Prepare the runtime: optionally mount Drive, fetch the repo, expose it on sys.path.

    Args:
        github_repo: Repository on github.com in ``owner/name`` form.
        branch: Branch to clone, or to pull from ``origin`` when a directory
            with the repository's name already exists in the working directory.
        mount_drive: When true, mount Google Drive at ``/content/drive``.
            This imports ``google.colab``, so it only works inside Colab.

    Returns:
        Absolute path of the local checkout. The same path is inserted at the
        front of ``sys.path`` if it is not already present.

    Raises:
        subprocess.CalledProcessError: If ``git clone`` or ``git pull`` exits
            with a non-zero status.
    """
    # Mount Google Drive if requested
    if mount_drive:
        from google.colab import drive
        drive.mount('/content/drive')
        print("Google Drive mounted.")

    # Clone the repository
    repo_url = f"https://github.com/{github_repo}.git"
    repo_dir = github_repo.split("/")[1]

    if os.path.exists(repo_dir):
        print(f"Repository directory {repo_dir} already exists. Pulling latest changes...")
        # Run git inside the checkout via cwd= rather than os.chdir(): the
        # process-wide working directory is never touched, so it cannot be left
        # pointing at the wrong place if git fails or repo_dir is a symlink.
        # check=True makes a failed pull raise instead of passing silently.
        subprocess.run(["git", "pull", "origin", branch], cwd=repo_dir, check=True)
    else:
        print(f"Cloning repository from {repo_url}...")
        # check=True: without it a failed clone would still report success below.
        subprocess.run(["git", "clone", "-b", branch, repo_url], check=True)

    # Add the repository to Python path
    repo_path = os.path.abspath(repo_dir)
    if repo_path not in sys.path:
        sys.path.insert(0, repo_path)

    print(f"Setup complete! The codebase is available in /{repo_dir}/")
    return repo_path

# Run the setup with its defaults (mounts Drive, then clones or updates the repo)
# and keep the checkout's absolute path; later cells build their data and config
# paths from repo_path.
repo_path = setup_environment()


Mounted at /content/drive
Google Drive mounted.
Cloning repository from https://github.com/dcrew44/geoguessr-state-classifier.git...
Setup complete! The codebase is available in /geoguessr-state-classifier/


In [None]:
import zipfile
import os

# Create data directories in the project
train_dir = os.path.join(repo_path, "data", "train")
test_dir = os.path.join(repo_path, "data", "test")


# Create directories if they don't exist
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

print(f"Extracting training data to {train_dir}...")
zip_ref = zipfile.ZipFile("/path/to/dataset/zipfile")
zip_ref.extractall(train_dir)
zip_ref.close()
print("Training data extraction complete!")

print(f"Extracting test data to {test_dir}...")
zip_ref = zipfile.ZipFile("/path/to/testdataset/zipfile")
zip_ref.extractall(test_dir)
zip_ref.close()
print("Test data extraction complete!")

# Print structure to verify
print("\nData directory structure:")
!find {repo_path}/data -type d -maxdepth 3 | sort

Extracting training data to /content/geoguessr-state-classifier/data/train...
Training data extraction complete!
Extracting test data to /content/geoguessr-state-classifier/data/test...
Test data extraction complete!

Data directory structure:
/content/geoguessr-state-classifier/data
/content/geoguessr-state-classifier/data/test
/content/geoguessr-state-classifier/data/test/test_data
/content/geoguessr-state-classifier/data/test/test_data/Alabama
/content/geoguessr-state-classifier/data/test/test_data/Alaska
/content/geoguessr-state-classifier/data/test/test_data/Arizona
/content/geoguessr-state-classifier/data/test/test_data/Arkansas
/content/geoguessr-state-classifier/data/test/test_data/California
/content/geoguessr-state-classifier/data/test/test_data/Colorado
/content/geoguessr-state-classifier/data/test/test_data/Connecticut
/content/geoguessr-state-classifier/data/test/test_data/Delaware
/content/geoguessr-state-classifier/data/test/test_data/Florida
/content/geoguessr-state-cla

In [None]:
# Report which accelerator (if any) this runtime exposes.
import torch

cuda_ready = torch.cuda.is_available()
print(f"CUDA available: {cuda_ready}")
if cuda_ready:
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    gpu_props = torch.cuda.get_device_properties(0)
    print(f"Memory: {gpu_props.total_memory / 1e9:.2f} GB")

# The project config is read from the root of the cloned repository.
config_path = os.path.join(repo_path, "config.yaml")

if not os.path.exists(config_path):
    # Nothing to load: tell the user and leave `config` undefined.
    print(f"Config file not found at {config_path}")
    print("You may need to create a config file first.")
else:
    print(f"Using config from: {config_path}")
    # The loader lives in the cloned project (adjust the import path if the
    # package layout differs).
    try:
        from state_classifier.config import load_config

        config = load_config(config_path)

        # Data-loading settings for this run.
        config.batch_size = 256
        config.num_workers = 12
        config.prefetch_factor = 4

        # Per-phase epoch counts and learning rates.
        hparams = config.hyperparameters
        hparams.phase1_epochs = 1
        hparams.phase1_lr = 0.01

        hparams.phase2_epochs = 1
        hparams.phase2_lr = 0.004

        hparams.phase3_lr = 0.001
        hparams.phase3_epochs = 3

        # Enable all three phases, starting from the first.
        phases = config.train_phases
        phases.start_phase = 1
        phases.phase1 = True
        phases.phase2 = True
        phases.phase3 = True

        hparams.patience = 2
        hparams.weight_decay = 0.01

        # Point the config at the data under <repo>/data (the test root is the
        # "test_data" folder inside data/test) and at a checkpoints folder
        # inside the repo.
        data_root = os.path.join(repo_path, "data")
        config.dataset_root = os.path.join(data_root, "train")
        config.test_dataset_root = os.path.join(data_root, "test", "test_data")
        config.checkpoints_dir = os.path.join(repo_path, "checkpoints")

        # Make sure the checkpoint folder exists before anything writes to it.
        os.makedirs(config.checkpoints_dir, exist_ok=True)

        print("Configuration updated with correct paths:")
        print(f"- Training data: {config.dataset_root}")
        print(f"- Test data: {config.test_dataset_root}")
        print(f"- Checkpoints: {config.checkpoints_dir}")
    except ImportError as e:
        print(f"Could not import configuration module: {e}")
        print("You may need to adjust the import paths based on your project structure.")

CUDA available: True
GPU: NVIDIA A100-SXM4-40GB
Memory: 42.47 GB
Using config from: /content/geoguessr-state-classifier/config.yaml
Configuration updated with correct paths:
- Training data: /content/geoguessr-state-classifier/data/train
- Test data: /content/geoguessr-state-classifier/data/test/test_data
- Checkpoints: /content/geoguessr-state-classifier/checkpoints


In [None]:
import wandb

# Create and run experiment, using the `config` object built in the previous cell.
try:
    from state_classifier.experiment import Experiment
    experiment = Experiment(config)
    experiment.run()

except ImportError as e:
    print(f"Could not import experiment module: {e}")
    print("You may need to adjust the import paths based on your project structure.")
except BaseException:
    # Training crashed or was interrupted (BaseException also covers
    # KeyboardInterrupt from stopping the cell). Close the W&B run as failed
    # so it is not left open in this kernel, then let the error propagate.
    wandb.finish(exit_code=1)
    raise

# Finish W&B run
wandb.finish()

=== Phase 1 ===


Training [1]: 100%|██████████| 1758/1758 [07:02<00:00,  4.16it/s]


Train Loss: 3.2578, Train Acc: 21.09%


Validating [1]: 100%|██████████| 196/196 [00:38<00:00,  5.13it/s]


Val Loss: 2.7729, Val Acc: 32.29%
Loaded best Phase 1 weights.
=== Phase 2 ===


Training [1]: 100%|██████████| 1758/1758 [07:13<00:00,  4.06it/s]


Train Loss: 2.5870, Train Acc: 37.04%


Validating [1]: 100%|██████████| 196/196 [00:38<00:00,  5.15it/s]


Val Loss: 2.3174, Val Acc: 45.47%
Loaded best Phase 2 weights.
=== Phase 3 ===


Training [1]: 100%|██████████| 1758/1758 [08:11<00:00,  3.58it/s]


Train Loss: 2.2211, Train Acc: 48.74%


Validating [1]: 100%|██████████| 196/196 [00:38<00:00,  5.12it/s]


Val Loss: 2.1789, Val Acc: 49.73%


Training [2]: 100%|██████████| 1758/1758 [08:12<00:00,  3.57it/s]


Train Loss: 1.9549, Train Acc: 57.89%


Validating [2]: 100%|██████████| 196/196 [00:38<00:00,  5.12it/s]


Val Loss: 2.0726, Val Acc: 53.15%


Training [3]: 100%|██████████| 1758/1758 [08:10<00:00,  3.58it/s]


Train Loss: 1.5849, Train Acc: 72.08%


Validating [3]: 100%|██████████| 196/196 [00:38<00:00,  5.12it/s]


Val Loss: 2.1007, Val Acc: 52.85%
Loaded best Phase 3 weights.


Testing: 100%|██████████| 1563/1563 [01:20<00:00, 19.45it/s]


Test Accuracy: 0.6763


0,1
test/accuracy,▁
train/accuracy,▁▆█
train/loss,█▃▁
train/lr,▁█▁
val/accuracy,▁██
val/loss,█▁▁

0,1
test/accuracy,0.67629
train/accuracy,72.07648
train/loss,1.58489
train/lr,0.0
val/accuracy,52.85271
val/loss,2.10075
