<a href="https://colab.research.google.com/github/DanielleRaine/Bird-Species-Distribution-Modeling-with-Location-Information/blob/main/BirdHotspotDataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [94]:
import os
import json
import pandas as pd
import torch
from torch.utils.data import Dataset

In [132]:
class BirdHotspotsDataset(Dataset):
  """
  Dataset for Bird Species Distribution Modeling with Location Information (Bird Hotspots).

  Each item pairs the numeric columns of one csv row with the "probs" vector
  stored in the JSON target file named after that row's "hotspot_id".
  """

  def __init__(self, csv_file, targets_dir):
    """
    Args:
      csv_file (string): Path to the csv file. It must contain a "hotspot_id" column.
      targets_dir (string): Directory with all the target files ("<hotspot_id>.json").
    """

    self.data = pd.read_csv(csv_file)
    self.targets_dir = targets_dir

  def __len__(self):
    """
    Returns the length of the dataset.
    Returns:
      length (int): Number of rows in the csv file.
    """

    return len(self.data)

  def __getitem__(self, idx):
    """
    Returns the record and its target.
    Args:
      idx (int or scalar torch.Tensor): Index of the record.
    Returns:
      record (torch.Tensor): Numeric values of the row, in column order. Values that
        cannot be parsed as numbers (and missing values) are dropped, so a row with
        a missing numeric value yields a shorter tensor. The dtype is inferred from
        the values (float64 whenever any value had to be coerced).
      target (torch.Tensor): float32 tensor built from the "probs" entry of the target file.
    Raises:
      FileNotFoundError: If the target file for the record does not exist.
      KeyError: If the target file has no "probs" entry.
    """

    # Samplers may hand over a scalar tensor; iloc needs a plain integer.
    if torch.is_tensor(idx):
      idx = idx.item()

    # Get the environmental + location data for one record.
    record = self.data.iloc[idx]

    # Get the target (label) path for the record. The f-string keeps this working
    # when pandas parsed the ids as numbers instead of strings.
    target_path = os.path.join(self.targets_dir, f"{record.loc['hotspot_id']}.json")
    # Get the target and convert it into a tensor.
    with open(target_path, encoding="utf-8") as f:
      target = torch.tensor(json.load(f)["probs"], dtype=torch.float32)

    # Keep only the values that parse as numbers (non-numeric columns become NaN and are dropped).
    numeric_values = pd.to_numeric(record, errors="coerce").dropna()

    # Convert through numpy: handing the label-indexed Series to torch.tensor makes torch
    # index it by integer position, which pandas deprecates for non-integer indexes.
    return torch.tensor(numeric_values.to_numpy()), target

In [111]:
# targets_dir = "/content/drive/MyDrive/Team Mila/Project Dataset/targets"

# training_data = BirdHotspotsDataset("/content/drive/MyDrive/Team Mila/Project Dataset/train_split.csv", targets_dir)
# test_data = BirdHotspotsDataset("/content/drive/MyDrive/Team Mila/Project Dataset/test_split.csv", targets_dir)

In [112]:
# from torch.utils.data import DataLoader

In [113]:
# train_dataloader = DataLoader(training_data, batch_size=64, shuffle=True)
# test_dataloader = DataLoader(test_data, batch_size=64, shuffle=True)

In [114]:
# train_features, train_labels = next(iter(train_dataloader))

  return torch.tensor(pd.to_numeric(record, errors="coerce").dropna()), target


In [115]:
# print(f"Feature batch shape: {train_features.size()}")
# print(f"Labels batch shape: {train_labels.size()}")

Feature batch shape: torch.Size([64, 31])
Labels batch shape: torch.Size([64, 670])


In [123]:
# record = train_features[4]
# label = train_labels[4]

In [124]:
# record

tensor([ -89.0184,   45.7751,   40.0000,   81.0000,    4.1917,   12.0000,
          28.0374, 1119.0415,   24.8000,  -18.0000,   42.8000,   16.2667,
         -10.2500,   17.3333,  -10.2500,  800.0000,  109.0000,   23.0000,
          43.8818,  301.0000,   92.0000,  295.0000,   92.0000, 1800.0000,
        1289.0000,   11.0000,    4.0000,   34.0000,   48.0000,   30.0000,
          66.0000], dtype=torch.float64)

In [125]:
# label

tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.1750, 0.0000, 0.0000,
        0.1500, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.1000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0500, 0.0000,
        0.1250, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0750, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.3250, 0.0000, 0.0250, 0.0000, 0.0000, 0.0000, 0.0250, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 