# Generating negative sample points

This script generates 10,000 random locations worldwide, each paired with a random timestamp between 2000-01-01 and 2023-12-31, and writes them to a CSV file.

In [6]:
import csv
import random
from datetime import datetime, timedelta
from pathlib import Path

# Build a CSV of "negative" samples: rows with no species, a random
# (latitude, longitude) pair and a random timestamp.

# Define the number of samples you want
num_samples = 10000

# Fixed seed so that Restart & Run All reproduces the same file.
# A private Random instance is used so the global `random` state is untouched.
SEED = 42
rng = random.Random(SEED)

# Timestamps are drawn from 2000-01-01 00:00:00 up to 2023-12-31 23:59:59.
# The bounds never change, so they are computed once, outside the loop.
start_date = datetime(2000, 1, 1)
end_date = datetime(2023, 12, 31)
# Number of whole seconds in the range, counting the full final day.
seconds_in_range = ((end_date - start_date).days + 1) * 24 * 60 * 60

# Generate negative samples
data = []
for _ in range(num_samples):
    # Generate random latitude and longitude coordinates
    # NOTE(review): drawing latitude uniformly in degrees over-represents high
    # latitudes per unit of surface area. If area-uniform coverage is wanted,
    # draw latitude as degrees(asin(uniform(-1, 1))) instead.
    latitude = rng.uniform(-90, 90)
    longitude = rng.uniform(-180, 180)

    # Randomise at one-second resolution. Randomising whole days only would
    # leave the %H:%M:%S part of every row fixed at 00:00:00.
    random_date = start_date + timedelta(seconds=rng.randrange(seconds_in_range))

    # Format the coordinates and date
    location = f"({latitude}, {longitude})"
    observed_on = random_date.strftime('%Y-%m-%d %H:%M:%S')

    # 'species' is None, which csv.DictWriter writes as an empty field.
    data.append({'species': None, 'location': location, 'observed_on': observed_on})

# Write the data to a CSV file
output_file = "data/negative_samples.csv"
fieldnames = ['species', 'location', 'observed_on']

# Make sure the target directory exists; open() does not create it.
Path(output_file).parent.mkdir(parents=True, exist_ok=True)

with open(output_file, "w", newline="", encoding="utf-8") as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(data)

print(f"Generated {num_samples} negative samples and saved to {output_file}")


Generated 10000 negative samples and saved to negative_samples.csv


# Generating negative sample points within a region

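This script generates 10,000 random locations, each paired with a random timestamp between 2000-01-01 and 2023-12-31, that fall inside a given geometry (here, the polygons loaded from `spain.geojson`).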

In [12]:
import csv
import json
import random
from datetime import datetime, timedelta

from shapely.geometry import Point, shape
from shapely.prepared import prep

# Build a CSV of "negative" samples restricted to a region: rows with no
# species, a random (latitude, longitude) pair that lies inside one of the
# GeoJSON geometries, and a random timestamp.

# Load the GeoJSON polygons
with open('spain.geojson', 'r', encoding='utf-8') as f:
    geojson_data = json.load(f)

# Extract polygons from the GeoJSON data.
# Features whose geometry is null are skipped; shape() cannot convert them.
polygons = [
    shape(feature['geometry'])
    for feature in geojson_data['features']
    if feature.get('geometry')
]

# Only geometries with a non-zero area can contain a random point. Without at
# least one of them the sampling loop below would never terminate.
areal_polygons = [polygon for polygon in polygons if polygon.area > 0]
if not areal_polygons:
    raise ValueError("spain.geojson contains no geometry with a non-zero area to sample from")

# Prepared geometries speed up the many repeated contains() checks.
prepared_polygons = [prep(polygon) for polygon in areal_polygons]

# Draw candidates from the polygons' combined bounding box, not the whole
# globe. Points outside the box can never be accepted, so the accepted points
# follow the same distribution while far fewer draws are wasted.
# bounds is (minx, miny, maxx, maxy), i.e. (min lon, min lat, max lon, max lat).
# The box is clamped to the valid longitude/latitude range.
bounds = [polygon.bounds for polygon in areal_polygons]
min_lon = max(min(b[0] for b in bounds), -180)
min_lat = max(min(b[1] for b in bounds), -90)
max_lon = min(max(b[2] for b in bounds), 180)
max_lat = min(max(b[3] for b in bounds), 90)

# Define the number of samples you want
num_samples = 10000

# Fixed seed so that Restart & Run All reproduces the same file.
# A private Random instance is used so the global `random` state is untouched.
SEED = 42
rng = random.Random(SEED)

# Timestamps are drawn from 2000-01-01 00:00:00 up to 2023-12-31 23:59:59.
# The bounds never change, so they are computed once, outside the loop.
start_date = datetime(2000, 1, 1)
end_date = datetime(2023, 12, 31)
# Number of whole seconds in the range, counting the full final day.
seconds_in_range = ((end_date - start_date).days + 1) * 24 * 60 * 60

# Generate negative samples (rejection sampling inside the bounding box)
data = []
while len(data) < num_samples:
    # Generate random latitude and longitude coordinates
    latitude = rng.uniform(min_lat, max_lat)
    longitude = rng.uniform(min_lon, max_lon)

    # Create a Point object from the coordinates (x = longitude, y = latitude)
    point = Point(longitude, latitude)

    # Reject candidates that fall outside every polygon
    if not any(polygon.contains(point) for polygon in prepared_polygons):
        continue

    # Randomise at one-second resolution. Randomising whole days only would
    # leave the %H:%M:%S part of every row fixed at 00:00:00.
    random_date = start_date + timedelta(seconds=rng.randrange(seconds_in_range))

    # Format the coordinates and date
    location = f"({latitude}, {longitude})"
    observed_on = random_date.strftime('%Y-%m-%d %H:%M:%S')

    # 'species' is None, which csv.DictWriter writes as an empty field.
    data.append({'species': None, 'location': location, 'observed_on': observed_on})

# Write the data to a CSV file
output_file = "negative_samples_within_polygons.csv"
fieldnames = ['species', 'location', 'observed_on']
with open(output_file, "w", newline="", encoding="utf-8") as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(data)

print(f"Generated {num_samples} negative samples within the polygons and saved to {output_file}")


Generated 10000 negative samples within the polygons and saved to negative_samples_within_polygons.csv
