In [4]:
import numpy as np
import pandas as pd
import random
from datetime import datetime, timedelta

# Coordinate dictionary: site name -> latitude / longitude
# (values look like decimal degrees; negative = south / west).
coordinate = {
    "Gral Guemes": {"latitude": -24.6667, "longitude": -65.0500},
    "Salta Capital": {"latitude": -24.7859, "longitude": -65.4117},
    "San Antonio de los Cobres": {"latitude": -24.2305, "longitude": -66.6181},
    "Salar de Pocitos": {"latitude": -24.3833, "longitude": -67.0000},
    "Salar del Hombre Muerto": {"latitude": -25.4667, "longitude": -67.1333},
    "Tolar Grande": {"latitude": -24.5886, "longitude": -67.4058}
}

# Locations dictionary: site name -> elevation and site role.
# NOTE: despite its spelling, "mbsl" holds the site's elevation in meters ABOVE
# sea level (positive altitudes; the dataset generator exports it as
# "altitude_destiny_mbsl"). The key name is kept so existing lookups still work.
locations = {
    "Gral Guemes": {"mbsl": 811, "type": "logistics hub"},
    "Salta Capital": {"mbsl": 1187, "type": "administrative center"},
    "San Antonio de los Cobres": {"mbsl": 3775, "type": "checkpoint"},
    "Salar de Pocitos": {"mbsl": 3650, "type": "lithium project"},
    "Salar del Hombre Muerto": {"mbsl": 4000, "type": "lithium mine"},
    "Tolar Grande": {"mbsl": 3508, "type": "operational base"}
}

# Route list: (origin, destination, distance) tuples; the distance is exported
# downstream as "distance_km".
# NOTE(review): "Tolar Grande" has coordinates and metadata but no route here,
# so it can never be picked as an origin or destination - confirm this is intended.
routes = [
    ("Gral Guemes", "San Antonio de los Cobres", 224),
    ("San Antonio de los Cobres", "Salar de Pocitos", 108),
    ("Salar de Pocitos", "Salar del Hombre Muerto", 100),
    ("Salta Capital", "San Antonio de los Cobres", 170),
    ("Gral Guemes", "Salta Capital", 59),
    ("San Antonio de los Cobres", "Salar del Hombre Muerto", 200)
]

print(coordinate)
print(locations)
print(routes)

{'Gral Guemes': {'latitude': -24.6667, 'longitude': -65.05}, 'Salta Capital': {'latitude': -24.7859, 'longitude': -65.4117}, 'San Antonio de los Cobres': {'latitude': -24.2305, 'longitude': -66.6181}, 'Salar de Pocitos': {'latitude': -24.3833, 'longitude': -67.0}, 'Salar del Hombre Muerto': {'latitude': -25.4667, 'longitude': -67.1333}, 'Tolar Grande': {'latitude': -24.5886, 'longitude': -67.4058}}
{'Gral Guemes': {'mbsl': 811, 'type': 'logistics hub'}, 'Salta Capital': {'mbsl': 1187, 'type': 'administrative center'}, 'San Antonio de los Cobres': {'mbsl': 3775, 'type': 'checkpoint'}, 'Salar de Pocitos': {'mbsl': 3650, 'type': 'lithium project'}, 'Salar del Hombre Muerto': {'mbsl': 4000, 'type': 'lithium mine'}, 'Tolar Grande': {'mbsl': 3508, 'type': 'opetational base'}}
[('Gral Guemes', 'San Antonio de los Cobres', 224), ('San Antonio de los Cobres', 'Salar de Pocitos', 108), ('Salar de Pocitos', 'Salar del Hombre Muerto', 100), ('Salta Capital', 'San Antonio de los Cobres', 170), ('Gr

In [5]:
#Create dataset with artificial data

def generate_mining_dataset(n_samples=20000, seed=None, route_list=None, location_info=None):
    """Simulate truck trips between mining sites and return them as a DataFrame.

    Each row is one synthetic trip: a random route, departure date, weather,
    truck type, load and driver experience, from which a theoretical travel
    time, a delayed "real" travel time and a fuel-consumption figure are derived.

    Parameters
    ----------
    n_samples : int, default 20000
        Number of trips (rows) to generate.
    seed : int or None, default None
        When given, a private ``random.Random(seed)`` is used so the result is
        reproducible and the global ``random`` state is left untouched. When
        None, the module-level ``random`` generator is used (so a prior
        ``random.seed(...)`` call still governs the output).
    route_list : sequence of (origin, destination, distance) or None
        Routes to sample from. Defaults to the notebook-level ``routes``.
    location_info : mapping of site name -> {"mbsl": elevation, ...} or None
        Site metadata; only the "mbsl" entry of each destination is read.
        Defaults to the notebook-level ``locations``.

    Returns
    -------
    pandas.DataFrame
        Columns: date, origin, destiny, altitude_destiny_mbsl, distance_km,
        unit_type, charge_tons, weather, driver_experience_years,
        theorical_time_arrival_hs, real_time_hs, liters_consumed_100km and
        critical_delay (1 when real_time_hs exceeds theorical_time_arrival_hs
        by more than 1.2, else 0; computed on the rounded columns).
    """
    # Fall back to the notebook-level tables when no explicit ones are given.
    route_list = routes if route_list is None else route_list
    location_info = locations if location_info is None else location_info

    # The `random` module exposes the same choice/randint/choices/uniform API
    # as a Random instance, so either can serve as the generator.
    rng = random if seed is None else random.Random(seed)

    start_date = datetime(2023, 1, 1)
    rows = []

    for _ in range(n_samples):
        origin, destiny, base_distance = rng.choice(route_list)
        altitude = location_info[destiny]["mbsl"]

        # Date and weather simulation.
        # randint is inclusive on both ends: offsets 0..364 keep every trip in
        # 2023 (an offset of 365 would land on 2024-01-01).
        date = start_date + timedelta(days=rng.randint(0, 364), hours=rng.randint(0, 23))
        weather = rng.choices(["clear", "white wind", "rain", "snow"], weights=[70, 10, 15, 5])[0]

        # Loading and transportation variables (driver_experience in years).
        type_truck = rng.choice(["bitrain", "semitrailer", "4x4 truck"])
        charge_tons = round(rng.uniform(20, 45), 2)
        driver_experience = rng.randint(1, 20)

        # Mountain speed: nominal 50 (km/h per the original note) minus 0.2 per
        # ton of load, then scaled down by weather and altitude.
        base_speed = 50 - (charge_tons * 0.2)
        if weather == "white wind":
            base_speed *= 0.4
        if altitude > 3500:
            base_speed *= 0.8  # puna effect

        estimated_time = base_distance / base_speed
        # Clear weather adds 0-2 of delay; any other weather adds 2-8.
        random_delay = rng.uniform(0, 2) if weather == "clear" else rng.uniform(2, 8)
        real_time = estimated_time + random_delay

        # Fuel consumption (liters/100km): grows with load and with altitude.
        consumption = (35 + (charge_tons * 0.5)) * (1 + (altitude / 10000))

        rows.append([
            date, origin, destiny, altitude,
            base_distance, type_truck, charge_tons, weather,
            driver_experience, round(estimated_time, 2), round(real_time, 2),
            round(consumption, 2)
        ])

    df = pd.DataFrame(rows, columns=[
        "date", "origin", "destiny", "altitude_destiny_mbsl",
        "distance_km", "unit_type", "charge_tons", "weather",
        "driver_experience_years", "theorical_time_arrival_hs",
        "real_time_hs", "liters_consumed_100km"
    ])

    # Target column for the ML model.
    df["critical_delay"] = (df['real_time_hs'] > df["theorical_time_arrival_hs"] + 1.2).astype(int)

    return df

#Generate and save
# Seed the global generator first so the synthetic data (and therefore the CSV
# written below) is the same on every Restart & Run All.
SEED = 42
N_SAMPLES = 20000

random.seed(SEED)
df_mining = generate_mining_dataset(N_SAMPLES)
df_mining.to_csv("mining_logistics.csv", index=False)

# One row per site: the dict keys become the "locations" column.
df_coordinate = pd.DataFrame.from_dict(coordinate, orient="index").reset_index()
df_coordinate.columns = ["locations", "latitude", "longitude"]
df_coordinate.to_csv("coordinate.csv", index=False)

print(f"Generated {len(df_mining)} rows.")
print("Generate Files: 'mining_logistics.csv' and 'coordinate.csv'")

Generated 20000 rows.
Generate Files: 'mining_logistics.csv' and 'coordinate.csv'
