In [None]:
# == INSTALACIONES REQUERIDAS ==
!pip install geopy tqdm skyfield xarray netCDF4 rioxarray requests


In [None]:
# == IMPORTS ==
import pandas as pd
import numpy as np
from datetime import datetime, timezone, timedelta
from geopy.distance import geodesic
from tqdm import tqdm
from functools import lru_cache
import requests
import xarray as xr
from skyfield.api import load, Topos
from skyfield.almanac import sunrise_sunset, find_discrete


In [None]:
# == LOAD EPHEMERIDES ==
# `eph` (DE421 ephemeris file) and `ts` (Skyfield timescale) are module-level
# objects read by cached_daylight_info() to locate the Sun and build times.
eph = load('de421.bsp')
ts = load.timescale()


In [None]:
# == LOAD SEA ICE DATASET ==
# Replace the URL with your real dataset, or upload the .nc file to Colab.
# NOTE: "URL_DEL_NETCDF_DE_SEA_ICE" is a placeholder, not a real address.
# `ds_seaice` is the module-level dataset read by get_sea_ice_cover().
!wget -O sea_ice_concentration.nc "URL_DEL_NETCDF_DE_SEA_ICE"
ds_seaice = xr.open_dataset('sea_ice_concentration.nc')

In [None]:
# == FUNCIONES BASE ==

def calculate_distance(lat1, lon1, lat2, lon2):
    """Great-circle (haversine) distance in kilometres between two points.

    Parameters
    ----------
    lat1, lon1, lat2, lon2 : float or array-like
        Coordinates in decimal degrees. Inputs are passed through NumPy
        ufuncs, so arrays broadcast element-wise.

    Returns
    -------
    float or array
        Distance on a sphere of radius 6371.0 km.
    """
    R = 6371.0  # sphere radius in km used for the conversion
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = np.sin(dlat / 2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2)**2
    # Rounding can push `a` marginally outside [0, 1] for (near-)antipodal
    # points, which would make sqrt(1 - a) return NaN; clamp it first.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    return R * c

def calculate_velocity(distance, time_diff):
    """Return distance divided by time_diff, or 0 when time_diff is not positive."""
    if time_diff > 0:
        return distance / time_diff
    return 0

def calculate_acceleration(velocity1, velocity2, time_diff):
    """Return the change from velocity1 to velocity2 per unit time_diff.

    Yields 0 when time_diff is not positive.
    """
    if time_diff > 0:
        delta_v = velocity2 - velocity1
        return delta_v / time_diff
    return 0

@lru_cache(maxsize=100000)
def cached_daylight_info(date_str, lat, lon):
    """Daylight summary for one UTC calendar day at a location (memoised).

    Parameters
    ----------
    date_str : str
        Date formatted as '%Y-%m-%d'; interpreted as a UTC day.
    lat, lon : float
        Observer position in decimal degrees.

    Returns
    -------
    tuple
        (daylight_hours, is_polar_night, is_midnight_sun).
        * Both a sunrise and a sunset found in the UTC day:
          (hours, False, False).
        * Neither found: (24.0, False, True) if the Sun's altitude at the
          start of the day is above 0 degrees, else (0.0, True, False).
        * Only one of the two found: (None, None, None).
    """
    date = datetime.strptime(date_str, '%Y-%m-%d').replace(tzinfo=timezone.utc)
    next_day = date + timedelta(days=1)
    t0 = ts.utc(date.year, date.month, date.day)
    t1 = ts.utc(next_day.year, next_day.month, next_day.day)
    observer = Topos(latitude_degrees=lat, longitude_degrees=lon)
    f = sunrise_sunset(eph, observer)
    times, events = find_discrete(t0, t1, f)

    # Event value 1 is recorded as sunrise and 0 as sunset; if an event type
    # occurs more than once in the window, the last occurrence wins.
    sunrise = sunset = None
    for ti, event in zip(times, events):
        if event == 1:
            sunrise = ti.utc_datetime()
        elif event == 0:
            sunset = ti.utc_datetime()

    if sunrise is not None and sunset is not None:
        daylight_hours = (sunset - sunrise).total_seconds() / 3600
        # The search window is a UTC day, so at longitudes far from Greenwich
        # the sunset of the previous local day can come before the sunrise.
        # The raw difference is then minus the night length; wrapping by 24 h
        # gives the day length (treating consecutive days as equal).
        if daylight_hours < 0:
            daylight_hours += 24.0
        return daylight_hours, False, False

    if sunrise is None and sunset is None:
        # No transition during the day: the Sun stays on one side of the
        # horizon, so its altitude at the start of the day decides which.
        observer_position = eph['Earth'] + observer
        sun_alt = observer_position.at(t0).observe(eph['Sun']).apparent().altaz()[0].degrees
        if sun_alt > 0:
            return 24.0, False, True
        return 0.0, True, False

    # Exactly one of sunrise/sunset was found inside the window.
    return None, None, None

@lru_cache(maxsize=100000)
def _fetch_env_data(lat, lon, date_str):
    """Query the archive API for one day at one location; raise on failure.

    Only successful responses are memoised, because lru_cache does not store
    calls that raise.
    """
    url = "https://archive-api.open-meteo.com/v1/archive"
    params = {
        'latitude': lat,
        'longitude': lon,
        'start_date': date_str,
        'end_date': date_str,
        # NOTE(review): confirm 'cloudcover' is accepted as a *daily* variable
        # by this endpoint; if the API rejects it, every call yields NaN.
        'daily': ['temperature_2m_max', 'cloudcover', 'windspeed_10m_max'],
        'timezone': 'UTC'
    }
    r = requests.get(url, params=params, timeout=10)
    r.raise_for_status()
    daily = r.json()['daily']
    # JSON null arrives as None; report it as NaN like any other missing value.
    return tuple(
        np.nan if daily[key][0] is None else daily[key][0]
        for key in ('temperature_2m_max', 'cloudcover', 'windspeed_10m_max')
    )


def get_env_data(lat, lon, date_str):
    """Return (temp, cloud, wind) for a location and day, best effort.

    Parameters
    ----------
    lat, lon : float
        Location sent as 'latitude' / 'longitude'.
    date_str : str
        Sent as both 'start_date' and 'end_date'.

    Returns
    -------
    tuple
        First entries of the 'temperature_2m_max', 'cloudcover' and
        'windspeed_10m_max' daily series. Request, HTTP, decoding or
        missing-field errors give (nan, nan, nan). Failures are not cached,
        so a later call with the same arguments retries the request.
    """
    try:
        return _fetch_env_data(lat, lon, date_str)
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError):
        return np.nan, np.nan, np.nan


# Keep the cache-management helpers callers may expect from an lru_cache'd function.
get_env_data.cache_info = _fetch_env_data.cache_info
get_env_data.cache_clear = _fetch_env_data.cache_clear

def get_sea_ice_cover(lat, lon, date_str):
    """Look up 'sea_ice_concentration' in `ds_seaice` at the nearest cell.

    The nearest `time` to date_str is selected first, then the nearest
    `lat`/`lon`. Returns the scalar value, or NaN when the lookup or the
    conversion fails with a key, index, value or type error. Other
    exceptions (including KeyboardInterrupt) propagate to the caller.
    """
    try:
        date = np.datetime64(date_str)
        on_date = ds_seaice['sea_ice_concentration'].sel(time=date, method='nearest')
        cell = on_date.sel(lat=lat, lon=lon, method='nearest')
        return cell.values.item()
    except (KeyError, IndexError, ValueError, TypeError):
        return np.nan

# Memoised variant used by the row-wise environmental lookup.
cached_sea_ice = lru_cache(maxsize=100000)(get_sea_ice_cover)


In [None]:
# == EXAMPLE: LOADING THE DATASET ==
# df = pd.read_csv('polarBear_CTCRWlocations_chukchiBeaufort_1985-2017.csv')
# Make sure timestamp is a UTC datetime
# df['timestamp'] = pd.to_datetime(df['timestamp'], utc=True)

# == PREPARE DATAFRAME ==
# `df` must already be loaded (see the commented example above).
df = df.sort_values(['UniqueAnimalID', 'timestamp']).reset_index(drop=True)

# Previous fix of the same animal for each quantity (NaN/NaT on its first row).
lagged_columns = {
    'prev_lat': 'mu_lat',
    'prev_lon': 'mu_lon',
    'prev_time': 'timestamp',
    'prev_se_x': 'se_mu_x',
    'prev_se_y': 'se_mu_y',
}
for lagged_name, source_name in lagged_columns.items():
    df[lagged_name] = df.groupby('UniqueAnimalID')[source_name].shift(1)

# Hours elapsed since the previous fix.
elapsed = df['timestamp'] - df['prev_time']
df['time_diff_hours'] = elapsed.dt.total_seconds() / 3600

# Combined standard error (in metres, per the original note) for the
# current and the previous fix.
df['se_total'] = np.sqrt(df['se_mu_x']**2 + df['se_mu_y']**2)
df['prev_se_total'] = np.sqrt(df['prev_se_x']**2 + df['prev_se_y']**2)


def _step_distance_km(row):
    """Geodesic distance in km from the previous fix; 0 when there is none."""
    if pd.notnull(row['prev_lat']):
        origin = (row['prev_lat'], row['prev_lon'])
        destination = (row['mu_lat'], row['mu_lon'])
        return geodesic(origin, destination).kilometers
    return 0


# Step distance and its error, both in km.
df['distance_km'] = df.apply(_step_distance_km, axis=1)

se_current = df['se_total'].fillna(0)
se_previous = df['prev_se_total'].fillna(0)
df['distance_error_km'] = se_current.add(se_previous) / 1000

# Distance with the error subtracted, floored at zero.
df['distance_adj_km'] = df['distance_km'].sub(df['distance_error_km']).clip(lower=0)

# Rows with no elapsed time get velocity and acceleration forced to 0.
zero_dt = df['time_diff_hours'] == 0

# Velocity from the adjusted distance.
df['velocity_kmh'] = df['distance_adj_km'].div(df['time_diff_hours'])
df.loc[zero_dt, 'velocity_kmh'] = 0

# Acceleration: (current velocity - previous velocity) / elapsed time.
df['prev_velocity_kmh'] = df.groupby('UniqueAnimalID')['velocity_kmh'].shift(1)
velocity_change = df['velocity_kmh'] - df['prev_velocity_kmh']
df['acceleration_kmh2'] = velocity_change / df['time_diff_hours']
df.loc[zero_dt, 'acceleration_kmh2'] = 0


In [None]:
# == COMPUTE DAYLIGHT INFO WITH tqdm AND lru_cache ==
from tqdm.notebook import tqdm
tqdm.pandas()  # registers DataFrame.progress_apply

df['date_str'] = df['timestamp'].dt.strftime('%Y-%m-%d')


def _daylight_for_row(row):
    """Daylight tuple for one row's date and position (cached lookup)."""
    return cached_daylight_info(row['date_str'], row['mu_lat'], row['mu_lon'])


daylight_records = df.progress_apply(_daylight_for_row, axis=1).tolist()
daylight_columns = ['daylight_hours', 'is_polar_night', 'is_midnight_sun']
df[daylight_columns] = pd.DataFrame(daylight_records, index=df.index)


In [None]:
# == QUERY ENVIRONMENTAL DATA AND RECORD IT IN THE DATAFRAME ==
def fetch_env(row):
    """Collect weather and sea-ice values for one row as a labelled Series.

    Reads 'mu_lat', 'mu_lon' and 'date_str' from `row` and returns a Series
    indexed by 'temp_surface', 'cloud_cover', 'wind_speed', 'sea_ice_cover'.
    """
    lat, lon, day = row['mu_lat'], row['mu_lon'], row['date_str']
    temp, cloud, wind = get_env_data(lat, lon, day)
    sea_ice = cached_sea_ice(lat, lon, day)
    labels = ['temp_surface', 'cloud_cover', 'wind_speed', 'sea_ice_cover']
    return pd.Series([temp, cloud, wind, sea_ice], index=labels)

# Row-wise lookup of the environmental values.
df_env = df.progress_apply(fetch_env, axis=1)
# Drop any env columns left over from a previous run of this cell so that
# re-running it replaces them instead of appending duplicate column names.
df = pd.concat([df.drop(columns=df_env.columns, errors='ignore'), df_env], axis=1)
