In [ ]:
# 1. Instalar dependencias
!pip install xarray netCDF4 geopy tqdm skyfield requests

In [ ]:
# 2. Importar librerías
import pandas as pd
import numpy as np
import xarray as xr
from geopy.distance import geodesic
from tqdm.notebook import tqdm
from functools import lru_cache
from datetime import datetime, timezone, timedelta
from skyfield.api import load, Topos
from skyfield.almanac import sunrise_sunset, find_discrete
import requests

tqdm.pandas()

In [ ]:
# 3. Funciones de cálculo para posición, distancia y tiempo solar
def adjusted_position_mc(lat, lon, se_x, se_y, n_samples=50):
    """Return a Monte Carlo-averaged (latitude, longitude) pair.

    Draws ``n_samples`` normal variates around ``lat`` (spread ``se_y``) and
    around ``lon`` (spread ``se_x``) from NumPy's global random state, and
    returns the mean of each sample set.

    Args:
        lat: Centre of the latitude distribution.
        lon: Centre of the longitude distribution.
        se_x: Standard deviation used for the longitude draws.
        se_y: Standard deviation used for the latitude draws.
        n_samples: Number of draws per coordinate.

    Returns:
        Tuple ``(mean_lat, mean_lon)``.
    """
    # Latitude is sampled first, then longitude, so seeded runs are repeatable.
    lat_draws = np.random.normal(loc=lat, scale=se_y, size=n_samples)
    lon_draws = np.random.normal(loc=lon, scale=se_x, size=n_samples)
    return np.mean(lat_draws), np.mean(lon_draws)

def haversine_distance(lat1, lon1, lat2, lon2):
    """Return the great-circle distance between two points via the haversine formula.

    Inputs are in decimal degrees and may be scalars or NumPy arrays (they are
    combined element-wise). The result is in the same unit as the sphere
    radius ``R`` used below (6371.0).

    Args:
        lat1: Latitude of the first point, degrees.
        lon1: Longitude of the first point, degrees.
        lat2: Latitude of the second point, degrees.
        lon2: Longitude of the second point, degrees.

    Returns:
        Distance along the sphere's surface.
    """
    R = 6371.0
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = np.sin(dlat / 2) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2) ** 2
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # near-antipodal points, which would make sqrt(1 - a) return NaN.
    a = np.clip(a, 0.0, 1.0)
    return 2 * R * np.arctan2(np.sqrt(a), np.sqrt(1 - a))

def calculate_velocity(distance_km, time_sec):
    """Convert a distance covered in ``time_sec`` seconds into a per-hour rate.

    Returns 0 when ``time_sec`` is not strictly positive, so a zero or
    negative interval never causes a division error.
    """
    if not time_sec > 0:
        return 0
    per_second = distance_km / time_sec
    return per_second * 3600

def calculate_acceleration(v1, v2, time_sec):
    """Return the change from ``v1`` to ``v2`` per hour over ``time_sec`` seconds.

    Returns 0 when ``time_sec`` is not strictly positive.
    """
    if not time_sec > 0:
        return 0
    elapsed_hours = time_sec / 3600
    return (v2 - v1) / elapsed_hours

# 4. Ephemerides – daylight hours
# Module-level skyfield handles shared by get_daylight.
eph = load('de421.bsp')  # ephemeris used to look up 'Earth' and 'Sun'
ts = load.timescale()  # timescale used to build the search-window times
@lru_cache(maxsize=50000)
def get_daylight(lat, lon, date_str):
    """Return daylight information for a location on a given UTC calendar day.

    Sunrise/sunset events are searched between 00:00 UTC of ``date_str`` and
    00:00 UTC of the following day. Results are memoised per
    ``(lat, lon, date_str)``.

    Args:
        lat: Latitude in degrees.
        lon: Longitude in degrees.
        date_str: Day to evaluate, formatted ``'%Y-%m-%d'``.

    Returns:
        Tuple ``(daylight_hours, is_polar_night, is_midnight_sun)``. When both
        a sunrise and a sunset occur in the window, the hours between them are
        returned and both flags are False. Otherwise the Sun's altitude at the
        start of the window decides between 24 h (midnight sun) and 0 h
        (polar night).
    """
    d = datetime.strptime(date_str, '%Y-%m-%d').replace(tzinfo=timezone.utc)
    next_day = d + timedelta(days=1)
    t0 = ts.utc(d.year, d.month, d.day)
    t1 = ts.utc(next_day.year, next_day.month, next_day.day)
    obs = Topos(latitude_degrees=lat, longitude_degrees=lon)
    times, events = find_discrete(t0, t1, sunrise_sunset(eph, obs))

    sunr = suns = None
    for ti, ev in zip(times, events):
        if ev == 1:
            sunr = ti.utc_datetime()
        elif ev == 0:
            suns = ti.utc_datetime()

    if sunr is not None and suns is not None:
        dh = (suns - sunr).total_seconds() / 3600
        # The window is a UTC day, not a local solar day: far from the
        # Greenwich meridian the sunset found in the window can come *before*
        # the sunrise, which used to yield a negative duration. In that case
        # the gap between them is the night, so wrap around 24 h.
        if dh < 0:
            dh += 24.0
        return dh, False, False

    # No sunrise/sunset pair in the window: classify by whether the Sun is
    # above the horizon at the start of the window.
    alt = eph['Earth'] + obs
    sun_alt = alt.at(t0).observe(eph['Sun']).apparent().altaz()[0].degrees
    if sun_alt > 0:
        return 24.0, False, True
    return 0.0, True, False

# 5. Remote environmental data
# NOTE(review): TODO confirm the remote dataset actually provides every
# variable listed in vars_env.
url = 'https://polarwatch.noaa.gov/erddap/griddap/nsidcG02202v4nh1day'
vars_env = ['seaice_conc','temp_surface','wind_speed','cloud_cover']
# Open the remote dataset, then restrict it to the study period.
ds = xr.open_dataset(url, engine='netcdf4')
ds = ds.sel(time=slice('1985-01-01', '2017-12-31'))
# Datasets seen by get_env, keyed by id(). Keeping a reference here also
# guarantees the id is not reused by another object while it is cached.
_ENV_DATASETS = {}


@lru_cache(maxsize=50000)
def _get_env_cached(ds_key, var, lat, lon, date_str):
    """Memoised nearest-neighbour lookup against the dataset registered under ds_key."""
    source = _ENV_DATASETS[ds_key]
    try:
        cell = source[var].sel(
            time=np.datetime64(date_str),
            latitude=lat,
            longitude=lon,
            method='nearest',
        )
        return float(cell.values)
    except Exception:
        # Best-effort lookup: a missing variable/coordinate or a failed read
        # yields NaN. KeyboardInterrupt/SystemExit are no longer swallowed.
        return np.nan


def get_env(ds, var, lat, lon, date_str):
    """Return the value of ``var`` nearest to (date, lat, lon), or NaN on failure.

    ``lru_cache`` hashes every argument, so decorating this function directly
    fails with ``TypeError`` for unhashable dataset objects before the body
    (and its ``try``) is ever reached. The dataset is therefore registered by
    identity and only hashable values are passed to the cached helper.

    Args:
        ds: Mapping-like dataset; ``ds[var]`` must support
            ``.sel(time=..., latitude=..., longitude=..., method='nearest')``.
        var: Name of the variable to read.
        lat: Latitude to match.
        lon: Longitude to match.
        date_str: Date string accepted by ``np.datetime64``.

    Returns:
        The selected value as a float, or ``np.nan`` if the lookup fails.
    """
    ds_key = id(ds)
    _ENV_DATASETS[ds_key] = ds
    return _get_env_cached(ds_key, var, lat, lon, date_str)

In [ ]:
# 6. Load the main CSV, parse timestamps, and order fixes per animal by time
df = (
    pd.read_csv('polarBear_CTCRWlocations_chukchiBeaufort_1985-2017.csv')
    .assign(timestamp=lambda frame: pd.to_datetime(frame['timestamp']))
    .sort_values(['UniqueAnimalID', 'timestamp'])
    .reset_index(drop=True)
)


In [ ]:
# 7. Batch processing
def _track_kinematics(track):
    """Return (distances, velocities, accelerations) for one animal's ordered fixes.

    Each list has one entry per row of ``track``. The first fix has no
    predecessor, so its distance, velocity and acceleration are 0.
    """
    stamps = track['timestamp'].tolist()
    lats = track['adj_lat'].tolist()
    lons = track['adj_lon'].tolist()
    if not stamps:
        return [], [], []
    dists, vels, accs = [0.0], [0.0], [0.0]
    for j in range(1, len(stamps)):
        dt = (stamps[j] - stamps[j - 1]).total_seconds()
        dk = haversine_distance(lats[j - 1], lons[j - 1], lats[j], lons[j])
        vel = calculate_velocity(dk, dt)
        # Acceleration compares this step's velocity with the previous step's.
        accs.append(calculate_acceleration(vels[-1], vel, dt))
        dists.append(dk)
        vels.append(vel)
    return dists, vels, accs


batch_size = 10000
daylight_cols = ['daylight_hours', 'is_polar_night', 'is_midnight_sun']
res = []
for i in tqdm(range(0, len(df), batch_size), desc='Batches'):
    b = df.iloc[i:i+batch_size].copy()
    b['adj_lat'], b['adj_lon'] = zip(*b.progress_apply(lambda r: adjusted_position_mc(r['mu_lat'],r['mu_lon'],r['se_mu_x'],r['se_mu_y']),axis=1))
    b['date_str'] = b['timestamp'].dt.strftime('%Y-%m-%d')
    # Rounded coordinates keep the lru_cache keys of get_daylight/get_env coarse.
    b['lat2'] = b['adj_lat'].round(2)
    b['lon2'] = b['adj_lon'].round(2)

    # Kinematics are computed per animal and written back by index label, so
    # every row gets exactly one value. Rows that belong to no group (e.g. a
    # missing UniqueAnimalID) stay NaN.
    # NOTE(review): batches are cut by row count, so an animal whose track
    # spans two batches restarts at 0 on the first row of the second batch.
    for col in ('distance_km', 'velocity_kmh', 'acceleration_kmh2'):
        b[col] = np.nan
    for _, g in b.groupby('UniqueAnimalID', sort=False):
        dists, vels, accs = _track_kinematics(g)
        b.loc[g.index, 'distance_km'] = dists
        b.loc[g.index, 'velocity_kmh'] = vels
        b.loc[g.index, 'acceleration_kmh2'] = accs

    # Build the three daylight columns from plain tuples instead of creating
    # a pandas Series for every row.
    daylight = b.progress_apply(lambda r: get_daylight(r['lat2'], r['lon2'], r['date_str']), axis=1)
    b[daylight_cols] = pd.DataFrame(daylight.tolist(), index=b.index, columns=daylight_cols)

    for var in vars_env:
        b[var] = b.progress_apply(lambda r: get_env(ds,var,r['lat2'],r['lon2'],r['date_str']),axis=1)
    res.append(b)
df_final = pd.concat(res).reset_index(drop=True)


In [ ]:
# 8. Inspect the results (first rows of the combined frame)
df_final.head()