In [9]:
import pandas as pd
import math
import os
from datetime import datetime
import xarray as xr

# Listado de playas con avistamientos y fechas

In [10]:
avistamientos_df = pd.read_excel("./Physalia_Ambiental_R.xlsx")
# avistamientos_df = pd.read_excel("../Physala_Data/Datos_Physalia_20171010.xls")

columnas = avistamientos_df.iloc[0]

#Quito las 3 primeras filas, debido al formato de la excell
avistamientos_df = avistamientos_df.iloc[3:] 
avistamientos_df.columns = columnas

# Me quedo solo con los datos de avistamientos
avistamientos_df = avistamientos_df[["Latitud","Longitud","Año","Mes","Día","Avistamientos"]]

#Transdormaciones para sacar con fecha (datetime)
avistamientos_fecha_df=avistamientos_df[["Año","Mes","Día"]]
avistamientos_fecha_df.columns = ["year","month","day"]

fecha = pd.to_datetime(avistamientos_fecha_df)

avistamientos_df["Fecha"]=fecha
avistamientos_df=avistamientos_df[["Latitud","Longitud","Fecha","Avistamientos"]]
avistamientos_df.head()

Unnamed: 0,Latitud,Longitud,Fecha,Avistamientos
3,-25.4,-70.4833,2014-05-01,1
4,-18.4631,-70.3053,2014-05-01,1
5,-27.0658,-70.8259,2014-05-09,13
6,-18.4933,-70.3261,2014-05-10,15
7,-27.122,-70.8576,2014-05-10,23


# Exporto listado de playas para web

In [11]:
# Genera excel con las coordenadas de las playas
playas_df = avistamientos_df.reset_index()
playas_df = playas_df[['Latitud','Longitud']]
playas_df.to_excel('./playas.xlsx')

# Redondeo
Redondeo de latitud y longitud para juntar lecturas de una misma playa con coordenadas muy proximas

Se exporta el dataframe generado a un excel

In [12]:
def floorQuarter(x):
    return (floor(x * 4) / 4.0) 

#Se añaden atributos con la longitud y latitud redondeadas a cada cuarto de grado
avistamientos_df["Lat_floor"] = avistamientos_df.Latitud.map(floorQuarter)
avistamientos_df["Long_floor"] = avistamientos_df.Longitud.map(floorQuarter)

df = avistamientos_df[['Lat_floor', 'Long_floor',"Fecha","Avistamientos"]]
df = df.set_index(['Lat_floor', 'Long_floor',"Fecha"])

#DataFrame total avistamientos de playas en esa cuadricula
df_sum = df.groupby(['Lat_floor', 'Long_floor',"Fecha"]).sum()

#DataFrame número de playas en esa cuadricula
df_count = df.groupby(['Lat_floor', 'Long_floor',"Fecha"]).count()

#Dataframe con el total de avistamientos y el número de playas
df_join = df_sum.join(df_count,lsuffix="I",rsuffix="R")
df_join.columns=['Suma', 'N_Playas']
df_join['Media_Playas'] = df_join.Suma /df_join.N_Playas

# DataFrame de avistamientos procesado
df_join.to_excel("avistamientos.xlsx")

df_join.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Suma,N_Playas,Media_Playas
Lat_floor,Long_floor,Fecha,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
-42.75,-74.25,2015-05-27,1,1,1.0
-42.0,-74.0,2014-06-04,1,1,1.0
-42.0,-73.75,2015-05-23,1,1,1.0
-41.75,-73.75,2015-06-10,20,1,20.0
-41.75,-73.75,2015-06-11,151,1,151.0


# Generar estructura
Se genera estructura con las coordenadas de las playas, los avistamientos y las condiciones metorológicas de su cuadrante

In [15]:
df_playas = df_join.reset_index()
# Cojo las columnas que me interesan
df_playas = df_playas[["Suma","Lat_floor","Long_floor","Fecha"]]
# Renombro las columnas 
df_playas = df_playas.rename(columns={"Suma": "Avistamientos", "Lat_floor": "Latitud", "Long_floor": "Longitud"})
# Ordeno por fecha
df_playas = df_playas.sort_values(by=["Fecha"])
# Listo todos los archivos de Copernicus
listado_archivos = os.listdir('../descargas')
# dataframe de salida
df_final = pd.DataFrame(columns = ["Avistamientos","Latitud","Longitud","Fecha","Profundidad","mlotst",'zos','bottomT','thetao','so','uo','vo'])

variables = ['Profundidad','mlotst','zos','bottomT','thetao','so','uo','vo']
contador = 0
for contador,[index, row] in enumerate(df_playas.iterrows()):
    # print(contador,index, row)
    # busca el archivo .nc de la fecha requerida
    texto ='_{}_'.format(str(row["Fecha"]).split()[0].replace('-',''))
    archivo = [x for x in listado_archivos if str(texto) in x]
    # cargo el archivo
    data = xr.open_dataset('../descargas/{}'.format(archivo[0]))
    # si los valores del cuadrante seleccionado fuesen nulos nos desplazamos hacia la izquierda hasta encontrar valores validos
    nulo = True
    desfase = 0
    while nulo == True:
        nulo = False
        xr_sel = data.sel({'latitude':row["Latitud"],'longitude':row["Longitud"]-desfase},method='nearest')
        nulo = math.isnan(xr_sel.mlotst.values[0])
        desfase += data.longitude.step
    
    datos = xr_sel.to_dataframe().reset_index()
    # copio los valores de las tres profundidades
    #1
#     df_final.loc[contador,["Avistamientos","Latitud","Longitud","Fecha"]] = row.values.tolist()
#     df_final.loc[contador,variables] = datos.loc[0,['depth', 'mlotst','zos','bottomT','thetao','so','uo','vo']].values.tolist()
#     #2
#     df_final.loc[contador + 1,["Avistamientos","Latitud","Longitud","Fecha"]] = row.values.tolist()
#     df_final.loc[contador + 1,variables] = datos.loc[1,['depth', 'mlotst','zos','bottomT','thetao','so','uo','vo']].values.tolist()
#     #3
#     df_final.loc[contador + 2,["Avistamientos","Latitud","Longitud","Fecha"]] = row.values.tolist()
#     df_final.loc[contador + 2,variables] = datos.loc[2,['depth', 'mlotst','zos','bottomT','thetao','so','uo','vo']].values.tolist()

    for x in range(3):
        df_final.loc[contador + x,["Avistamientos","Latitud","Longitud","Fecha"]] = row.values.tolist()
        df_final.loc[contador + x,variables] = datos.loc[x,['depth', 'mlotst','zos','bottomT','thetao','so','uo','vo']].values.tolist()
    
    print(contador, end='\r')    

df_final


437

Unnamed: 0,Avistamientos,Latitud,Longitud,Fecha,Profundidad,mlotst,zos,bottomT,thetao,so,uo,vo
0,1,-18.5,-70.5,2014-05-01 00:00:00,0.494025,10.5289,0.222785,13.3423,18.6518,34.9147,-0.0555437,0.0177007
1,1,-25.5,-70.5,2014-05-01 00:00:00,0.494025,10.6815,0.198675,13.1834,15.2078,34.7133,-0.108646,-0.116581
2,13,-27.25,-71,2014-05-09 00:00:00,0.494025,10.5289,0.124821,12.4538,14.4241,34.6522,-0.0427259,0.151372
3,23,-27.25,-71,2014-05-10 00:00:00,0.494025,10.5289,0.120548,12.4729,14.4065,34.6568,-0.0476089,0.0982696
4,15,-18.5,-70.5,2014-05-10 00:00:00,0.494025,10.5289,0.167241,13.5664,18.8371,34.9025,-0.117801,0.267953
...,...,...,...,...,...,...,...,...,...,...,...,...
435,4,-27.25,-71,2016-03-12 00:00:00,0.494025,10.5289,0.0820948,12.8289,20.187,34.695,0.116581,0.170293
436,5,-27,-71,2016-03-13 00:00:00,0.494025,10.5289,0.0964385,7.34724,21.0139,34.7255,0.13184,0.206915
437,2,-26.5,-70.75,2016-03-13 00:00:00,0.494025,10.5289,0.0680563,13.0881,19.9145,34.7255,-0.0207526,0.485244
438,2,-26.5,-70.75,2016-03-13 00:00:00,5.07822,10.5289,0.0680563,13.0881,19.3454,34.7179,-0.028077,0.432752


In [16]:
df_final_2 = df_final.reset_index()
# Ordeno y exporto dataFrame resultante
df_final_2 = df_final_2.set_index(["Latitud","Longitud","Fecha"])
df_final_2 = df_final_2.drop(['index'], axis=1)
df_final_2 = df_final_2.sort_values(by=["Latitud","Longitud"])
df_final_2.to_excel('dataframe_final.xlsx')
df_final_2


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Avistamientos,Profundidad,mlotst,zos,bottomT,thetao,so,uo,vo
Latitud,Longitud,Fecha,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
-42.75,-74.25,2015-05-27,1,0.494025,52.3392,0.103763,12.7256,12.7263,33.3552,0.0360118,-0.106815
-42.00,-74.00,2014-06-04,1,0.494025,13.7333,0.0622578,11.7126,11.5698,33.5505,-0.0231941,-0.0817896
-42.00,-73.75,2015-05-23,1,0.494025,31.7393,0.10651,12.4656,12.4531,33.3583,0.0122074,0.177007
-41.75,-73.75,2015-06-10,20,0.494025,13.8859,0.0570696,12.3733,12.3572,33.372,-0.0567644,0.0903348
-41.75,-73.75,2015-06-11,151,0.494025,13.8859,0.0207526,12.133,12.1169,33.3308,-0.153203,0.0677511
...,...,...,...,...,...,...,...,...,...,...,...
-18.50,-70.50,2014-05-01,1,0.494025,10.5289,0.222785,13.3423,18.6518,34.9147,-0.0555437,0.0177007
-18.50,-70.50,2014-05-10,15,0.494025,10.5289,0.167241,13.5664,18.8371,34.9025,-0.117801,0.267953
-18.50,-70.50,2014-07-18,1,0.494025,10.5289,-0.0253304,12.5864,15.1793,34.814,-0.0701926,0.0964385
-18.50,-70.50,2014-08-17,1,0.494025,10.6815,0.0408948,12.3572,15.9813,34.8109,-0.0225837,0.0579852
