In [1]:
import pandas as pd
from math import floor
import os
from datetime import datetime
import xarray as xr

# Listado de playas con avistamientos y fechas

In [2]:
# Read the raw sightings workbook as-is; the usable header sits inside the data.
avistamientos_df = pd.read_excel("./Physalia_Ambiental_R.xlsx")
# Alternative input file, currently disabled:
# avistamientos_df = pd.read_excel("../Physala_Data/Datos_Physalia_20171010.xls")

# The first data row carries the real column names.
columnas = avistamientos_df.iloc[0]

# Skip the first three rows (an artefact of the Excel layout), then apply the names.
avistamientos_df = avistamientos_df.iloc[3:]
avistamientos_df.columns = columnas

# Restrict the table to position, date parts and sighting count.
avistamientos_df = avistamientos_df[["Latitud","Longitud","Año","Mes","Día","Avistamientos"]]

# pd.to_datetime assembles dates from a frame whose columns are named
# year / month / day, so the Spanish date-part columns are relabelled first.
avistamientos_fecha_df = avistamientos_df[["Año","Mes","Día"]]
avistamientos_fecha_df.columns = ["year","month","day"]
fecha = pd.to_datetime(avistamientos_fecha_df)

# Swap the three date-part columns for the single datetime column "Fecha".
avistamientos_df = avistamientos_df.assign(Fecha=fecha)[["Latitud","Longitud","Fecha","Avistamientos"]]
avistamientos_df.head()

Unnamed: 0,Latitud,Longitud,Fecha,Avistamientos
3,-25.4,-70.4833,2014-05-01,1
4,-18.4631,-70.3053,2014-05-01,1
5,-27.0658,-70.8259,2014-05-09,13
6,-18.4933,-70.3261,2014-05-10,15
7,-27.122,-70.8576,2014-05-10,23


# Exporto listado de playas para web

In [3]:
# Write the coordinates of every sighting record to an Excel file.
# reset_index() renumbers the rows from 0 before the two coordinate columns are kept.
playas_df = avistamientos_df.reset_index().loc[:, ['Latitud','Longitud']]
playas_df.to_excel('./playas.xlsx')

# Redondeo
Redondeo (hacia abajo, al cuarto de grado) de latitud y longitud para agrupar lecturas de una misma playa con coordenadas muy próximas

Se exporta el dataframe generado a un excel

In [4]:
def floorQuarter(x):
    """Round ``x`` down to the nearest multiple of 0.25.

    Args:
        x: A real number (for example a coordinate in degrees).

    Returns:
        The largest multiple of 0.25 that is less than or equal to ``x``,
        as a float.
    """
    # Count the whole quarter-units at or below x, then scale back.
    cuartos = floor(x * 4)
    return cuartos / 4.0

# Add latitude / longitude rounded down to the quarter degree, so that
# sightings falling in the same grid cell share the same key.
avistamientos_df["Lat_floor"] = avistamientos_df["Latitud"].map(floorQuarter)
avistamientos_df["Long_floor"] = avistamientos_df["Longitud"].map(floorQuarter)

# Grouping key: grid cell plus date.
claves = ['Lat_floor', 'Long_floor', "Fecha"]

df = avistamientos_df[claves + ["Avistamientos"]].set_index(claves)
por_celda_y_fecha = df.groupby(claves)

# Total sightings per grid cell and date.
df_sum = por_celda_y_fecha.sum()

# Number of sighting records (beaches) per grid cell and date.
df_count = por_celda_y_fecha.count()

# Both aggregates side by side, plus the mean number of sightings per beach.
df_join = df_sum.join(df_count, lsuffix="I", rsuffix="R")
df_join.columns = ['Suma', 'N_Playas']
df_join['Media_Playas'] = df_join['Suma'] / df_join['N_Playas']

# Persist the processed sightings table.
df_join.to_excel("avistamientos.xlsx")

df_join.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Suma,N_Playas,Media_Playas
Lat_floor,Long_floor,Fecha,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
-42.75,-74.25,2015-05-27,1,1,1.0
-42.0,-74.0,2014-06-04,1,1,1.0
-42.0,-73.75,2015-05-23,1,1,1.0
-41.75,-73.75,2015-06-10,20,1,20.0
-41.75,-73.75,2015-06-11,151,1,151.0


# Generar estructura
Se genera una estructura con las coordenadas de las playas, los avistamientos y las condiciones meteorológicas de su cuadrante

In [108]:
# One row per (grid cell, date): the mean sightings per beach becomes the
# sighting value and the floored coordinates become the position.
df_playas = df_join.reset_index()
df_playas = df_playas[["Media_Playas","Lat_floor","Long_floor","Fecha"]]
df_playas = df_playas.rename(columns={"Media_Playas": "Avistamientos", "Lat_floor": "Latitud", "Long_floor": "Longitud"})
df_playas = df_playas.sort_values(by=["Fecha"])

# Directory with the downloaded .nc files; a file is matched to a sighting
# when its name contains "_YYYYMMDD_" for the sighting date.
directorio_descargas = '../descargas'
listado_archivos = os.listdir(directorio_descargas)

# Output layout: sighting columns followed by the oceanographic variables.
columnas_playa = ["Avistamientos","Latitud","Longitud","Fecha"]
variables = ['Profundidad','mlotst','zos','bottomT','thetao','so','uo','vo']
# Column names as they appear in the dataset ('depth' is stored as 'Profundidad').
variables_nc = ['depth','mlotst','zos','bottomT','thetao','so','uo','vo']
# Number of depth rows (labels 0, 1, 2 of the selection) kept per sighting.
niveles_profundidad = 3

# Rows are collected in a list and turned into a DataFrame once at the end,
# instead of enlarging the DataFrame one row at a time.
registros = []
contador = 0
for index, row in df_playas.iterrows():
    # Find the .nc file for the sighting date.
    texto = '_{}_'.format(pd.Timestamp(row["Fecha"]).strftime('%Y%m%d'))
    archivo = [x for x in listado_archivos if texto in x]
    if not archivo:
        raise FileNotFoundError(
            "No file containing '{}' found in {}".format(texto, directorio_descargas)
        )

    # The context manager closes the file after each iteration; the values are
    # materialised into a DataFrame before the dataset is closed.
    with xr.open_dataset(os.path.join(directorio_descargas, archivo[0])) as data:
        # Nearest grid point to the sighting position, at the sighting date
        # (time has to be matched against the date, not a coordinate).
        xr_sel = data.sel(
            {'latitude': row["Latitud"], 'longitude': row["Longitud"], 'time': row["Fecha"]},
            method='nearest',
        )
        datos = xr_sel.to_dataframe().reset_index()

    # Repeat the sighting values once per retained depth row.
    valores_playa = row[columnas_playa].tolist()
    for nivel in range(niveles_profundidad):
        registros.append(valores_playa + datos.loc[nivel, variables_nc].tolist())

    contador += niveles_profundidad

    # Lightweight progress indicator (overwrites the same output line).
    print(contador, end='\r')

# Final table: sightings joined with the conditions at each retained depth.
df_final = pd.DataFrame(registros, columns=columnas_playa + variables)
df_final


1314

Unnamed: 0,Avistamientos,Latitud,Longitud,Fecha,Profundidad,mlotst,zos,bottomT,thetao,so,uo,vo
0,1,-18.5,-70.5,2014-05-01 00:00:00,0.494025,10.5289,0.222785,13.3423,18.6518,34.9147,-0.0555437,0.0177007
1,1,-18.5,-70.5,2014-05-01 00:00:00,5.07822,10.5289,0.222785,13.3423,18.528,34.9208,-0.0476089,-0.0146489
2,1,-18.5,-70.5,2014-05-01 00:00:00,9.573,10.5289,0.222785,13.3423,18.3727,34.9437,-0.028077,-0.044557
3,1,-25.5,-70.5,2014-05-01 00:00:00,0.494025,,,,,,,
4,1,-25.5,-70.5,2014-05-01 00:00:00,5.07822,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
1309,5,-27,-71,2016-03-13 00:00:00,5.07822,10.5289,0.0964385,7.34724,20.8433,34.7209,0.125736,0.175787
1310,5,-27,-71,2016-03-13 00:00:00,9.573,10.5289,0.0964385,7.34724,20.313,34.7118,0.128788,0.136723
1311,2,-26.5,-70.75,2016-03-13 00:00:00,0.494025,10.5289,0.0680563,13.0881,19.9145,34.7255,-0.0207526,0.485244
1312,2,-26.5,-70.75,2016-03-13 00:00:00,5.07822,10.5289,0.0680563,13.0881,19.3454,34.7179,-0.028077,0.432752


In [114]:
# Index the final table by position, date and sighting value.
# reset_index() first turns whatever index is present back into columns:
#   - on the first run that is the plain row counter, which lands in an
#     'index' column and is discarded;
#   - on a re-run it is the four-level index set below, so there is no
#     'index' column and errors='ignore' keeps the cell from failing.
df_final = (
    df_final
    .reset_index()
    .drop(['index'], axis=1, errors='ignore')
    .set_index(["Latitud","Longitud","Fecha","Avistamientos"])
)

# Persist the final table and preview the first rows.
df_final.to_excel('dataframe_final.xlsx')
df_final.head(20)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Profundidad,mlotst,zos,bottomT,thetao,so,uo,vo
Latitud,Longitud,Fecha,Avistamientos,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
-18.5,-70.5,2014-05-01,1.0,0.494025,10.5289,0.222785,13.3423,18.6518,34.9147,-0.0555437,0.0177007
-18.5,-70.5,2014-05-01,1.0,5.07822,10.5289,0.222785,13.3423,18.528,34.9208,-0.0476089,-0.0146489
-18.5,-70.5,2014-05-01,1.0,9.573,10.5289,0.222785,13.3423,18.3727,34.9437,-0.028077,-0.044557
-25.5,-70.5,2014-05-01,1.0,0.494025,,,,,,,
-25.5,-70.5,2014-05-01,1.0,5.07822,,,,,,,
-25.5,-70.5,2014-05-01,1.0,9.573,,,,,,,
-27.25,-71.0,2014-05-09,13.0,0.494025,10.5289,0.124821,12.4538,14.4241,34.6522,-0.0427259,0.151372
-27.25,-71.0,2014-05-09,13.0,5.07822,10.5289,0.124821,12.4538,14.2557,34.6522,-0.0482192,0.130619
-27.25,-71.0,2014-05-09,13.0,9.573,10.5289,0.124821,12.4538,14.178,34.6553,-0.0366222,0.118412
-27.25,-71.0,2014-05-10,23.0,0.494025,10.5289,0.120548,12.4729,14.4065,34.6568,-0.0476089,0.0982696
