In [7]:
# type: ignore

import requests
import numpy as np

import pandas as pd 
import geopandas as gpd

import pyproj
from shapely.ops import transform

import shapely
from shapely import Point

# import boto3
import os
import django

pd.set_option('display.precision', 2)

wdwanted = '/Users/cg/Dropbox/code/Python/water/app/backend/'
os.chdir(wdwanted)

env_file = '../.env'
file_content = open(env_file).read()
lines = file_content.splitlines()
for line in lines:
    parts = line.split('=')
    if len(parts) == 2:
        key, value = parts
        os.environ[key] = value
        
os.environ['DB_USED']='local'

os.environ.setdefault("DJANGO_SETTINGS_MODULE", "water.settings.dev")
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"
django.setup()
from water.models import ReservoirState, Reservoir, ReservoirStateSerializer, RainFall

from water.utils import parse as p
from django.db.models import Count
from water.utils import data

# Reload the data module


In [62]:
import importlib

importlib.reload(data)
rd = data.get_reservoir_data()
rd[0].num_states, rd[0].volume_latest

(12, 13.57)

<Reservoir: Reservoir object (455)>

In [53]:
from django.db.models import OuterRef, Subquery, Max
from django.db.models.functions import Coalesce

most_recent_dates = (
        ReservoirState.objects.filter(reservoir=OuterRef("pk"))
        .order_by("-date")
        .values("date")[:1]
    )
most_recent_volume = (
    ReservoirState.objects.filter(
        reservoir=OuterRef("pk"), date='2023-09-01'
    )
    .values("volume")[:1]
)

reservoirs = Reservoir.objects.annotate(
    volume_latest=Subquery(most_recent_volume),
)

[r.volume_latest for r in reservoirs]
#reservoirs[1].recent_date, reservoirs[1].volume_latest #, reservoirs[1].num_states

[13.57, 49.88, 30.74, 11.97, 24.82, 5.96, 7.07]

[None, None, None, None, None, None, None]

In [48]:
ReservoirState.objects.all()[0].reservoir.uuid

UUID('ee2b86bc-5d79-4e77-81c5-0c76a172e90b')

In [51]:
Reservoir.objects.get(uuid='ee2b86bc-5d79-4e77-81c5-0c76a172e90b').uuid

UUID('ee2b86bc-5d79-4e77-81c5-0c76a172e90b')

In [35]:
# Get the sql for `reservoirs`
q = reservoirs.query

q_str = str(q)

# Show q_str with a line with of 100 charcters, using standard python textwrap
import textwrap
print(textwrap.fill(q_str.replace('water_', ''), width=100))

SELECT "reservoir"."id", "reservoir"."uuid", "reservoir"."name", "reservoir"."capacity",
"reservoir"."name_full", "reservoir"."province", (SELECT V0."volume" FROM "reservoirstate" V0 WHERE
(V0."date" = (SELECT U0."date" FROM "reservoirstate" U0 WHERE U0."reservoir_id" = (V0."id") ORDER BY
U0."date" DESC LIMIT 1) AND V0."reservoir_id" = ("reservoir"."id")) LIMIT 1) AS "volume_latest" FROM
"reservoir"


In [4]:
inputs = {
  'num_obs': 500,
 'start_date': '2000-01-01',
 'end_date': '2024-12-31',
 'is_first_of_year': True,
 'reservoir_uuids': ['ba76237b-e4d8-4603-bee1-4196e0cbc1a5']
}

data.get_reservoir_states_data(**inputs)

<QuerySet []>

In [5]:
data.get_wide_data(**inputs)

[]

<QuerySet []>

<QuerySet []>

In [17]:
data.get_wide_data(**inputs)

[]

In [18]:
inputs

{'num_obs': 500,
 'start_date': '2000-01-01',
 'end_date': '2024-12-31',
 'is_first_of_year': True,
 'reservoir_uuids': 'ba76237b-e4d8-4603-bee1-4196e0cbc1a5'}

In [13]:
filename_all = 'water/data_raw/all_parsed_cleaned.csv'
df_all_raw = pd.read_csv(filename_all).sort_values(['province', 'reservoir', 'ds'])
name_df = df_all_raw.reservoir.unique()

In [14]:
def pick_monthly(df):
    df_monthly = df[df.ds.str.slice(8, 10) == "01"].copy()
    return df_monthly

df = pick_monthly(df_all_raw)

In [19]:
df['month'] = df.ds.str.slice(5, 7)
df = df[df.month=='09']
df['year'] = df.ds.str.slice(0, 4)

df = df[df.year=='2023']
df.columns

Index(['ds', 'reservoir', 'province', 'rainfallsince', 'avgrainfall1971_2000',
       'capacity_hm3', 'stored_hm3', 'variation24h_hm3', 'weekchange',
       'yearchange', 'fillingpcttoday', 'weekago', 'yearago',
       'avgreserve5yrs_hm3', 'month', 'year'],
      dtype='object')

In [20]:
df.groupby('province').capacity_hm3.sum()

province
almeria     222.98
cadiz      1821.00
cordoba    3248.20
granada    1126.60
huelva     1410.00
jaen       2457.90
malaga      616.80
sevilla     956.20
Name: capacity_hm3, dtype: float64

In [24]:
df.query('province=="cordoba"')[['reservoir', 'rainfallsince', 'avgrainfall1971_2000', 'capacity_hm3', 'stored_hm3']]

Unnamed: 0,reservoir,rainfallsince,avgrainfall1971_2000,capacity_hm3,stored_hm3
3380,arenoso,405.3,539.3,167.0,23.54
4479,bembezar,412.7,640.4,342.1,34.71
19417,guadalmellato,465.0,602.7,146.6,45.21
21234,guadanunno,534.4,683.5,1.6,0.95
22946,iznajar,301.5,596.4,981.1,150.5
25761,la brenna,426.7,548.3,823.0,94.21
30316,martin gonzalo,449.0,584.9,17.8,2.56
33119,puente nuevo,456.4,620.4,281.7,31.19
34884,retortillo,426.6,659.6,61.2,6.31
37212,san rafael de navallana,485.1,547.9,156.5,59.44


In [2]:
reservoir = Reservoir.objects.all()[0]
reservoir.name_full

'Embalse de la Viñuela'

In [4]:
qs = Reservoir.objects.all()

type(qs)

django.db.models.query.QuerySet

In [64]:
filename_all = '../../../../data/datasets/all_parsed_cleaned.csv'
df_all_raw = pd.read_csv(filename_all).sort_values(['province', 'reservoir', 'ds'])
name_df = df_all_raw.reservoir.unique()


In [112]:
filename_all = '../../../../data/datasets/all_parsed_cleaned.csv'
df_all_raw = pd.read_csv(filename_all).sort_values(['province', 'reservoir', 'ds'])
name_df = df_all_raw.reservoir.unique()

file = '../../../../data/reservoirs/InfGeografica/InfVectorial/Gpkg/Inv_presas_embalses.gpkg'

# Open the file in geopandas
gdf_raw = gpd.read_file(file)
name_geo = gdf_raw[gdf_raw.nombre.notnull()].nombre.unique()

replace_vals = ['embalse del ', 'embalse de ', 'embalse ']

def clean_name(name):
    for val in replace_vals:
        name = name.lower().replace(val, '')
    return name

names_geo_dict = {name: clean_name(name) for name in name_geo}

In [73]:
name_df

array(['beninar', 'cuevas de almanzora', 'almodovar', 'arcos', 'barbate',
       'bornos', 'celemin', 'charco redondo', 'guadalcacin',
       'guadarranque', 'los hurones', 'zahara', 'zahara - el gastor',
       'arenoso', 'bembezar', 'guadalmellato', 'guadanunno', 'iznajar',
       'la brenna', 'martin gonzalo', 'puente nuevo', 'retortillo',
       'san rafael de navallana', 'sierra boyera', 'yeguas', 'beznar',
       'canales', 'colomera', 'cubillas', 'el portillo',
       'francisco abellan', 'los bermejales', 'negratin', 'quentar',
       'rules', 'san clemente', 'andevalo', 'aracena', 'chanza',
       'corumbel bajo', 'jarrama', 'los machos', 'piedras', 'zufre',
       'aguascebas', 'dannador', 'giribaile', 'guadalen', 'guadalmena',
       'jandula', 'la bolera', 'la fernandina', 'quiebrajano', 'rumblar',
       'tranco de beas', 'vadomojon', 'viboras', 'casasola',
       'conde de guadalhorce', 'el limonero', 'guadalhorce', 'guadalteba',
       'la concepcion', 'la vinnuela', 'ca

{'Embalse de Benínar': 'beninar',
 'Embalse de Cuevas de Almanzora': 'cuevas de almanzora',
 'Embalse de Almodóvar': 'almodovar',
 'Embalse de Arcos': 'arcos',
 'Embalse de Barbate': 'barbate',
 'Embalse de Bornos': 'bornos',
 'Embalse del Celemín': 'celemin',
 'Embalse de Charco Redondo': 'charco redondo',
 'Embalse de Guadalcacín': 'guadalcacin',
 'Embalse de Guadarranque': 'guadarranque',
 'Embalse de los Hurones': 'los hurones',
 'Embalse de Zahariche': 'zahara',
 'Embalse de Zahara - El Gastor': 'zahara - el gastor',
 'Embalse del Arenoso': 'arenoso',
 'Embalse del Bembézar': 'bembezar',
 'Embalse de Guadalmellato': 'guadalmellato',
 'Embalse de Guadalén': 'guadanunno',
 'Embalse de Iznájar': 'iznajar',
 'Embalse de la Feda': 'la brenna',
 'Embalse de Martín Gonzalo': 'martin gonzalo',
 'Embalse de Puente Nuevo': 'puente nuevo',
 'Embalse del Retortillo': 'retortillo',
 'Embalse de San Rafael de Navallana': 'san rafael de navallana',
 'Embalse de Sierra Boyera': 'sierra boyera',
 

cod_emb                                                     1.0
nombre                                Embalse de Tranco de Beas
cod_est                                                     E01
cod_roea                                                   5001
idemb_cbrh                                            4000115.0
cod_massup                                         ES0511100055
fuente_nom      IDE Confederación Hidrográfica del Guadalquivir
area                                                 17415295.0
perimetro                                               88713.0
fecha_alta                                                 2010
fecha_baja                                                    0
geometry      MULTIPOLYGON (((523504.7671098917 4229365.8424...
Name: 0, dtype: object

In [153]:
!ls -alh ../../../frontend/src/data 

total 320
drwxr-xr-x@  4 cg  staff   128B Mar 27 16:51 [1m[36m.[m[m
drwxr-xr-x@ 13 cg  staff   416B Mar 27 16:40 [1m[36m..[m[m
-rw-r--r--@  1 cg  staff    81K Mar 27 16:50 reservoirs.geojson
-rw-r--r--@  1 cg  staff    74K Mar 27 16:51 reservoirs.json


In [162]:
# type: ignore
filename = '../../../frontend/src/data/reservoirs.json'
gdf = gdf_raw[gdf_raw.nombre.isin(df_matches['name_geo_full'])].copy()
gdf['name_data'] = [dict_matches[n] for n in gdf.nombre]
gdf.geometry = gdf.geometry.simplify(100)
# Turn this into a geo crs
gdf = gdf.to_crs("EPSG:4326")
# Reduce the storage size by making the geometry less granular

gdf.to_file(filename, driver='GeoJSON')
gdf.iloc[0]

cod_emb                                                     1.0
nombre                                Embalse de Tranco de Beas
cod_est                                                     E01
cod_roea                                                   5001
idemb_cbrh                                            4000115.0
cod_massup                                         ES0511100055
fuente_nom      IDE Confederación Hidrográfica del Guadalquivir
area                                                 17415295.0
perimetro                                               88713.0
fecha_alta                                                 2010
fecha_baja                                                    0
geometry      POLYGON ((-2.731509444260477 38.2119503448655,...
name_data                                        tranco de beas
Name: 0, dtype: object

In [163]:
gdf.name_data.unique()

array(['tranco de beas', 'conde de guadalhorce', 'la brenna',
       'la fernandina', 'guadalmena', 'giribaile', 'aguascebas',
       'san clemente', 'el portillo', 'la bolera', 'negratin', 'dannador',
       'guadanunno', 'quiebrajano', 'rumblar', 'vadomojon',
       'sierra boyera', 'jandula', 'yeguas', 'martin gonzalo',
       'guadalmellato', 'san rafael de navallana', 'viboras',
       'puente nuevo', 'bembezar', 'retortillo', 'canales', 'quentar',
       'colomera', 'cubillas', 'los bermejales', 'iznajar', 'gergal',
       'zahara', 'jose toran', 'puebla de cazalla', 'el pintado',
       'melonares', 'aracena', 'zufre', 'la minilla', 'cala', 'el agrio',
       'torre del aguila', 'la concepcion', 'chanza', 'andevalo',
       'guadalcacin', 'zahara - el gastor', 'bornos', 'arcos',
       'los hurones', 'barbate', 'guadarranque', 'celemin',
       'charco redondo', 'almodovar', 'guadalteba', 'beninar', 'casasola',
       'huesna', 'beznar', 'la vinnuela', 'el limonero', 'corumbel b

In [50]:
RainFall.objects.all()

<QuerySet []>

In [46]:
ReservoirState.objects.all()[0].reservoir.name

'conde de guadalhorce'

In [33]:
reservoirs = Reservoir.objects.annotate(
        num_states=Count("reservoir_reservoirstate")
    )

# Get all reservoirs with num_states > 0
reservoirs = reservoirs.filter(num_states__gt=0)
reservoirs[0].num_states


100

In [36]:
reservoirs[0].name

'conde de guadalhorce'

In [20]:
np.power(100000, 1/3)

46.41588833612778

In [None]:
cm = 1000
hm3 = 100000 * cm

