In [8]:
from PIL import Image
import requests
import numpy as np
from PIL import Image

import pandas as pd 
import geopandas as gpd
# import osmnx as ox
# import networkx as nx

import pyproj
from shapely.ops import transform

import shapely
from shapely import Point

# import boto3
import os
import django

# Display floats with a precision of 2 when pandas objects are rendered.
pd.set_option('display.precision', 2)

# Point Django at the project's settings module unless one is already set in the environment.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "water.settings")
# NOTE(review): presumably required because the notebook kernel runs inside an event loop
# and Django otherwise rejects synchronous ORM calls there — confirm against the Django docs.
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"
# Initialise Django; the model imports that follow are placed after this call.
django.setup()

from water.models import ReservoirState, Reservoir, ReservoirStateSerializer, RainFall
from django.db.models import Count

from water.utils import parse as p

In [2]:
# Read the cleaned dataset, then order it by province, reservoir and `ds`.
filename_all = '../../../../data/datasets/all_parsed_cleaned.csv'
df_all_raw = pd.read_csv(filename_all)
df_all_raw = df_all_raw.sort_values(by=['province', 'reservoir', 'ds'])

In [3]:
# For every (province, reservoir) pair: the last capacity value in row order and the
# number of distinct capacity values that appear.
capacities = (
    df_all_raw
    .groupby(['province', 'reservoir'])['capacity_hm3']
    .agg(['last', 'nunique'])
    .reset_index()
)

# Distinct capacity values recorded for "el limonero".
df_all_raw.loc[df_all_raw['reservoir'] == "el limonero", 'capacity_hm3'].unique()

array([24.7, 22.3])

In [42]:
# Selection parameters.
ds_start = "2012-09-01"   # earliest `ds` value to keep
province = "malaga"       # only reservoirs of this province are selected
num_states = 10000        # row cap applied with `.head(num_states)` when loading the database
num_res_max = 5           # number of reservoirs to keep, largest capacity first

# Per reservoir: the last capacity value in row order ('last') and the number of distinct
# capacity values ('nunique'). A reservoir can have more than one recorded capacity,
# so 'last' is the value used for ranking.
capacities = (
    df_all_raw
    .groupby(['province', 'reservoir'])['capacity_hm3']
    .agg(['last', 'nunique'])
    .reset_index()
)
capacities = capacities[capacities.province == province]
capacities = capacities.sort_values('last', ascending=False).head(num_res_max).copy()

# Keep only the rows of the selected reservoirs from `ds_start` onwards.
# The province condition is required: matching on the reservoir name alone would also
# pull in a same-named reservoir that belongs to a different province.
in_selection = (
    (df_all_raw.province == province)
    & df_all_raw.reservoir.isin(capacities.reservoir.unique())
    & (df_all_raw.ds >= ds_start)
)
df_selected = df_all_raw[in_selection].copy()

capacities

Unnamed: 0,province,reservoir,last,nunique
63,malaga,la vinnuela,165.4,1
61,malaga,guadalteba,153.3,1
60,malaga,guadalhorce,125.7,1
58,malaga,conde de guadalhorce,66.5,1
62,malaga,la concepcion,61.9,1


In [43]:
# Smallest `ds` value left after the selection.
df_selected['ds'].min()

'2012-09-01'

In [44]:
# Clear both tables before repopulating them.
ReservoirState.objects.all().delete()
Reservoir.objects.all().delete()

# Create one Reservoir per row of `capacities` and keep the instances keyed by name, so
# the state loop below reuses them instead of issuing a lookup query for every row.
# `objects.create()` already persists the instance, so no additional `.save()` is needed.
reservoir_by_name = {
    row['reservoir']: Reservoir.objects.create(name=row['reservoir'], capacity=row['last'])
    for _, row in capacities.iterrows()
}

print(len(df_selected))

# Insert at most `num_states` reservoir states, one per selected row.
for _, row in df_selected.head(num_states).iterrows():
    ReservoirState.objects.create(
        reservoir=reservoir_by_name[row['reservoir']],
        date=row['ds'],
        volume=row['stored_hm3'],
    )

ReservoirState.objects.all().count()

2859


2859

In [45]:
# Derive additional columns with the project helper. The code below expects the result
# to carry a datetime `date` column (it is used with `.dt`).
df_all = p.add_cols(df_selected)

# Days elapsed since the previous row of the same reservoir.
df_all['date_lag'] = df_all.groupby(['province', 'reservoir'])['date'].shift(1)
df_all['date_diff'] = (df_all.date - df_all.date_lag).dt.days

cols = ['rainfallsince', 'stored_hm3', 'capacity_hm3']  # not used further in this cell
for var in ['rainfallsince', 'stored_hm3']:
    # Row-to-row change within each reservoir, plus the same change shifted by 1..9 rows.
    df_all[f'{var}_diff'] = df_all.groupby(['province', 'reservoir'])[var].diff()
    df_all[f'{var}_diff_0'] = df_all[f'{var}_diff']
    for lags in range(1, 10):
        df_all[f'{var}_diff_{lags}'] = df_all.groupby(['province', 'reservoir'])[f'{var}_diff'].shift(lags)

# Delete all rainfall objects
RainFall.objects.all().delete()

# Get all reservoirs with state data
reservoirs = Reservoir.objects.annotate(
        num_states=Count("reservoir_reservoirstate")
    ).filter(num_states__gt=0)

reservoir_names = reservoirs.values_list('name', flat=True)

# Fetch the Reservoir instances once and index them by name, so the loop below does not
# run one lookup query per row.
reservoir_by_name = {res.name: res for res in reservoirs}

rows_to_load = df_all[df_all.reservoir.isin(reservoir_names)].head(num_states)
for _, row in rows_to_load.iterrows():
    # NOTE(review): `rainfallsince_diff` is NaN on the first row of every reservoir
    # (result of the grouped diff) — confirm how RainFall.amount is meant to store that.
    RainFall.objects.create(
        date=row['ds'],
        reservoir=reservoir_by_name[row['reservoir']],
        amount=row['rainfallsince_diff'],
        amount_cumulative=row['rainfallsince'],
        amount_cumulative_historical=row['avgrainfall1971_2000'],
    )
len(df_all), RainFall.objects.all().count()

(2859, 2859)

In [46]:
# Rows of the reservoirs that have state data, capped at `num_states`.
df_rain = df_all.loc[df_all['reservoir'].isin(reservoir_names)].head(num_states)

# Show the cumulative rainfall on rows whose `ds` string ends in "09-01".
is_sept_first = df_rain['ds'].str.endswith('09-01')
df_rain.loc[is_sept_first, ['reservoir', 'date', 'rainfallsince']]

Unnamed: 0,reservoir,date,rainfallsince
10338,conde de guadalhorce,2012-09-01,269.7
10411,conde de guadalhorce,2014-09-01,241.9
10453,conde de guadalhorce,2015-09-01,334.1
10476,conde de guadalhorce,2016-09-01,323.2
10722,conde de guadalhorce,2023-09-01,195.8
18432,guadalhorce,2012-09-01,242.5
18464,guadalhorce,2013-09-01,624.7
18495,guadalhorce,2014-09-01,234.9
18537,guadalhorce,2015-09-01,296.9
18560,guadalhorce,2016-09-01,334.9


In [25]:
# Row counts of the two tables populated above.
RainFall.objects.count(), ReservoirState.objects.count()

(774, 796)

In [47]:
# Date of the first ReservoirState returned by the unfiltered queryset.
first_state = ReservoirState.objects.all()[0]
first_state.date

datetime.date(2012, 9, 1)

In [27]:
# Earliest date stored in ReservoirState: order by date and take the first row.
earliest_state = ReservoirState.objects.order_by('date').first()
min_date = earliest_state.date
min_date

datetime.date(2022, 9, 1)

In [15]:
# Count the ReservoirState rows recorded on one specific date.
date = "2023-09-01"
rs = ReservoirState.objects.all()

rs.filter(date=date).count()

7