In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.offline as pyo
import plotly.graph_objs as go
pyo.init_notebook_mode(connected=True)

- Gerando base de dados e leitura dos dados

In [3]:
%run gerar_ribeiroGoncalves.py



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



In [82]:
ts = pd.read_csv('ribeiroGon.csv', index_col=0).drop_duplicates(subset=['month', 'year'])

- Utilizando somente os dados dos ultimos 30 anos

In [83]:
ts = ts[ts['year'] >= (max(ts['year']) - 30)]

# Dados Fluviométricos

In [84]:
def running_mean(x):
    # Série a ser iterada
    ts = range(len(x))[1:-1]
    # retornando lista com a média dos elementos vizinhos
    return [(x[i-1]+x[i+1])/2 for i in ts]

In [85]:
def running_mean2(x, N):
    cumsum = np.cumsum(np.insert(x, 0, 0)) 
    return (cumsum[N:] - cumsum[:-N]) / float(N)

In [86]:
ts.head()

Unnamed: 0,month,year,vazao,data_ticks
283,1,1988,274.334,1988-1
284,2,1988,300.32,1988-2
285,3,1988,367.603,1988-3
286,4,1988,282.685,1988-4
287,5,1988,213.099,1988-5


In [87]:
x = ts.sort_values(['year','month'])['data_ticks']
y = ts.sort_values(['year','month'])['vazao'].values
x_raw = list(range(len(x)))

In [88]:
trace = go.Scatter(x = x, y = y, name = 'Medias')

trace_moving_avg = go.Scatter(x = x, y = running_mean(y), name = 'Média móvel (neighbors)')

trace_moving_avg2 = go.Scatter(x = x[2:], y = running_mean2(y,4), name = 'Média móvel (cumsum)')

tendencia = go.Scatter()

layout = go.Layout(title = 'Medias Vazão Rio Parnaíba - Ribeiro Gonçalves',
                   xaxis = dict(title = 'Data', type ='category', tickangle=45, dtick = 24),
                   yaxis = dict(title = 'Vazão média (m^3/s)'))

fig = go.Figure(data = [trace, trace_moving_avg, trace_moving_avg2], layout = layout)

pyo.iplot(fig, 'vazoes.html', config = dict(showLink = False))

In [89]:
np.mean(ts[ts['year'] == 2018][['month','vazao']].groupby('month').mean()['vazao'].values)

173.3656

In [90]:
x = ['Jan','Fev','Mar','Abr', 'Maio','Jun','Jul','Ago','Set','Out','Nov','Dez']
trace = go.Bar(x = x,
                   y = ts[['month','vazao']].groupby('month').mean()['vazao'].values)

media_geral = np.mean(ts[['month','vazao']].groupby('month').mean()['vazao'].values)

trace_line = go.Scatter(x = x, y = [media_geral]*12, mode = 'lines')

layout = go.Layout(title = 'Médias mensais', yaxis = dict(title = 'Vazões'), xaxis = dict(title = 'Meses'), showlegend=False)
fig = go.Figure(data = [trace, trace_line], layout = layout)

pyo.iplot(fig, config=dict(showLink = False))

- Segundo a média: meses de seca = Maio a Novembro e meses de cheia = Dez a Abr

In [91]:
data = []
meses_list = ['Jan','Fev','Mar','Abr', 'Maio','Jun','Jul','Ago','Set','Out','Nov','Dez']
for mes in np.arange(1,13,1):
    data.append(go.Box(y = ts[ts['month'] == mes]['vazao'], name = meses_list[mes-1]))

layout = go.Layout(title = 'Boxplot Distribuição vazões médias', showlegend = False)

fig = go.Figure(data = data, layout = layout)

pyo.iplot(fig, config=dict(showLink=False))

In [92]:
layout = go.Layout(title = 'Distribuição vazões', barmode = 'overlay')

fig = go.Figure(data = [go.Histogram(x = ts['vazao'], opacity = 0.75, name = 'Observado'),
                        go.Histogram(x = running_mean2(y,4), opacity = 0.75, name = 'Média movel')],
                        layout = layout)
pyo.iplot(fig, config=dict(showLink = False))

## Long term flow duration

In [99]:
cumsum = np.cumsum(ts['vazao'])
#[i for i in range(len(cumsum))][::-1]
x = 100*cumsum/np.max(cumsum)

trace = go.Scatter(y=np.sort(ts['vazao'])[::-1], x=x,
                     marker=dict(color='rgb(150, 25, 120)'), name = 'Vazão original', mode = 'markers')
trace2 = go.Scatter(x=x, y = np.sort(running_mean2(y,4))[::-1], name = 'Vazão com média móvel', mode = 'markers')
trace3 = go.Scatter(y = [203.01851219512204,203.01851219512204], x = [0,100],
                    marker = dict(color = 'gray'), name = 'Vazão média', mode = 'lines',
                    line = dict(dash = 'dot'))
trace4 = go.Scatter(y = [ts['vazao'].min(), ts['vazao'].max()], x = [95,95],
                    marker = dict(color = 'gray'), name = 'Q_95', mode = 'lines',
                    line = dict(dash = 'dot'))
layout = go.Layout(
    title="Long term flow duration - Ribeiro Gonçalves",
    xaxis = dict(title = '% tempo'), yaxis = dict(title = 'Taxa de vazão Q (m^3/s)'),
    hovermode = 'closest'
)
fig = go.Figure(data=[trace,trace2,trace3, trace4], layout=layout)
pyo.iplot(fig, filename='cdf-dataset')

In [75]:
print('Vazão média:', ts['vazao'].mean())
print('Q95:', 125.06)
print('How flashy the watercourse is?', 125.06/ts['vazao'].mean())

Vazão média: 203.01851219512204
Q95: 125.06
How flashy the watercourse is? 0.6160029380956366


Segundo o link do Rodrigo, o nosso rio não tende a inundar. Ver o (link)[http://www.renewablesfirst.co.uk/hydropower/hydropower-learning-centre/what-is-a-flow-duration-curve/].

Último parágrafo com as infos:
>Flow rates between Q0 and Q10 are considered high flow rates, and Q0 to Q1 would be extreme flood events. It is important that hydropower systems are designed to cope with such extreme flows. Flows from Q10 to Q70 would be the ‘medium’ range of flows and you would want your hydropower system to operate efficiently right across these flow rates. Flow rates from Q70 to Q100 are the ‘low flows’ when hydropower systems will just be operating but at a low power output, and as you move further to the right on the FDC hydro systems will begin to shut down due to low flow. As flow rates move from Q95 towards Q100 you move into the low-flow draught flows.

## Decomposição de ts

# Cumsum running mean

https://www.dallasfed.org/research/basics/moving.aspx

https://stats.stackexchange.com/questions/144013/smoothing-when-to-use-it-and-when-not-to