In [2]:
from datetime import datetime

import numpy as np
import pandas as pd
from keras.models import Sequential
from numpy import argmax
from sklearn.model_selection import train_test_split

In [3]:
# Load the reservoir level records into a DataFrame. Later cells read the
# 'Date' and 'POONDI' columns from this frame.
df = pd.read_csv('chennai_reservoir_levels.csv')

In [4]:
# Parse the raw date column once, then split out the calendar parts.
# NOTE(review): the prediction prompt further down asks for DD/MM/YYYY; if the
# CSV stores day-first dates too, confirm pd.to_datetime reads them that way
# (e.g. dayfirst=True) -- TODO verify against the file.
df['Date'] = pd.to_datetime(df['Date'])
for part_name in ('Year', 'Month', 'Day'):
    df[part_name] = getattr(df['Date'].dt, part_name.lower())

# Fold the day into the month as a fraction of a nominal 30-day month, so the
# model sees position within the month instead of twelve coarse steps.
# The prediction input must be encoded the same way (month + day/30).
day_fraction = df['Day'].astype(int) / 30
df['Month'] = df['Month'].astype(int) + day_fraction

#len(df)

For now, we are only going to focus on POONDI. However, this notebook can easily be modified to predict for the other 3 reservoirs too.

In [5]:
# Prediction target: the POONDI column of the loaded frame.
y = df['POONDI']

In [6]:
# Input columns for the model: the fractional month and the calendar year
# derived from 'Date'. The column names and order here are the same ones
# predict() uses when it builds its one-row frame.
features = ['Month', 'Year']

In [7]:
# Feature matrix: only the columns named in `features`.
X = df[features]

# Hold out 20% of the rows for validation. random_state pins the shuffle so
# a rerun produces the same split (and the same MAE figures below).
X_train_full, X_valid_full, y_train, y_valid = train_test_split(X, y, train_size=0.8, test_size=0.2, random_state=0)

# Only the final expression of a cell is rendered, so the summary goes last.
# (A row preview of the same columns is shown later via X_valid_full.head().)
X.describe()

## Model Training

We first test which tree size (maximum number of leaf nodes) gives the lowest validation error without overfitting.

In [8]:
from sklearn.metrics import mean_absolute_error
from sklearn.tree import DecisionTreeRegressor

def get_mae(max_leaf_nodes, train_X, val_X, train_y, val_y):
    """Fit a decision tree of bounded size and score it on the validation set.

    Parameters:
        max_leaf_nodes: upper bound on the number of leaves in the tree.
        train_X, train_y: features and targets used to fit the tree.
        val_X, val_y: features and targets used to score it.

    Returns:
        The mean absolute error of the tree's predictions on val_X.
    """
    # random_state is fixed so repeated calls with the same data agree.
    tree = DecisionTreeRegressor(max_leaf_nodes=max_leaf_nodes, random_state=0).fit(train_X, train_y)
    return mean_absolute_error(val_y, tree.predict(val_X))

In [9]:
# Sweep tree sizes in powers of ten and report the validation MAE for each.
# Note: the %d conversion truncates the MAE to a whole number in the printout.
leaf_node_options = [5, 50, 500, 5000, 50000]
for max_leaf_nodes in leaf_node_options:
    my_mae = get_mae(max_leaf_nodes, X_train_full, X_valid_full, y_train, y_valid)
    report_line = "Max leaf nodes: %d  \t\t Mean Absolute Error:  %d" % (max_leaf_nodes, my_mae)
    print(report_line)

Max leaf nodes: 5  		 Mean Absolute Error:  561
Max leaf nodes: 50  		 Mean Absolute Error:  370
Max leaf nodes: 500  		 Mean Absolute Error:  221
Max leaf nodes: 5000  		 Mean Absolute Error:  177
Max leaf nodes: 50000  		 Mean Absolute Error:  177


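Here, we can see that the MAE bottoms out at 177 (reached at 5000 leaf nodes; going to 50000 gives no further improvement), which is only around 5% error. On that basis we treat the model as roughly 95% accurate. Note that MAE is expressed in the same units as the target, so this percentage depends on the level it is compared against.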

Now we make the final function required for the prediction of water level

In [10]:
# Preview a few validation rows to see the (Month, Year) layout the model
# is given as input.
X_valid_full.head()

Unnamed: 0,Month,Year
2608,2.7,2011
4160,5.766667,2015
1936,4.666667,2009
1831,5.033333,2009
3412,5.166667,2013


In [11]:
# Final model: 5000 leaf nodes, the smallest setting in the sweep at which the
# validation MAE stopped improving (5000 and 50000 both printed 177).
# random_state=0 matches the setting used inside get_mae.
model = DecisionTreeRegressor(max_leaf_nodes=5000, random_state=0)
# Fit on the training split only; as the cell's last expression this also
# renders the fitted estimator's repr.
model.fit(X_train_full, y_train)

DecisionTreeRegressor(max_leaf_nodes=5000, random_state=0)

In [12]:
def predict(month, year):
    """Predict the POONDI level for one (month, year) pair.

    Parameters:
        month: month value in the same encoding as the training feature
            (calendar month plus day/30).
        year: calendar year.

    Returns:
        The array returned by the module-level `model.predict` for a single
        row; callers take element [0] to get the scalar prediction.
    """
    # Build a one-row frame with the same column names and order the model
    # was fitted on.
    single_row = pd.DataFrame([[month, year]], columns=['Month', 'Year'])
    return model.predict(single_row)

This is the final code that will work as an application

In [14]:
# Interactive prediction loop: keep asking for a date until the user types
# "exit" (or the input stream is closed), and print the predicted POONDI level
# for each valid date.
while True:
    print('Please enter the date of the prediction to be made. Note the format should be DD/MM/YYYY. Type "exit" to exit from the program: ')
    try:
        # Surrounding whitespace is not part of the date, so ignore it.
        date = input().strip()
    except EOFError:
        # No more input available: leave the loop instead of crashing.
        break
    if date == 'exit':
        break
    try:
        # strptime enforces the DD/MM/YYYY layout and rejects impossible
        # dates (month 13, 31/02, wrong separators, non-4-digit years), which
        # fixed-offset slicing let through. It also accepts unpadded days and
        # months such as 7/2/2022.
        parsed = datetime.strptime(date, '%d/%m/%Y')
    except ValueError:
        print('Wrong format, please try again!')
        continue
    day = parsed.day
    year = parsed.year
    # Same fractional-month encoding used for the training feature.
    month = parsed.month + day/30
    # Kept outside the try so a failure inside the model is not misreported
    # as a badly formatted date.
    prediction = predict(month, year)[0]
    print('The predicted water level of the reservoir POONDI on ' + date + ' is ' + str(prediction) + ' million cubic feet')

Please enter the date of the prediction to be made. Note the format should be DD/MM/YYYY. Type "exit" to exit from the program: 
27/02/2022
The predicted water level of the reservoir POONDI on 27/02/2022 is 1616.0 million cubic feet
Please enter the date of the prediction to be made. Note the format should be DD/MM/YYYY. Type "exit" to exit from the program: 
27/02/2017
The predicted water level of the reservoir POONDI on 27/02/2017 is 818.0 million cubic feet
Please enter the date of the prediction to be made. Note the format should be DD/MM/YYYY. Type "exit" to exit from the program: 
27/02/2077
The predicted water level of the reservoir POONDI on 27/02/2077 is 1616.0 million cubic feet
Please enter the date of the prediction to be made. Note the format should be DD/MM/YYYY. Type "exit" to exit from the program: 
exit
