In [34]:
import os
import pandas as pd
from functools import reduce
import numpy as np
from datetime import date
import tensorflow as tf
from keras.layers import Dropout
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.metrics import RootMeanSquaredError, MeanAbsoluteError
from tensorflow.keras.models import Sequential
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from keras.preprocessing.sequence import TimeseriesGenerator


In [35]:
df=pd.read_csv("C:/Users/91966/OneDrive/Desktop/capstone/telangana/weather/daily_weather_data.csv")
df=df[['district','odate','rainfall','temp_min','temp_max']].dropna()

In [36]:
df1=df.copy(deep=True)
df2=df.copy(deep=True)
df3=df.copy(deep=True)
df1['odate'] = pd.to_datetime(df1['odate'], format='%d-%m-%Y', errors='coerce')
df1.dropna(inplace=True)
df2['odate'] = pd.to_datetime(df2['odate'], format='%Y-%m-%d', errors='coerce')
df2.dropna(inplace=True)
df3['odate'] = pd.to_datetime(df3['odate'], format='%d-%b-%y', errors='coerce')
df3.dropna(inplace=True)

In [37]:
df_final=pd.concat([df1,df2,df3],axis=0).sort_index()
df_final=df_final.sort_values(by='odate')

In [38]:
df_comps={}
options=["Adilabad","Nizamabad","Warangal","Karimnagar","Khammam"]
for i in options:
  df_comps[i]=df_final[df_final["district"]==i]

In [39]:
df_comps['Adilabad']=df_comps['Adilabad'][['odate','temp_max']]
df_comps['Adilabad']

Unnamed: 0,odate,temp_max
0,2018-01-01,29.7
6706,2018-01-01,29.3
6339,2018-01-01,29.3
5945,2018-01-01,29.6
5580,2018-01-01,33.0
...,...,...
1033119,2022-09-30,31.4
1033938,2022-09-30,32.7
1030389,2022-09-30,33.7
1030116,2022-09-30,32.7


In [40]:

df=df_comps['Adilabad']
df=df.set_index('odate')
df

Unnamed: 0_level_0,temp_max
odate,Unnamed: 1_level_1
2018-01-01,29.7
2018-01-01,29.3
2018-01-01,29.3
2018-01-01,29.6
2018-01-01,33.0
...,...
2022-09-30,31.4
2022-09-30,32.7
2022-09-30,33.7
2022-09-30,32.7


In [41]:
# # Put your inputs into a single list
# df['single_input_vector'] = df[['temp_max']].apply(tuple, axis=1).apply(list)
# # Double-encapsulate list so that you can sum it in the next step and keep time steps as separate elements
# df['single_input_vector'] = df.single_input_vector.apply(lambda x: [list(x)])
# # Use .cumsum() to include previous row vectors in the current row list of vectors
# df['cumulative_input_vectors'] = df.single_input_vector.cumsum()

In [42]:
train, test = train_test_split(df, test_size=0.3)

In [43]:

train=train.sort_index()

test=test.sort_index()
train

Unnamed: 0_level_0,temp_max
odate,Unnamed: 1_level_1
2018-01-01,32.5
2018-01-01,32.1
2018-01-01,33.5
2018-01-01,29.5
2018-01-01,30.1
...,...
2022-09-30,32.5
2022-09-30,33.8
2022-09-30,30.8
2022-09-30,33.7


In [44]:
scalar=MinMaxScaler()
scalar.fit(train)
train_scaled=scalar.transform(train)
test_scaled=scalar.transform(test)


In [45]:
n_input=32
n_features=1
generator=TimeseriesGenerator(train_scaled,train_scaled,length=n_input,batch_size=25)

In [46]:
def get_model(params, input_shape):
	model = Sequential()
	model.add(LSTM(units=params["lstm_units"], return_sequences=True, input_shape=(n_input,n_features)))
	model.add(Dropout(rate=params["dropout"]))

	model.add(LSTM(units=params["lstm_units"], return_sequences=True))
	model.add(Dropout(rate=params["dropout"]))

	model.add(LSTM(units=params["lstm_units"], return_sequences=True))
	model.add(Dropout(rate=params["dropout"]))

	model.add(LSTM(units=params["lstm_units"], return_sequences=False))
	model.add(Dropout(rate=params["dropout"]))

	model.add(Dense(1))

	model.compile(loss=params["loss"],
              	optimizer=params["optimizer"],
              	metrics=[MeanAbsoluteError()])

	return model

In [48]:
params = {
	"loss": "mean_absolute_error",
	"optimizer": "adam",
	"dropout": 0.2,
	"lstm_units": 90,
	"epochs": 30,
	"batch_size": 128,
	"es_patience" : 10
}

model = get_model(params=params,input_shape=(n_input,n_features))

In [49]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_4 (LSTM)                (None, 32, 90)            33120     
_________________________________________________________________
dropout_4 (Dropout)          (None, 32, 90)            0         
_________________________________________________________________
lstm_5 (LSTM)                (None, 32, 90)            65160     
_________________________________________________________________
dropout_5 (Dropout)          (None, 32, 90)            0         
_________________________________________________________________
lstm_6 (LSTM)                (None, 32, 90)            65160     
_________________________________________________________________
dropout_6 (Dropout)          (None, 32, 90)            0         
_________________________________________________________________
lstm_7 (LSTM)                (None, 90)               

In [53]:
es_callback = tf.keras.callbacks.EarlyStopping(monitor='mean_absolute_error',
                                           	mode='min',
patience=params["es_patience"])

In [54]:
model.fit(
	generator,
	epochs=params["epochs"],
	batch_size=params["batch_size"],
	verbose=1,
	callbacks=[es_callback]
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x2925324c130>

In [None]:
loss_per_epoch=model.history.