In [26]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras import models, layers, utils, optimizers, callbacks
from sklearn.metrics import mean_squared_error, mean_absolute_error
import tensorflow as tf

In [2]:
from numpy.random import seed
seed(1)

In [5]:
dataset=pd.read_csv("Data/testset.csv")
dataset

Unnamed: 0,datetime_utc,_conds,_dewptm,_fog,_hail,_heatindexm,_hum,_precipm,_pressurem,_rain,_snow,_tempm,_thunder,_tornado,_vism,_wdird,_wdire,_wgustm,_windchillm,_wspdm
0,19961101-11:00,Smoke,9.0,0,0,,27.0,,1010.0,0,0,30.0,0,0,5.00,280.0,West,,,7.4
1,19961101-12:00,Smoke,10.0,0,0,,32.0,,-9999.0,0,0,28.0,0,0,,0.0,North,,,
2,19961101-13:00,Smoke,11.0,0,0,,44.0,,-9999.0,0,0,24.0,0,0,,0.0,North,,,
3,19961101-14:00,Smoke,10.0,0,0,,41.0,,1010.0,0,0,24.0,0,0,2.00,0.0,North,,,
4,19961101-16:00,Smoke,11.0,0,0,,47.0,,1011.0,0,0,23.0,0,0,1.20,0.0,North,,,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99565,20161231-12:00,Partly Cloudy,15.0,0,0,,77.0,,1015.0,0,0,18.0,0,0,0.50,110.0,ESE,,,7.4
99566,20161231-13:00,Partly Cloudy,16.0,0,0,,100.0,,1014.0,0,0,16.0,0,0,0.80,90.0,East,,,3.7
99567,20161231-15:00,,13.0,0,0,,,,1016.0,0,0,,0,0,0.05,10.0,North,,,59.3
99568,20161231-18:00,Patches of Fog,12.0,1,0,,100.0,,1016.0,0,0,12.0,0,0,0.50,,,,,0.0


In [6]:
dataset.columns=dataset.columns.str.replace("_","")
dataset.columns=dataset.columns.str.replace(" ","")
dataset.columns

Index(['datetimeutc', 'conds', 'dewptm', 'fog', 'hail', 'heatindexm', 'hum',
       'precipm', 'pressurem', 'rain', 'snow', 'tempm', 'thunder', 'tornado',
       'vism', 'wdird', 'wdire', 'wgustm', 'windchillm', 'wspdm'],
      dtype='object')

In [7]:
dataset.drop(columns=["conds","dewptm","fog","hail","heatindexm","precipm","rain","snow","thunder","tornado","vism","wdird","wdire","wgustm","windchillm"],inplace=True,axis=1)
dataset

Unnamed: 0,datetimeutc,hum,pressurem,tempm,wspdm
0,19961101-11:00,27.0,1010.0,30.0,7.4
1,19961101-12:00,32.0,-9999.0,28.0,
2,19961101-13:00,44.0,-9999.0,24.0,
3,19961101-14:00,41.0,1010.0,24.0,
4,19961101-16:00,47.0,1011.0,23.0,0.0
...,...,...,...,...,...
99565,20161231-12:00,77.0,1015.0,18.0,7.4
99566,20161231-13:00,100.0,1014.0,16.0,3.7
99567,20161231-15:00,,1016.0,,59.3
99568,20161231-18:00,100.0,1016.0,12.0,0.0


In [8]:
dataset['pressurem'].replace('-9999.0',np.nan)
dataset.replace(0,np.nan)
dataset.isnull().sum()

datetimeutc       0
hum             756
pressurem       232
tempm           672
wspdm          2358
dtype: int64

In [9]:
dataset

Unnamed: 0,datetimeutc,hum,pressurem,tempm,wspdm
0,19961101-11:00,27.0,1010.0,30.0,7.4
1,19961101-12:00,32.0,-9999.0,28.0,
2,19961101-13:00,44.0,-9999.0,24.0,
3,19961101-14:00,41.0,1010.0,24.0,
4,19961101-16:00,47.0,1011.0,23.0,0.0
...,...,...,...,...,...
99565,20161231-12:00,77.0,1015.0,18.0,7.4
99566,20161231-13:00,100.0,1014.0,16.0,3.7
99567,20161231-15:00,,1016.0,,59.3
99568,20161231-18:00,100.0,1016.0,12.0,0.0


In [10]:
hum_median=dataset["hum"].median()
dataset["hum"].fillna(hum_median,inplace=True)
pressurem_median=dataset["pressurem"].median()
dataset["pressurem"].fillna(pressurem_median,inplace=True)
wspdm_median=dataset["wspdm"].median()
dataset["wspdm"].fillna(wspdm_median,inplace=True)
tempm_median=dataset["tempm"].median()
dataset["tempm"].fillna(tempm_median,inplace=True)

In [11]:
dataset.isnull().sum()

datetimeutc    0
hum            0
pressurem      0
tempm          0
wspdm          0
dtype: int64

In [12]:
def extract_year(value):
    return (value[0:4])

In [13]:
def extract_month(value):
    return (value[4:6])

In [14]:
def extract_date(value):
    return (value[6:8])

In [15]:
dataset["year"]=dataset["datetimeutc"].apply(lambda x : extract_year(x))
dataset["month"]=dataset["datetimeutc"].apply(lambda x : extract_month(x))
dataset["date"]=dataset["datetimeutc"].apply(lambda x : extract_month(x))

In [16]:
dataset.groupby("year").tempm.mean()

year
1996    16.658059
1997    24.477199
1998    24.872031
1999    27.333333
2000    23.884252
2001    25.599667
2002    26.487005
2003    26.182239
2004    26.010054
2005    24.720548
2006    25.455213
2007    24.813264
2008    24.527476
2009    25.358821
2010    25.559610
2011    24.968590
2012    25.028712
2013    24.690692
2014    25.184326
2015    25.148161
2016    27.314591
Name: tempm, dtype: float64

In [17]:
dataset

Unnamed: 0,datetimeutc,hum,pressurem,tempm,wspdm,year,month,date
0,19961101-11:00,27.0,1010.0,30.0,7.4,1996,11,11
1,19961101-12:00,32.0,-9999.0,28.0,7.4,1996,11,11
2,19961101-13:00,44.0,-9999.0,24.0,7.4,1996,11,11
3,19961101-14:00,41.0,1010.0,24.0,7.4,1996,11,11
4,19961101-16:00,47.0,1011.0,23.0,0.0,1996,11,11
...,...,...,...,...,...,...,...,...
99565,20161231-12:00,77.0,1015.0,18.0,7.4,2016,12,12
99566,20161231-13:00,100.0,1014.0,16.0,3.7,2016,12,12
99567,20161231-15:00,58.0,1016.0,27.0,59.3,2016,12,12
99568,20161231-18:00,100.0,1016.0,12.0,0.0,2016,12,12


In [21]:
dataset_regression=dataset[['hum','tempm','pressurem','wspdm','year','month','date']]

In [22]:
dataset_regression

Unnamed: 0,hum,tempm,pressurem,wspdm,year,month,date
0,27.0,30.0,1010.0,7.4,1996,11,11
1,32.0,28.0,-9999.0,7.4,1996,11,11
2,44.0,24.0,-9999.0,7.4,1996,11,11
3,41.0,24.0,1010.0,7.4,1996,11,11
4,47.0,23.0,1011.0,0.0,1996,11,11
...,...,...,...,...,...,...,...
99565,77.0,18.0,1015.0,7.4,2016,12,12
99566,100.0,16.0,1014.0,3.7,2016,12,12
99567,58.0,27.0,1016.0,59.3,2016,12,12
99568,100.0,12.0,1016.0,0.0,2016,12,12


In [23]:
X_part=dataset_regression.drop(["tempm"],axis=1)
Y_part=dataset_regression["tempm"]
X_part=np.array(X_part)
Y_part=np.array(Y_part).reshape(-1,1)

In [24]:
print("X shape:",X_part.shape)
print("Y shape:",Y_part.shape)

X shape: (99570, 6)
Y shape: (99570, 1)


In [27]:
scaler2=MinMaxScaler(feature_range=[-1,1])
X_part_scaled=scaler2.fit_transform(X_part)
Y_part_scaled=scaler2.fit_transform(Y_part)

In [28]:
step=30
input=[]
output=[]
for i in range(len(X_part_scaled)-(step)):
    input.append(X_part_scaled[i:i+step])
    output.append(Y_part_scaled[i+step])
 
input=np.array(input)
output=np.array(output)

print(input.shape)
print(output.shape)

(99540, 30, 6)
(99540, 1)


In [None]:
trainR_X=input[:7300,::]
testR_X=input[7300:,::]
print("train_X Shape:",trainR_X.shape, ",test_X Shape:", testR_X.shape)

trainR_Y=output[:7300]
testR_Y=output[7300:]
print("test_Y Shape:",trainR_Y.shape, ",test_Y Shape:", testR_Y.shape)