## Denpasar Weather Dataset Link
Download the dataset manually from the link below, then upload it to the Colab notebook.

In [2]:
# Dataset download page (Tianchi):
# https://tianchi.aliyun.com/dataset/dataDetail?spm=5176.12281978.0.0.2746bb320Ji0VP&dataId=93492

## Importing Libraries

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split

## Data Wrangling
The dataset is already clean, so I only remove the features that are not needed.

In [7]:
# Read the raw Denpasar weather export from the uploaded CSV file.
csv_path = '/content/openweatherdata-denpasar-1990-2020.csv'
df = pd.read_csv(csv_path)

In [8]:
# Columns that are used neither as model inputs nor as the label.
unused_columns = [
    'dt', 'dt_iso', 'timezone', 'city_name', 'lat', 'lon',
    'sea_level', 'grnd_level',
    'rain_1h', 'rain_3h', 'rain_6h', 'rain_12h', 'rain_24h', 'rain_today',
    'snow_1h', 'snow_3h', 'snow_6h', 'snow_12h', 'snow_24h', 'snow_today',
    'weather_main', 'weather_id', 'weather_icon',
]
# Removes the columns from df in place.
df.drop(columns=unused_columns, inplace=True)

In [9]:
# Preview the first five remaining rows.
df.head(n=5)

Unnamed: 0,temp,temp_min,temp_max,pressure,humidity,wind_speed,wind_deg,clouds_all,weather_description
0,25.82,25.82,25.82,1010.0,86,1.36,225,98,light rain
1,26.2,26.2,26.2,1011.0,84,2.09,247,91,light rain
2,26.45,26.45,26.45,1011.0,84,2.44,262,94,light rain
3,26.8,26.8,26.8,1011.0,82,2.29,271,94,light rain
4,27.04,27.04,27.04,1010.0,82,1.71,274,76,broken clouds


### One Hot Encoding
The `weather_description` column is the categorical label we want to predict, so we one-hot encode it.

In [10]:
# One-hot encode the label column: one 0/1 indicator column per distinct
# weather_description value. The dtype is pinned because pandas >= 2.0
# defaults to bool indicator columns, whereas the rest of the notebook
# expects 0/1 integers (uint8 was the previous default).
df2 = pd.get_dummies(df['weather_description'], dtype='uint8')

In [11]:
# Preview the first five rows of the one-hot encoded labels.
df2.head(n=5)

Unnamed: 0,broken clouds,drizzle,dust,few clouds,fog,haze,heavy intensity rain,heavy intesity shower rain,heavy thunderstorm,light intensity drizzle,light intensity shower rain,light rain,light thunderstorm,mist,moderate rain,overcast clouds,proximity moderate rain,proximity shower rain,proximity squalls,proximity thunderstorm,proximity tornado,scattered clouds,shower rain,sky is clear,smoke,thunderstorm,thunderstorm with heavy rain,thunderstorm with light rain,thunderstorm with rain,very heavy rain,volcanic ash
0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


#### Drop the weather_description 
After dropping it, only the numeric feature columns remain in `df`.

In [12]:
# The label now lives in df2 as indicator columns, so remove the text column
# from df in place.
df.drop(columns=['weather_description'], inplace=True)

In [13]:
# Preview the first five rows of the numeric feature columns.
df.head(n=5)

Unnamed: 0,temp,temp_min,temp_max,pressure,humidity,wind_speed,wind_deg,clouds_all
0,25.82,25.82,25.82,1010.0,86,1.36,225,98
1,26.2,26.2,26.2,1011.0,84,2.09,247,91
2,26.45,26.45,26.45,1011.0,84,2.44,262,94
3,26.8,26.8,26.8,1011.0,82,2.29,271,94
4,27.04,27.04,27.04,1010.0,82,1.71,274,76


#### Making New Dataframe
Concatenate the one-hot encoded labels with the numeric feature columns.

In [14]:
# Place the indicator columns (df2) and the numeric features (df) side by side,
# aligned on the shared row index.
df_new = pd.concat(objs=[df2, df], axis='columns')

In [15]:
# Preview the first five rows of the combined frame.
df_new.head(n=5)

Unnamed: 0,broken clouds,drizzle,dust,few clouds,fog,haze,heavy intensity rain,heavy intesity shower rain,heavy thunderstorm,light intensity drizzle,light intensity shower rain,light rain,light thunderstorm,mist,moderate rain,overcast clouds,proximity moderate rain,proximity shower rain,proximity squalls,proximity thunderstorm,proximity tornado,scattered clouds,shower rain,sky is clear,smoke,thunderstorm,thunderstorm with heavy rain,thunderstorm with light rain,thunderstorm with rain,very heavy rain,volcanic ash,temp,temp_min,temp_max,pressure,humidity,wind_speed,wind_deg,clouds_all
0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,25.82,25.82,25.82,1010.0,86,1.36,225,98
1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,26.2,26.2,26.2,1011.0,84,2.09,247,91
2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,26.45,26.45,26.45,1011.0,84,2.44,262,94
3,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,26.8,26.8,26.8,1011.0,82,2.29,271,94
4,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,27.04,27.04,27.04,1010.0,82,1.71,274,76


In [18]:
# Model inputs: the eight numeric weather measurements.
feature_columns = [
    'temp', 'temp_min', 'temp_max', 'pressure',
    'humidity', 'wind_speed', 'wind_deg', 'clouds_all',
]
X = df_new[feature_columns]

# Model targets: the 31 one-hot indicator columns. The spellings are the
# dataset's own column names and must match exactly.
label_columns = [
    'broken clouds', 'drizzle', 'dust', 'few clouds', 'fog', 'haze',
    'heavy intensity rain', 'heavy intesity shower rain',
    'heavy thunderstorm', 'light intensity drizzle',
    'light intensity shower rain', 'light rain', 'light thunderstorm',
    'mist', 'moderate rain', 'overcast clouds', 'proximity moderate rain',
    'proximity shower rain', 'proximity squalls', 'proximity thunderstorm',
    'proximity tornado', 'scattered clouds', 'shower rain', 'sky is clear',
    'smoke', 'thunderstorm', 'thunderstorm with heavy rain',
    'thunderstorm with light rain', 'thunderstorm with rain',
    'very heavy rain', 'volcanic ash',
]
y = df_new[label_columns]

## Train the predicting model

### Split the data
I will split the data 80:20. Because the dataset is really large, 20 percent of it is more than enough for testing.

In [19]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [20]:
# Show the (rows, columns) shape of the training features and training labels.
(X_train.shape, y_train.shape)

((211939, 8), (211939, 31))

### Model Building using Basic ANN

In [21]:
# Feed-forward classifier: one 1024-unit ReLU hidden layer over the 8 weather
# features, then a softmax over the 31 weather_description classes.
model = Sequential([
                    # input_shape lists per-sample dimensions only (the batch
                    # axis is implicit). Each sample is a flat vector of 8
                    # features, so the shape is (8,); (None, 8) would declare
                    # variable-length sequences of 8-vectors instead.
                    Dense(1024, input_shape=(8,), activation='relu'),
                    Dense(31, activation='softmax')
])

In [23]:
# Categorical cross-entropy matches the one-hot encoded labels. `metrics` is
# passed as a list: Keras documents it as a list/tuple/dict, and newer versions
# reject a bare string. The metric keeps the name 'acc' so the logged keys are
# unchanged.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])

In [24]:
# Train for 10 epochs with per-epoch progress output.
# NOTE(review): the test split is also used as validation data here, so it is
# not a fully untouched hold-out set.
model.fit(
    x=X_train,
    y=y_train,
    epochs=10,
    verbose=1,
    validation_data=(X_test, y_test),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f1e7699c8d0>

In [29]:
# One observation, in the same column order as X:
# temp, temp_min, temp_max, pressure, humidity, wind_speed, wind_deg, clouds_all.
# It is wrapped in a float tensor so Keras receives an explicit (1, 8) batch;
# a raw nested Python list is ambiguous (lists can denote multiple model
# inputs) and some Keras versions reject it.
sample = tf.constant([[26.46, 26.86, 27.35, 1012.70, 81, 4.31, 122.1, 28]], dtype=tf.float32)
model.predict(sample)

array([[5.7915861e-15, 9.2898454e-14, 3.9679326e-05, 1.7018126e-01,
        1.0442685e-05, 1.2672972e-03, 3.4865382e-04, 3.1311779e-36,
        3.0956623e-31, 1.3023499e-09, 1.5807627e-05, 4.6428427e-02,
        6.5168128e-11, 2.2108281e-05, 1.3091257e-03, 2.8688263e-30,
        1.4077387e-16, 2.5825397e-05, 1.2492925e-06, 1.4803627e-03,
        1.4512537e-05, 7.7544606e-01, 3.3005421e-05, 2.6147674e-08,
        7.1097733e-05, 2.7911435e-03, 1.8299964e-06, 5.5751469e-05,
        4.4498933e-04, 8.0615437e-06, 3.2909104e-06]], dtype=float32)

In [32]:
# 0.775 is the largest softmax score returned for the single sample predicted
# above. It is the model's confidence in that one prediction, not an accuracy
# measured over a dataset, so the message is labelled accordingly.
print('the predicted class probability is: ', round(7.7544606e-01*100,1), '%')

the accuracy is:  77.5 %


In [None]:
# Class labels in the same order as the softmax outputs (the column order of y).
# The parentheses make this one 31-element tuple; without them each line is a
# separate expression statement and the cell shows only the last three labels.
# The spellings are the dataset's own labels and must stay unchanged.
WEATHER_LABELS = (
    'broken clouds', 'drizzle', 'dust', 'few clouds',
    'fog', 'haze', 'heavy intensity rain', 'heavy intesity shower rain',
    'heavy thunderstorm', 'light intensity drizzle', 'light intensity shower rain', 'light rain',
    'light thunderstorm', 'mist', 'moderate rain', 'overcast clouds',
    'proximity moderate rain', 'proximity shower rain', 'proximity squalls', 'proximity thunderstorm',
    'proximity tornado', 'scattered clouds', 'shower rain', 'sky is clear',
    'smoke', 'thunderstorm', 'thunderstorm with heavy rain', 'thunderstorm with light rain',
    'thunderstorm with rain', 'very heavy rain', 'volcanic ash',
)
WEATHER_LABELS

The result: the model predicts scattered clouds for Denpasar in September 2021.