# Alireza Bolhassani
----
## Download data


In [5]:
!wget "https://drive.google.com/uc?export=download&id=173tvWt-qPyAqZJc1KoBiUHgoSg6nLUtT" -O munster_hourly.csv

from IPython.display import clear_output
clear_output(wait=False)

----
## Data information
- DATUM: The datetime column   
- STATIONS_ID DWD weather station ID (1766 = Münster/Osnabrück)    
- TT_TER: air temperature    
- RF_TER: relative humidty
- VGSL: real evapotranspiration over gras and sandy loam (mm)
- TS05: mean daily soil temperature in 5 cm depth for uncovered typical soil (°C)
- BF10: soil moisture under grass and sandy loam between 0 and 10 cm depth in % plant useable water (%nFK)


In [69]:
import pandas as pd
df = pd.read_csv("munster_hourly.csv")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 35672 entries, 0 to 35671
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   DATUM        35672 non-null  object 
 1   STATIONS_ID  35672 non-null  int64  
 2   QN_4         35672 non-null  int64  
 3   TT_TER       35672 non-null  float64
 4   RF_TER       35672 non-null  float64
 5   VGSL         35672 non-null  float64
 6   TS05         35672 non-null  float64
 7   BF10         35672 non-null  int64  
dtypes: float64(4), int64(3), object(1)
memory usage: 2.2+ MB


In [70]:
print("The length of data :", len(df))
df.head(3)

The length of data : 35672


Unnamed: 0,DATUM,STATIONS_ID,QN_4,TT_TER,RF_TER,VGSL,TS05,BF10
0,1991-01-01 07:00:00,1766,10,3.0,91.0,0.3,2.9,102
1,1991-01-01 14:00:00,1766,10,4.8,85.0,0.3,2.9,102
2,1991-01-01 21:00:00,1766,10,3.9,82.0,0.3,2.9,102


----

In [80]:
from sklearn.model_selection import train_test_split
import random

X = df.loc[:, ["TT_TER", "RF_TER", "TS05"]]
y = df["TT_TER"] * 2 + df["RF_TER"]

X_trainvald, X_test, y_trainvald, y_test = train_test_split(X, y, test_size=0.20, random_state=42)
X_train, X_vald, y_train, y_vald = train_test_split(X_trainvald, y_trainvald, test_size=0.25, random_state=42)

X_train.shape

(21402, 3)

In [81]:
from tensorflow.keras import layers, models

model = models.Sequential()
model.add(layers.Dense(5, input_dim=X_train.shape[1], activation='relu'))
model.add(layers.Dense(5, activation='relu'))
model.add(layers.Dense(1, activation='linear'))  # Output layer with 3 neurons and softmax activation for multiclass classification


model.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy'])


In [82]:
history = model.fit(X_train, y_train, batch_size=32, epochs=10, validation_data=(X_vald, y_vald))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [83]:
# Evaluate the model on the test set
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Loss: {loss}, Test Accuracy: {accuracy}')

Test Loss: 194.8867950439453, Test Accuracy: 0.0


In [84]:
model.predict([[1,2,10]])



array([[5.567702]], dtype=float32)