# Importing libraries

In [44]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, TimeSeriesSplit
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, GradientBoostingRegressor
from sklearn.metrics import classification_report, mean_absolute_error, mean_squared_error, r2_score
import matplotlib.pyplot as plt
import seaborn as sns
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn
import joblib
import requests_cache
from sklearn.preprocessing import RobustScaler
from metpy.calc import wind_components
from metpy.units import units
from openmeteo_requests import Client
from retry_requests import retry
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import InputLayer, LSTM, Dense
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import load_model
import joblib

# Loading Data

In [38]:
aqaba = pd.read_csv('Datasets-with-cloud/aqaba_Ready.csv')
ghor = pd.read_csv('Datasets-with-cloud/ghor_Ready.csv')
irbid = pd.read_csv('Datasets-with-cloud/irbid_Ready.csv')
irwaished = pd.read_csv('Datasets-with-cloud/irwaished_Ready.csv')
maan = pd.read_csv('Datasets-with-cloud/maan_Ready.csv')
mafraq = pd.read_csv('Datasets-with-cloud/mafraq_Ready.csv')
amman = pd.read_csv('Datasets-with-cloud/amman_Ready.csv')  
safawi = pd.read_csv('Datasets-with-cloud/safawi_Ready.csv')

In [32]:
names = ['aqaba', 'ghor', 'irbid', 'irwaished', 'maan', 'mafraq', 'amman', 'safawi']
datasets = [aqaba, ghor, irbid, irwaished, maan, mafraq, amman, safawi]
coordinates = [            # (latitude, longitude)
    (29.5500, 35.0000),    # Aqabah
    (31.0333, 35.4667),    # Ghor
    (32.5500, 35.8500),    # Irbid
    (32.5000, 38.2000),    # Irwaished
    (30.1667, 35.7833),    # Maan
    (32.3667, 36.2500),    # Mafraq
    (31.7167, 35.9833),    # Amman
    (32.1608, 37.1539),    # Safawi
]
Stations = {}
for i in range(len(names)):
    Stations[names[i]] = {}
    Stations[names[i]]['coordinates'] = coordinates[i]
    Stations[names[i]]['dataset'] = datasets[i]
    

# Training Cloud Cover Models

In [33]:
def df_to_X_y(df_X, df_y, window_size=14):
    X, y = [], []
    for i in range(len(df_X) - window_size):
        X.append(df_X.iloc[i:i+window_size].values)
        y.append(df_y.iloc[i + window_size].values)
    return np.array(X), np.array(y)

In [34]:
for i in range(len(names)):
    ############## Data Manupilation ##############

    df = datasets[i].copy()
    df.drop(['Liquid Precipitation'], axis=1, inplace=True)
    df['time'] = pd.to_datetime(df['Unnamed: 0'])
    df.drop(columns=['Unnamed: 0'], inplace=True)

    df['hour'] = df['time'].dt.hour
    df['month'] = df['time'].dt.month

    #Cyclical encoding
    df['hour_sin'] = np.sin(2 * np.pi * df['hour'] / 24)
    df['hour_cos'] = np.cos(2 * np.pi * df['hour'] / 24)
    df['month_sin'] = np.sin(2 * np.pi * df['month'] / 12)
    df['month_cos'] = np.cos(2 * np.pi * df['month'] / 12)

    input_features = ['Air Dew Point', 'Air Temperature (OC)', 'Humidity %',
                      'Atmospheric Pressure', 'Wind_U', 'Wind_V']
    target_feature = 'Cloud Cover %'

    train_df = df[df['time'] < '2023-11-01'].copy()
    val_df = df[(df['time'] >= '2023-11-01')].copy()

    #Scalers
    input_scaler = RobustScaler()
    input_scaler.fit(train_df[input_features])
    train_df[input_features] = input_scaler.transform(train_df[input_features])
    val_df[input_features] = input_scaler.transform(val_df[input_features])

    target_scaler = RobustScaler()
    target_scaler.fit(train_df[[target_feature]])
    train_df[target_feature] = target_scaler.transform(train_df[[target_feature]])
    val_df[target_feature] = target_scaler.transform(val_df[[target_feature]])

    #Save scalers
    input_scaler_path = f"Models/Cloud_Cover_Models/{names[i]}_input_scaler.save"
    target_scaler_path = f"Models/Cloud_Cover_Models/{names[i]}_target_scaler.save"
    joblib.dump(input_scaler, input_scaler_path)
    joblib.dump(target_scaler, target_scaler_path)

    #Apply window 
    X_train, y_train = df_to_X_y(train_df[input_features], train_df[[target_feature]])
    X_val, y_val = df_to_X_y(val_df[input_features], val_df[[target_feature]])

    ############## Model Definition and Training ##############
    model = Sequential([
        InputLayer(input_shape=(14, 6)),
        LSTM(64, return_sequences=True),
        LSTM(128),
        Dense(64, activation='relu'),
        Dense(1)
    ])
    model.compile(optimizer=Adam(0.0001), loss=MeanSquaredError(), metrics=['mse'])

    model_path = f"Models/Cloud_Cover_Models/{names[i]}_cloud_model.keras"
    cp = ModelCheckpoint(model_path, save_best_only=True)

    model.fit(X_train, y_train,
              validation_data=(X_val, y_val),
              epochs=10,
              callbacks=[cp])



Epoch 1/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m200s[0m 30ms/step - loss: 3.6959 - mse: 3.6959 - val_loss: 2.9306 - val_mse: 2.9306
Epoch 2/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m203s[0m 30ms/step - loss: 3.4733 - mse: 3.4733 - val_loss: 2.8704 - val_mse: 2.8704
Epoch 3/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m207s[0m 31ms/step - loss: 3.4144 - mse: 3.4144 - val_loss: 2.6947 - val_mse: 2.6947
Epoch 4/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m178s[0m 27ms/step - loss: 3.3743 - mse: 3.3743 - val_loss: 2.7070 - val_mse: 2.7070
Epoch 5/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m215s[0m 29ms/step - loss: 3.2957 - mse: 3.2957 - val_loss: 2.5995 - val_mse: 2.5995
Epoch 6/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m204s[0m 30ms/step - loss: 3.2880 - mse: 3.2880 - val_loss: 2.5840 - val_mse: 2.5840
Epoch 7/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m



Epoch 1/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m188s[0m 28ms/step - loss: 1.5064 - mse: 1.5064 - val_loss: 4.5284e-04 - val_mse: 4.5284e-04
Epoch 2/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m214s[0m 30ms/step - loss: 1.4065 - mse: 1.4065 - val_loss: 0.0027 - val_mse: 0.0027
Epoch 3/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m234s[0m 35ms/step - loss: 1.3795 - mse: 1.3795 - val_loss: 4.8908e-04 - val_mse: 4.8908e-04
Epoch 4/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m244s[0m 32ms/step - loss: 1.3496 - mse: 1.3496 - val_loss: 0.0011 - val_mse: 0.0011
Epoch 5/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m253s[0m 31ms/step - loss: 1.3314 - mse: 1.3314 - val_loss: 0.0022 - val_mse: 0.0022
Epoch 6/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m201s[0m 30ms/step - loss: 1.3195 - mse: 1.3195 - val_loss: 7.4879e-04 - val_mse: 7.4879e-04
Epoch 7/10
[1m6529/6529[0m [32m



Epoch 1/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m226s[0m 34ms/step - loss: 0.3831 - mse: 0.3831 - val_loss: 0.1051 - val_mse: 0.1051
Epoch 2/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m263s[0m 34ms/step - loss: 0.3273 - mse: 0.3273 - val_loss: 0.0248 - val_mse: 0.0248
Epoch 3/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m263s[0m 34ms/step - loss: 0.3188 - mse: 0.3188 - val_loss: 0.0159 - val_mse: 0.0159
Epoch 4/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m267s[0m 35ms/step - loss: 0.3129 - mse: 0.3129 - val_loss: 0.0119 - val_mse: 0.0119
Epoch 5/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m256s[0m 34ms/step - loss: 0.3068 - mse: 0.3068 - val_loss: 0.0219 - val_mse: 0.0219
Epoch 6/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m263s[0m 34ms/step - loss: 0.3030 - mse: 0.3030 - val_loss: 0.0182 - val_mse: 0.0182
Epoch 7/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

ValueError: Found array with 0 sample(s) (shape=(0, 6)) while a minimum of 1 is required by RobustScaler.

In [35]:
irwaished.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 191383 entries, 0 to 191382
Data columns (total 9 columns):
 #   Column                Non-Null Count   Dtype  
---  ------                --------------   -----  
 0   Unnamed: 0            191383 non-null  object 
 1   Air Dew Point         191383 non-null  float64
 2   Air Temperature (OC)  191383 non-null  float64
 3   Humidity %            191383 non-null  float64
 4   Atmospheric Pressure  191383 non-null  float64
 5   Liquid Precipitation  191383 non-null  float64
 6   Cloud Cover %         191383 non-null  float64
 7   Wind_U                191383 non-null  float64
 8   Wind_V                191383 non-null  float64
dtypes: float64(8), object(1)
memory usage: 13.1+ MB


In [40]:
d2 = [maan, mafraq, amman, safawi]
n2 = ['maan', 'mafraq', 'amman', 'safawi']

for i in range(len(n2)):
    ############## Data Manupilation ##############

    df = d2[i].copy()
    df.drop(['Liquid Precipitation'], axis=1, inplace=True)
    df['time'] = pd.to_datetime(df['Unnamed: 0'])
    df.drop(columns=['Unnamed: 0'], inplace=True)

    df['hour'] = df['time'].dt.hour
    df['month'] = df['time'].dt.month

    #Cyclical encoding
    df['hour_sin'] = np.sin(2 * np.pi * df['hour'] / 24)
    df['hour_cos'] = np.cos(2 * np.pi * df['hour'] / 24)
    df['month_sin'] = np.sin(2 * np.pi * df['month'] / 12)
    df['month_cos'] = np.cos(2 * np.pi * df['month'] / 12)

    input_features = ['Air Dew Point', 'Air Temperature (OC)', 'Humidity %',
                      'Atmospheric Pressure', 'Wind_U', 'Wind_V']
    target_feature = 'Cloud Cover %'

    train_df = df[df['time'] < '2023-11-01'].copy()
    val_df = df[(df['time'] >= '2023-11-01')].copy()

    #Scalers
    input_scaler = RobustScaler()
    input_scaler.fit(train_df[input_features])
    train_df[input_features] = input_scaler.transform(train_df[input_features])
    val_df[input_features] = input_scaler.transform(val_df[input_features])

    target_scaler = RobustScaler()
    target_scaler.fit(train_df[[target_feature]])
    train_df[target_feature] = target_scaler.transform(train_df[[target_feature]])
    val_df[target_feature] = target_scaler.transform(val_df[[target_feature]])

    #Save scalers
    input_scaler_path = f"Models/Cloud_Cover_Models/{n2[i]}_input_scaler.save"
    target_scaler_path = f"Models/Cloud_Cover_Models/{n2[i]}_target_scaler.save"
    joblib.dump(input_scaler, input_scaler_path)
    joblib.dump(target_scaler, target_scaler_path)

    #Apply window 
    X_train, y_train = df_to_X_y(train_df[input_features], train_df[[target_feature]])
    X_val, y_val = df_to_X_y(val_df[input_features], val_df[[target_feature]])

    ############## Model Definition and Training ##############
    model = Sequential([
        InputLayer(input_shape=(14, 6)),
        LSTM(64, return_sequences=True),
        LSTM(128),
        Dense(64, activation='relu'),
        Dense(1)
    ])
    model.compile(optimizer=Adam(0.0001), loss=MeanSquaredError(), metrics=['mse'])

    model_path = f"Models/Cloud_Cover_Models/{n2[i]}_cloud_model.keras"
    cp = ModelCheckpoint(model_path, save_best_only=True)

    model.fit(X_train, y_train,
              validation_data=(X_val, y_val),
              epochs=10,
              callbacks=[cp])



Epoch 1/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m226s[0m 34ms/step - loss: 0.8495 - mse: 0.8495 - val_loss: 2.0468 - val_mse: 2.0468
Epoch 2/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m220s[0m 34ms/step - loss: 0.7883 - mse: 0.7883 - val_loss: 2.1111 - val_mse: 2.1111
Epoch 3/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m268s[0m 35ms/step - loss: 0.7727 - mse: 0.7727 - val_loss: 2.1647 - val_mse: 2.1647
Epoch 4/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m261s[0m 34ms/step - loss: 0.7598 - mse: 0.7598 - val_loss: 2.1283 - val_mse: 2.1283
Epoch 5/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m221s[0m 34ms/step - loss: 0.7536 - mse: 0.7536 - val_loss: 2.2123 - val_mse: 2.2123
Epoch 6/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m265s[0m 34ms/step - loss: 0.7415 - mse: 0.7415 - val_loss: 2.2688 - val_mse: 2.2688
Epoch 7/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m



Epoch 1/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m234s[0m 35ms/step - loss: 0.3294 - mse: 0.3294 - val_loss: 0.4132 - val_mse: 0.4132
Epoch 2/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m230s[0m 35ms/step - loss: 0.3063 - mse: 0.3063 - val_loss: 0.3953 - val_mse: 0.3953
Epoch 3/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m223s[0m 34ms/step - loss: 0.2967 - mse: 0.2967 - val_loss: 0.4040 - val_mse: 0.4040
Epoch 4/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m269s[0m 35ms/step - loss: 0.2892 - mse: 0.2892 - val_loss: 0.3972 - val_mse: 0.3972
Epoch 5/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m225s[0m 34ms/step - loss: 0.2862 - mse: 0.2862 - val_loss: 0.3846 - val_mse: 0.3846
Epoch 6/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m266s[0m 35ms/step - loss: 0.2793 - mse: 0.2793 - val_loss: 0.3786 - val_mse: 0.3786
Epoch 7/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m



Epoch 1/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m239s[0m 36ms/step - loss: 0.4505 - mse: 0.4505 - val_loss: 0.5040 - val_mse: 0.5040
Epoch 2/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m234s[0m 36ms/step - loss: 0.4001 - mse: 0.4001 - val_loss: 0.5020 - val_mse: 0.5020
Epoch 3/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m260s[0m 36ms/step - loss: 0.3954 - mse: 0.3954 - val_loss: 0.4832 - val_mse: 0.4832
Epoch 4/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m230s[0m 35ms/step - loss: 0.3831 - mse: 0.3831 - val_loss: 0.4825 - val_mse: 0.4825
Epoch 5/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m266s[0m 36ms/step - loss: 0.3770 - mse: 0.3770 - val_loss: 0.4581 - val_mse: 0.4581
Epoch 6/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m261s[0m 36ms/step - loss: 0.3697 - mse: 0.3697 - val_loss: 0.4696 - val_mse: 0.4696
Epoch 7/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m



Epoch 1/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m226s[0m 34ms/step - loss: 0.5527 - mse: 0.5527 - val_loss: 1.0697 - val_mse: 1.0697
Epoch 2/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m229s[0m 35ms/step - loss: 0.5156 - mse: 0.5156 - val_loss: 1.0393 - val_mse: 1.0393
Epoch 3/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m265s[0m 36ms/step - loss: 0.5054 - mse: 0.5054 - val_loss: 0.9882 - val_mse: 0.9882
Epoch 4/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m261s[0m 35ms/step - loss: 0.5011 - mse: 0.5011 - val_loss: 1.0049 - val_mse: 1.0049
Epoch 5/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m260s[0m 35ms/step - loss: 0.4929 - mse: 0.4929 - val_loss: 0.9965 - val_mse: 0.9965
Epoch 6/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m261s[0m 35ms/step - loss: 0.4883 - mse: 0.4883 - val_loss: 0.9630 - val_mse: 0.9630
Epoch 7/10
[1m6529/6529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

In [43]:


df = irwaished.copy()
df.drop(['Liquid Precipitation'], axis=1, inplace=True)
df['time'] = pd.to_datetime(df['Unnamed: 0'])
df.drop(columns=['Unnamed: 0'], inplace=True)

df['hour'] = df['time'].dt.hour
df['month'] = df['time'].dt.month

    #Cyclical encoding
df['hour_sin'] = np.sin(2 * np.pi * df['hour'] / 24)
df['hour_cos'] = np.cos(2 * np.pi * df['hour'] / 24)
df['month_sin'] = np.sin(2 * np.pi * df['month'] / 12)
df['month_cos'] = np.cos(2 * np.pi * df['month'] / 12)

input_features = ['Air Dew Point', 'Air Temperature (OC)', 'Humidity %',
                      'Atmospheric Pressure', 'Wind_U', 'Wind_V']
target_feature = 'Cloud Cover %'

train_df = df.copy()

    #Scalers
input_scaler = RobustScaler()
input_scaler.fit(train_df[input_features])
train_df[input_features] = input_scaler.transform(train_df[input_features])


target_scaler = RobustScaler()
target_scaler.fit(train_df[[target_feature]])
train_df[target_feature] = target_scaler.transform(train_df[[target_feature]])

    #Save scalers
input_scaler_path = f"Models/Cloud_Cover_Models/{'irwaished'}_input_scaler.save"
target_scaler_path = f"Models/Cloud_Cover_Models/{'irwaished'}_target_scaler.save"
joblib.dump(input_scaler, input_scaler_path)
joblib.dump(target_scaler, target_scaler_path)

    #Apply window 
X_train, y_train = df_to_X_y(train_df[input_features], train_df[[target_feature]])


    ############## Model Definition and Training ##############
model = Sequential([
        InputLayer(input_shape=(14, 6)),
        LSTM(64, return_sequences=True),
        LSTM(128),
        Dense(64, activation='relu'),
        Dense(1)
])
model.compile(optimizer=Adam(0.0001), loss=MeanSquaredError(), metrics=['mse'])

model_path = f"Models/Cloud_Cover_Models/irwaished_cloud_model.keras"
cp = ModelCheckpoint(model_path)

model.fit(X_train, y_train,
              epochs=10,
              callbacks=[cp])



Epoch 1/10
[1m5981/5981[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m212s[0m 35ms/step - loss: 1.8314 - mse: 1.8314
Epoch 2/10
[1m5981/5981[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m201s[0m 34ms/step - loss: 1.6951 - mse: 1.6951
Epoch 3/10
[1m5981/5981[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m206s[0m 34ms/step - loss: 1.6762 - mse: 1.6762
Epoch 4/10
[1m5981/5981[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m208s[0m 35ms/step - loss: 1.6414 - mse: 1.6414
Epoch 5/10
[1m5981/5981[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m202s[0m 34ms/step - loss: 1.6048 - mse: 1.6048
Epoch 6/10
[1m5981/5981[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m204s[0m 34ms/step - loss: 1.5790 - mse: 1.5790
Epoch 7/10
[1m5981/5981[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m206s[0m 34ms/step - loss: 1.5490 - mse: 1.5490
Epoch 8/10
[1m5981/5981[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m263s[0m 35ms/step - loss: 1.5199 - mse: 1.5199
Epoch 9/10
[1m5981/5981[0m [32m━━━━━━

<keras.src.callbacks.history.History at 0x22727e796f0>

# Precipitation Models

In [47]:
class RainPredictionDataset(Dataset):
    def __init__(self, df, seq_len=48, output_len=24):
        self.seq_len = seq_len
        self.output_len = output_len
        self.features = df[features].values.astype(np.float32)
        self.timestamps = df['time'].reset_index(drop=True)
        self.precip = df['Liquid Precipitation'].reset_index(drop=True).values
        self.X, self.y = [], []

        #sliding window
        for i in range(len(df) - seq_len - output_len):
            x_window = self.features[i:i+seq_len]
            y_hat = self.precip[i+seq_len:i+seq_len+output_len]

            start_time = self.timestamps[i]
            end_time = self.timestamps[i + seq_len + output_len - 1]
            expected_hours = seq_len + output_len - 1
            if (end_time - start_time).total_seconds() / 3600 != expected_hours:
                continue  

            y = float(np.sum(y_hat) > 0) 
            self.X.append(x_window)
            self.y.append(y)

        self.X = np.array(self.X, dtype=np.float32)
        self.y = np.array(self.y, dtype=np.float32)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return torch.tensor(self.X[idx], dtype=torch.float32), torch.tensor(self.y[idx], dtype=torch.float32)

In [48]:
class RainLSTM(nn.Module):
    def __init__(self, input_size, hidden_size=64, num_layers=2):
        super(RainLSTM, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Sequential(
            nn.Linear(hidden_size, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
            nn.Sigmoid()  
        )

    def forward(self, x):
        _, (hn, _) = self.lstm(x) 
        return self.fc(hn[-1])  

In [50]:
features = [
    'Air Dew Point', 'Air Temperature (OC)', 'Humidity %',
    'Atmospheric Pressure', 'Cloud Cover %', 'Wind_U', 'Wind_V',
    'hour_sin', 'hour_cos', 'day_sin', 'day_cos'
]

In [52]:
for i in range(len(names)):
    ############## Data Manupilation ##############

    df = datasets[i].copy()
    df['time'] = pd.to_datetime(df['Unnamed: 0'])
    df.drop(columns=['Unnamed: 0'], inplace=True)

    df['hour'] = df['time'].dt.hour
    df['day_yr'] = df['time'].dt.dayofyear
    df['hour_sin'] = np.sin(2 * np.pi * df['hour'] / 24)
    df['hour_cos'] = np.cos(2 * np.pi * df['hour'] / 24)
    df['day_sin'] = np.sin(2 * np.pi * df['day_yr'] / 365)
    df['day_cos'] = np.cos(2 * np.pi * df['day_yr'] / 365)

    scaler = StandardScaler()
    df[features] = scaler.fit_transform(df[features])
    
    scaler_path = f"Models/Precipitation_Models/{names[i]}_scaler.pkl"
    joblib.dump(scaler, scaler_path)

    df['date'] = df['time'].dt.date
    rain_per_day = df.groupby('date')['Liquid Precipitation'].sum()
    rain_tomorrow = (rain_per_day.shift(-1) > 0).astype(int)
    df['RainTomorrow'] = df['date'].map(rain_tomorrow)

    df = df.dropna(subset=['RainTomorrow'])

    full = RainPredictionDataset(df)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    
    train_len = int(len(full) * 0.95)
    train_ds = torch.utils.data.Subset(full, range(train_len))
    val_ds = torch.utils.data.Subset(full, range(train_len, len(full)))

    model = RainLSTM(input_size=len(features)).to(device)


    train_loader = DataLoader(train_ds, batch_size=256, shuffle=True, pin_memory=True)
    val_loader = DataLoader(val_ds, batch_size=256, pin_memory=True)


    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.BCELoss()


    for epoch in range(10):
        model.train()
        total_loss = 0
        for X_batch, y_batch in train_loader:
            X_batch = X_batch.to(device, non_blocking=True)
            y_batch = y_batch.to(device, non_blocking=True).unsqueeze(1)
            optimizer.zero_grad()
            preds = model(X_batch)
            loss = criterion(preds, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        print(f"Epoch {epoch+1} - Loss: {total_loss / len(train_loader):.4f}")

    model_path  = f"Models/Precipitation_Models/{names[i]}_precipitation_model.pth"
    torch.save(model.state_dict(), model_path)

Epoch 1 - Loss: 0.1032
Epoch 2 - Loss: 0.0746
Epoch 3 - Loss: 0.0600
Epoch 4 - Loss: 0.0451
Epoch 5 - Loss: 0.0332
Epoch 6 - Loss: 0.0247
Epoch 7 - Loss: 0.0195
Epoch 8 - Loss: 0.0153
Epoch 9 - Loss: 0.0142
Epoch 10 - Loss: 0.0134
Epoch 1 - Loss: 0.1885
Epoch 2 - Loss: 0.1542
Epoch 3 - Loss: 0.1400
Epoch 4 - Loss: 0.1180
Epoch 5 - Loss: 0.0976
Epoch 6 - Loss: 0.0787
Epoch 7 - Loss: 0.0679
Epoch 8 - Loss: 0.0584
Epoch 9 - Loss: 0.0517
Epoch 10 - Loss: 0.0440
Epoch 1 - Loss: 0.2450
Epoch 2 - Loss: 0.2128
Epoch 3 - Loss: 0.1929
Epoch 4 - Loss: 0.1710
Epoch 5 - Loss: 0.1477
Epoch 6 - Loss: 0.1247
Epoch 7 - Loss: 0.1047
Epoch 8 - Loss: 0.0862
Epoch 9 - Loss: 0.0719
Epoch 10 - Loss: 0.0588
Epoch 1 - Loss: 0.1825
Epoch 2 - Loss: 0.1503
Epoch 3 - Loss: 0.1339
Epoch 4 - Loss: 0.1124
Epoch 5 - Loss: 0.0893
Epoch 6 - Loss: 0.0703
Epoch 7 - Loss: 0.0583
Epoch 8 - Loss: 0.0444
Epoch 9 - Loss: 0.0342
Epoch 10 - Loss: 0.0300
Epoch 1 - Loss: 0.1489
Epoch 2 - Loss: 0.1271
Epoch 3 - Loss: 0.1149
Epoch 4

# Generating output

In [53]:
from math import radians, sin, cos, sqrt, atan2

In [94]:
def haversine(lat1, lon1, lat2, lon2):
    R = 6371.0
    dlat = radians(lat2 - lat1)
    dlon = radians(lon2 - lon1)
    a = sin(dlat / 2)**2 + cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon / 2)**2
    return R * 2 * atan2(sqrt(a), sqrt(1 - a))

def predict_rain_probability(location: tuple, names, df_42h: pd.DataFrame, models_dir="Models/Precipitation_Models"):
    assert len(df_42h) == 48, "DataFrame must contain exactly 42 rows (hours)"
    
    # Fixed input features — must match training time exactly
    input_features = [
    'Air Dew Point', 'Air Temperature (OC)', 'Humidity %',
    'Atmospheric Pressure', 'Cloud Cover %', 'Wind_U', 'Wind_V',
    'hour_sin', 'hour_cos', 'day_sin', 'day_cos'
    ]
    
     # Find closest station
    lat, lon = location
    distances = [haversine(lat, lon, stat_lat, stat_lon) for stat_lat, stat_lon in coordinates]
    closest_idx = int(np.argmin(distances))
    station = names[closest_idx]

    # Load model and scaler
    scaler = joblib.load(f"{models_dir}/{station}_scaler.pkl")
    model = RainLSTM(input_size=11)
    model.load_state_dict(torch.load(f"{models_dir}/{station}_precipitation_model.pth", map_location=torch.device('cpu')))
    model.eval()

    # Copy and create temporal features
    df = df_42h.copy()
    df['hour'] = df['time'].dt.hour
    df['day_yr'] = df['time'].dt.dayofyear
    df['hour_sin'] = np.sin(2 * np.pi * df['hour'] / 24)
    df['hour_cos'] = np.cos(2 * np.pi * df['hour'] / 24)
    df['day_sin'] = np.sin(2 * np.pi * df['day_yr'] / 365)
    df['day_cos'] = np.cos(2 * np.pi * df['day_yr'] / 365)

    # Scale all 11 input features
    df[input_features] = scaler.transform(df[input_features])

    # Prepare tensor
    input_data = df[input_features].values.astype(np.float32)

    # Pad to 48 hours if needed
    if len(input_data) < 48:
        pad_len = 48 - len(input_data)
        padding = np.repeat(input_data[-1:], pad_len, axis=0)
        input_data = np.vstack([input_data, padding])

    input_tensor = torch.tensor(input_data).unsqueeze(0)  # shape (1, 48, 11)

    with torch.no_grad():
        prob = model(input_tensor).item()

    return {
        "closest_station": station,
        "precipitation_probability": round(prob, 4)
    }

In [60]:
test_df = amman.copy()

In [63]:
test_df['time'] = pd.to_datetime(test_df['Unnamed: 0'])
test_df.drop(['Unnamed: 0'], axis = 1, inplace = True)

In [112]:
test = test_df[
    (test_df['time'] < pd.to_datetime('2023-12-24 00:00:00')) &
    (test_df['time'] > pd.to_datetime('2023-12-21 23:00:00'))
]

In [108]:
test.drop(['Liquid Precipitation'], axis = 1, inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test.drop(['Liquid Precipitation'], axis = 1, inplace = True)


In [113]:
location = (31.9, 36.0) 
features = ['Air Dew Point', 'Air Temperature (OC)', 'Humidity %',
       'Atmospheric Pressure', 'Cloud Cover %',
       'Wind_U', 'Wind_V']  
result = predict_rain_probability(location,names, test)
print(result)

{'closest_station': 'amman', 'precipitation_probability': 0.9113}
