In [283]:
import tensorflow as tf
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split

from sklearn.preprocessing import StandardScaler, OneHotEncoder

In [None]:
PATH = r"C:\Users\abdul\OneDrive\Documents\GEOG398E Project\datasets\cleaned_V2.0_status_intensity_observation_data.csv"

df = pd.read_csv(PATH)

In [300]:
#make a new df for training usign the observation date to only get years 2011 - 2022
df_train = df[df['Observation_Date'].str.contains('2011|2012|2013|2014|2015|2016|2017|2018|2019|2020|2021|2022')]
#make new df for ground truth information taking just from 2023 from observation date which is year-month-day
df_gt = df[df['Observation_Date'].str.contains('2023|2024')]

# Define features (X) and target variable (y) for training
X_train = df_train[['AGDD', 'Daylength', 'Prcp', 'Tmax', 'Tmin', 'Year', 'Month', 'Day', 'Accum_Prcp']] 
y_train = df_train['Intensity_Value']

# Define features (X) and target variable (y) for testing
X_test = df_gt[['AGDD', 'Daylength', 'Prcp', 'Tmax', 'Tmin', 'Year', 'Month', 'Day', 'Accum_Prcp']] 
y_test = df_gt['Intensity_Value']

KeyError: 'Observation_Date'

In [286]:
# land cover
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')

encoded_land_cover_train = encoder.fit_transform(df_train[['land_cover_type']])
encoded_land_cover_test = encoder.transform(df_gt[['land_cover_type']])

encoded_feature_names = encoder.get_feature_names_out(['land_cover_type'])

encoded_land_cover_train_df = pd.DataFrame(encoded_land_cover_train, columns=encoded_feature_names, index=df_train.index)
encoded_land_cover_test_df = pd.DataFrame(encoded_land_cover_test, columns=encoded_feature_names, index=df_gt.index)

X_train = pd.concat([X_train, encoded_land_cover_train_df], axis=1)
X_test = pd.concat([X_test, encoded_land_cover_test_df], axis=1)


In [287]:
X_train['Month_sin'] = np.sin(2 * np.pi * X_train['Month'] / 12)
X_train['Month_cos'] = np.cos(2 * np.pi * X_train['Month'] / 12)

X_test['Month_sin'] = np.sin(2 * np.pi * X_test['Month'] / 12)
X_test['Month_cos'] = np.cos(2 * np.pi * X_test['Month'] / 12)

for df in [X_train, X_test]:
    df['Day_sin'] = np.sin(2 * np.pi * df['Day'] / 31)  # Max 31 days in a month
    df['Day_cos'] = np.cos(2 * np.pi * df['Day'] / 31)

In [288]:
# X_train = X_train.drop(['Month', 'Day'], axis=1)
# X_test = X_test.drop(['Month', 'Day'], axis=1)

In [289]:
scaler = StandardScaler()
# X_train_scaled = scaler.fit_transform(X_train)
# X_test_scaled = scaler.transform(X_test)

In [290]:
# Xtrain, Xtest, ytrain, ytest = train_test_split(X_train, y_train, test_size=0.5, random_state=5)
print(X_train.shape, X_test.shape)
print(y_train.shape, y_test.shape)

(21094, 21) (5738, 21)
(21094,) (5738,)


In [291]:
number_of_class = 10
train_labels_onehot = tf.one_hot(y_train, number_of_class)
test_labels_onehot = tf.one_hot(y_test, number_of_class)
print('New shape: ', train_labels_onehot.shape)
print('New value: ', train_labels_onehot)

New shape:  (21094, 10)
New value:  tf.Tensor(
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 1. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 1. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 [0. 0. 1. ... 0. 0. 0.]], shape=(21094, 10), dtype=float32)


In [None]:
normalizer = tf.keras.layers.Normalization(axis=-1)
normalizer.adapt(X_train.values)  

model = tf.keras.Sequential([
  normalizer,
  tf.keras.layers.Dense(128, activation='relu'), # 64 neurons, ReLU activation function
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dense(64, activation='relu'),
  tf.keras.layers.Dense(10, activation='softmax') # output layer, 10 because 10 labels (0-9)
])

optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

model.compile(
  optimizer=optimizer,
  loss=tf.keras.losses.CategoricalCrossentropy(),
  metrics=['accuracy']
)

In [298]:
model.fit(X_train, train_labels_onehot, epochs=10)

Epoch 1/10
[1m660/660[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.3022 - loss: 1.8521
Epoch 2/10
[1m660/660[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.3213 - loss: 1.8102
Epoch 3/10
[1m660/660[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.3199 - loss: 1.7911
Epoch 4/10
[1m660/660[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.3261 - loss: 1.7820
Epoch 5/10
[1m660/660[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.3234 - loss: 1.7748
Epoch 6/10
[1m660/660[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.3271 - loss: 1.7651
Epoch 7/10
[1m660/660[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.3304 - loss: 1.7584
Epoch 8/10
[1m660/660[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.3404 - loss: 1.7485
Epoch 9/10
[1m660/660[0m [32m━━━━━━━━

<keras.src.callbacks.history.History at 0x1c8afb92750>

In [299]:
model.evaluate(X_test, test_labels_onehot)

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 990us/step - accuracy: 0.1957 - loss: 2.3564


[2.430546522140503, 0.19954687356948853]

In [295]:
print(y_train.value_counts().sort_index())

Intensity_Value
0.0    1684
1.0    2930
2.0    3696
3.0    1208
4.0    3972
5.0    1040
6.0    2317
7.0    1234
8.0     720
9.0    2293
Name: count, dtype: int64


In [296]:
class_counts = y_train.value_counts().sort_index()
total = len(y_train)
class_weights = {i: total / (len(class_counts) * count) for i, count in enumerate(class_counts)}
print(class_weights)

{0: 1.2526128266033254, 1: 0.7199317406143345, 2: 0.5707251082251082, 3: 1.7461920529801325, 4: 0.531067472306143, 5: 2.0282692307692307, 6: 0.9104013810962451, 7: 1.7094003241491087, 8: 2.9297222222222223, 9: 0.9199302224160488}


In [297]:
history = model.fit(
    X_train, 
    train_labels_onehot,
    epochs=10,  # Increase epochs
    class_weight=class_weights,  # Critical for imbalance
    validation_split=0.2,  # Add validation split
    # callbacks=[tf.keras.callbacks.EarlyStopping(patience=3)]
)

Epoch 1/10
[1m528/528[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.2579 - loss: 1.9726 - val_accuracy: 0.1889 - val_loss: 2.1195
Epoch 2/10
[1m528/528[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.2575 - loss: 1.9194 - val_accuracy: 0.1598 - val_loss: 2.2028
Epoch 3/10
[1m528/528[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.2529 - loss: 1.9188 - val_accuracy: 0.1896 - val_loss: 2.1438
Epoch 4/10
[1m528/528[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.2615 - loss: 1.9117 - val_accuracy: 0.1543 - val_loss: 2.2216
Epoch 5/10
[1m528/528[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.2648 - loss: 1.8921 - val_accuracy: 0.1315 - val_loss: 2.3575
Epoch 6/10
[1m528/528[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.2707 - loss: 1.8794 - val_accuracy: 0.1313 - val_loss: 2.3316
Epoch 7/10
[1m528/528[0m 