In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import xarray as xr

In [2]:
df=xr.open_dataset('project_small2.nc')

In [3]:
print(df)

<xarray.Dataset>
Dimensions:    (longitude: 1440, latitude: 721, time: 744)
Coordinates:
  * longitude  (longitude) float32 0.0 0.25 0.5 0.75 ... 359.0 359.2 359.5 359.8
  * latitude   (latitude) float32 90.0 89.75 89.5 89.25 ... -89.5 -89.75 -90.0
  * time       (time) datetime64[ns] 2020-01-01 ... 2020-01-31T23:00:00
Data variables:
    t          (time, latitude, longitude) float32 ...
Attributes:
    Conventions:  CF-1.6
    history:      2023-11-06 10:53:18 GMT by grib_to_netcdf-2.25.1: /opt/ecmw...


In [4]:
# Access a specific coordinate
longitude_values = df['longitude'].values

In [5]:
longitude_values

array([0.0000e+00, 2.5000e-01, 5.0000e-01, ..., 3.5925e+02, 3.5950e+02,
       3.5975e+02], dtype=float32)

In [6]:
# Access a specific coordinate
latitude_values = df['latitude'].values

In [7]:
latitude_values

array([ 90.  ,  89.75,  89.5 ,  89.25,  89.  ,  88.75,  88.5 ,  88.25,
        88.  ,  87.75,  87.5 ,  87.25,  87.  ,  86.75,  86.5 ,  86.25,
        86.  ,  85.75,  85.5 ,  85.25,  85.  ,  84.75,  84.5 ,  84.25,
        84.  ,  83.75,  83.5 ,  83.25,  83.  ,  82.75,  82.5 ,  82.25,
        82.  ,  81.75,  81.5 ,  81.25,  81.  ,  80.75,  80.5 ,  80.25,
        80.  ,  79.75,  79.5 ,  79.25,  79.  ,  78.75,  78.5 ,  78.25,
        78.  ,  77.75,  77.5 ,  77.25,  77.  ,  76.75,  76.5 ,  76.25,
        76.  ,  75.75,  75.5 ,  75.25,  75.  ,  74.75,  74.5 ,  74.25,
        74.  ,  73.75,  73.5 ,  73.25,  73.  ,  72.75,  72.5 ,  72.25,
        72.  ,  71.75,  71.5 ,  71.25,  71.  ,  70.75,  70.5 ,  70.25,
        70.  ,  69.75,  69.5 ,  69.25,  69.  ,  68.75,  68.5 ,  68.25,
        68.  ,  67.75,  67.5 ,  67.25,  67.  ,  66.75,  66.5 ,  66.25,
        66.  ,  65.75,  65.5 ,  65.25,  65.  ,  64.75,  64.5 ,  64.25,
        64.  ,  63.75,  63.5 ,  63.25,  63.  ,  62.75,  62.5 ,  62.25,
      

In [8]:
# Access a specific coordinate
time_values = df['time'].values

In [9]:
time_values

array(['2020-01-01T00:00:00.000000000', '2020-01-01T01:00:00.000000000',
       '2020-01-01T02:00:00.000000000', '2020-01-01T03:00:00.000000000',
       '2020-01-01T04:00:00.000000000', '2020-01-01T05:00:00.000000000',
       '2020-01-01T06:00:00.000000000', '2020-01-01T07:00:00.000000000',
       '2020-01-01T08:00:00.000000000', '2020-01-01T09:00:00.000000000',
       '2020-01-01T10:00:00.000000000', '2020-01-01T11:00:00.000000000',
       '2020-01-01T12:00:00.000000000', '2020-01-01T13:00:00.000000000',
       '2020-01-01T14:00:00.000000000', '2020-01-01T15:00:00.000000000',
       '2020-01-01T16:00:00.000000000', '2020-01-01T17:00:00.000000000',
       '2020-01-01T18:00:00.000000000', '2020-01-01T19:00:00.000000000',
       '2020-01-01T20:00:00.000000000', '2020-01-01T21:00:00.000000000',
       '2020-01-01T22:00:00.000000000', '2020-01-01T23:00:00.000000000',
       '2020-01-02T00:00:00.000000000', '2020-01-02T01:00:00.000000000',
       '2020-01-02T02:00:00.000000000', '2020-01-02

In [10]:
# Access a specific coordinate
t_values = df['t'].values

In [11]:
t_values

array([[[232.21077, 232.21077, 232.21077, ..., 232.21077, 232.21077,
         232.21077],
        [232.34175, 232.34175, 232.34079, ..., 232.34271, 232.34271,
         232.34271],
        [232.3972 , 232.39626, 232.39626, ..., 232.39912, 232.39816,
         232.3972 ],
        ...,
        [237.75243, 237.75243, 237.75243, ..., 237.75339, 237.75339,
         237.75339],
        [237.52104, 237.52104, 237.52104, ..., 237.522  , 237.522  ,
         237.522  ],
        [237.24568, 237.24568, 237.24568, ..., 237.24568, 237.24568,
         237.24568]],

       [[232.09795, 232.09795, 232.09795, ..., 232.09795, 232.09795,
         232.09795],
        [232.21458, 232.21458, 232.21364, ..., 232.21555, 232.21555,
         232.21555],
        [232.27386, 232.27292, 232.27196, ..., 232.27673, 232.27579,
         232.27483],
        ...,
        [237.78781, 237.78685, 237.78685, ..., 237.78876, 237.78876,
         237.78781],
        [237.5545 , 237.5545 , 237.55356, ..., 237.5545 , 237.5545 ,
   

In [13]:
# Create a meshgrid for longitude, latitude, and time
lon, lat, time = np.meshgrid(longitude_values, latitude_values, time_values, indexing='ij')

# Reshape the data for the training dataset
lon_flat = lon.flatten()
lat_flat = lat.flatten()

# Convert datetime64 to numeric representation (Unix timestamps)
time_flat = time.astype(np.int64).flatten()

# Combine the values into a training dataset
training_dataset = np.column_stack((lon_flat, lat_flat, time_flat))


In [14]:
training_dataset

array([[ 0.0000000e+00,  9.0000000e+01,  1.5778368e+18],
       [ 0.0000000e+00,  9.0000000e+01,  1.5778404e+18],
       [ 0.0000000e+00,  9.0000000e+01,  1.5778440e+18],
       ...,
       [ 3.5975000e+02, -9.0000000e+01,  1.5805044e+18],
       [ 3.5975000e+02, -9.0000000e+01,  1.5805080e+18],
       [ 3.5975000e+02, -9.0000000e+01,  1.5805116e+18]])

In [15]:
# Target variable
t_values = df['t'].values.flatten()

In [16]:
t_values

array([232.21077, 232.21077, 232.21077, ..., 235.561  , 235.561  ,
       235.561  ], dtype=float32)

In [17]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(training_dataset, t_values, test_size=0.8, random_state=42)

In [18]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.3, random_state=42)

In [19]:
# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Reshape the features to include a "time" dimension (assuming time series length is sequence_length)
sequence_length = X_train.shape[1]
X_train = X_train.reshape(-1, sequence_length, 1)
X_test = X_test.reshape(-1, sequence_length, 1)

In [20]:
# Define batch size
batch_size = 9600

# Vision Transformer for time series classification
class TransformerModel(tf.keras.models.Model):
    def __init__(self, num_heads, ff_dim, num_transformer_blocks, mlp_units, input_shape):
        super(TransformerModel, self).__init__()

        self.embedding = tf.keras.layers.Dense(units=num_heads, input_shape=input_shape)
        self.transformer_blocks = [
            tf.keras.layers.MultiHeadAttention(num_heads=num_heads, key_dim=num_heads, dropout=0.1) for _ in range(num_transformer_blocks)
        ]
        self.mlp = tf.keras.models.Sequential([
            tf.keras.layers.Dense(units=mlp_units, activation='relu'),
            tf.keras.layers.Dropout(0.1),
            tf.keras.layers.Dense(units=1, activation='sigmoid')  # Assuming binary classification
        ])

    def call(self, inputs):
        x = self.embedding(inputs)
        for transformer_block in self.transformer_blocks:
            x = transformer_block(x, x)
        x = tf.reduce_mean(x, axis=1)  # Global average pooling over the time dimension
        return self.mlp(x)

# Create and compile the model
model = TransformerModel(num_heads=8,
                         ff_dim=32,
                         num_transformer_blocks=4,
                         mlp_units=64,
                         input_shape=(sequence_length, 1))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model with batch size
model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test), batch_size=batch_size)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x18b24d27b50>

In [22]:
from sklearn.metrics import confusion_matrix

# Assuming y_test and y_pred are NumPy arrays with integer labels for a 31-class problem
y_test = y_test.astype(int)
y_pred = y_pred.astype(int)

conf_mat = confusion_matrix(y_test, y_pred)

# Calculate accuracy
accuracy = sum(conf_mat.diagonal()) / conf_mat.sum()

# Print accuracy
print(f'Accuracy: {accuracy:.4f}')

# Calculate precision, recall, and F1 score for each class with a check for zero denominator
precision_per_class = []
recall_per_class = []
f1_per_class = []

for i in range(31):
    true_positive = conf_mat[i, i]
    false_positive = sum(conf_mat[:, i]) - true_positive
    false_negative = sum(conf_mat[i, :]) - true_positive

    precision_i = true_positive / (true_positive + false_positive) if (true_positive + false_positive) != 0 else 0
    recall_i = true_positive / (true_positive + false_negative) if (true_positive + false_negative) != 0 else 0

    f1_i = 2 * (precision_i * recall_i) / (precision_i + recall_i) if (precision_i + recall_i) != 0 else 0

    precision_per_class.append(precision_i)
    recall_per_class.append(recall_i)
    f1_per_class.append(f1_i)

# Overall precision, recall, and F1 score
overall_precision = sum(precision_per_class) / len(precision_per_class)
overall_recall = sum(recall_per_class) / len(recall_per_class)
overall_f1 = sum(f1_per_class) / len(f1_per_class)

# Print precision, recall, and F1 score for each class
for i, (precision_i, recall_i, f1_i) in enumerate(zip(precision_per_class, recall_per_class, f1_per_class)):
    print(f'Class {i}: Precision: {precision_i:.4f}, Recall: {recall_i:.4f}, F1 Score: {f1_i:.4f}')

# Print overall precision, recall, F1 score, and accuracy
print(f'Overall Precision: {overall_precision:.4f}')
print(f'Overall Recall: {overall_recall:.4f}')
print(f'Overall F1 Score: {overall_f1:.4f}')
print(f'Accuracy: {accuracy:.4f}')


Accuracy: 0.0000
Class 0: Precision: 0.0000, Recall: 0.0000, F1 Score: 0.0000
Class 1: Precision: 0.0000, Recall: 0.0000, F1 Score: 0.0000
Class 2: Precision: 0.0000, Recall: 0.0000, F1 Score: 0.0000
Class 3: Precision: 0.0000, Recall: 0.0000, F1 Score: 0.0000
Class 4: Precision: 0.0000, Recall: 0.0000, F1 Score: 0.0000
Class 5: Precision: 0.0000, Recall: 0.0000, F1 Score: 0.0000
Class 6: Precision: 0.0000, Recall: 0.0000, F1 Score: 0.0000
Class 7: Precision: 0.0000, Recall: 0.0000, F1 Score: 0.0000
Class 8: Precision: 0.0000, Recall: 0.0000, F1 Score: 0.0000
Class 9: Precision: 0.0000, Recall: 0.0000, F1 Score: 0.0000
Class 10: Precision: 0.0000, Recall: 0.0000, F1 Score: 0.0000
Class 11: Precision: 0.0000, Recall: 0.0000, F1 Score: 0.0000
Class 12: Precision: 0.0000, Recall: 0.0000, F1 Score: 0.0000
Class 13: Precision: 0.0000, Recall: 0.0000, F1 Score: 0.0000
Class 14: Precision: 0.0000, Recall: 0.0000, F1 Score: 0.0000
Class 15: Precision: 0.0000, Recall: 0.0000, F1 Score: 0.0000
C