In [3]:
# Required Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.metrics import mean_squared_error, accuracy_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Load Dataset
data = pd.read_csv('Weather_data.csv')

# Check if 'Rainfall' column exists, if not create one based on 'precip_mm'
if 'Rainfall' not in data.columns:
    if 'precip_mm' in data.columns:
        data['Rainfall'] = np.where(data['precip_mm'] > 0, 1, 0)
    elif 'precip_in' in data.columns:
        data['Rainfall'] = np.where(data['precip_in'] > 0, 1, 0)
    else:
        raise ValueError("No precipitation column ('precip_mm' or 'precip_in') found to derive 'Rainfall'.")

# Identify categorical columns (those with strings)
categorical_columns = data.select_dtypes(include=['object']).columns

# Apply OneHotEncoder to categorical columns
data = pd.get_dummies(data, columns=categorical_columns)

# Features and Target
X = data.drop(columns=['Rainfall'])
y = data['Rainfall']

# Encode the target variable if it is categorical
le = LabelEncoder()
y = le.fit_transform(y)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Feature Scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# LSTM works with sequences, so reshape the data into 3D (samples, timesteps, features)
X_train_scaled = X_train_scaled.reshape((X_train_scaled.shape[0], 1, X_train_scaled.shape[1]))
X_test_scaled = X_test_scaled.reshape((X_test_scaled.shape[0], 1, X_test_scaled.shape[1]))

# LSTM Model
model = Sequential()
model.add(LSTM(units=50, activation='tanh', return_sequences=True, input_shape=(X_train_scaled.shape[1], X_train_scaled.shape[2])))
model.add(Dropout(0.2))
model.add(LSTM(units=50, activation='tanh'))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train_scaled, y_train, epochs=20, batch_size=32, validation_data=(X_test_scaled, y_test))

# Make predictions
y_pred_lstm = model.predict(X_test_scaled)
y_pred_lstm = (y_pred_lstm > 0.5).astype(int)

# Evaluate the model
mse_lstm = mean_squared_error(y_test, y_pred_lstm)
accuracy_lstm = accuracy_score(y_test, y_pred_lstm)

print(f"LSTM Model - MSE: {mse_lstm}, Accuracy: {accuracy_lstm}")


  super().__init__(**kwargs)


Epoch 1/20
[1m527/527[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.8091 - loss: 0.4393 - val_accuracy: 0.9244 - val_loss: 0.1723
Epoch 2/20
[1m527/527[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9190 - loss: 0.1778 - val_accuracy: 0.9367 - val_loss: 0.1470
Epoch 3/20
[1m527/527[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9283 - loss: 0.1586 - val_accuracy: 0.9389 - val_loss: 0.1349
Epoch 4/20
[1m527/527[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9396 - loss: 0.1382 - val_accuracy: 0.9531 - val_loss: 0.1151
Epoch 5/20
[1m527/527[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9484 - loss: 0.1199 - val_accuracy: 0.9547 - val_loss: 0.1061
Epoch 6/20
[1m527/527[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9523 - loss: 0.1102 - val_accuracy: 0.9650 - val_loss: 0.0872
Epoch 7/20
[1m527/527[0m 

In [2]:
# Required Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import mean_squared_error, accuracy_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Load Dataset
data = pd.read_csv('Weather_data.csv')

# Check if 'Rainfall' column exists, if not, create one based on 'precip_mm' or 'precip_in'
if 'Rainfall' not in data.columns:
    if 'precip_mm' in data.columns:
        data['Rainfall'] = np.where(data['precip_mm'] > 0, 1, 0)  # Rainfall if precipitation > 0
    elif 'precip_in' in data.columns:
        data['Rainfall'] = np.where(data['precip_in'] > 0, 1, 0)
    else:
        raise ValueError("No precipitation column ('precip_mm' or 'precip_in') found to derive 'Rainfall'.")

# Drop columns that won't be used for prediction, such as 'last_updated_epoch'
# Adjust columns as needed based on your specific use case
data = data.drop(columns=['last_updated_epoch', 'precip_mm', 'precip_in'])  # Drop target-related columns

# Handle categorical column 'wind_direction' using Label Encoding
le = LabelEncoder()
data['wind_direction'] = le.fit_transform(data['wind_direction'])

# Define Features (X) and Target (y)
X = data.drop(columns=['Rainfall'])  # Features
y = data['Rainfall']  # Target

# Train-test split (70% training, 30% testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Feature Scaling (important for neural networks)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# LSTM works with sequences, so reshape the data into 3D (samples, timesteps, features)
# We'll use timesteps = 1 because each row represents one instance
X_train_scaled = X_train_scaled.reshape((X_train_scaled.shape[0], 1, X_train_scaled.shape[1]))
X_test_scaled = X_test_scaled.reshape((X_test_scaled.shape[0], 1, X_test_scaled.shape[1]))

# LSTM Model
model = Sequential()
model.add(LSTM(units=50, activation='tanh', return_sequences=True, input_shape=(X_train_scaled.shape[1], X_train_scaled.shape[2])))
model.add(Dropout(0.2))
model.add(LSTM(units=50, activation='tanh'))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))  # Binary classification (Rainfall: 0 or 1)

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train_scaled, y_train, epochs=20, batch_size=32, validation_data=(X_test_scaled, y_test))

# Make predictions
y_pred_lstm = model.predict(X_test_scaled)
y_pred_lstm = (y_pred_lstm > 0.5).astype(int)  # Threshold at 0.5 for binary classification

# Evaluate the model
mse_lstm = mean_squared_error(y_test, y_pred_lstm)
accuracy_lstm = accuracy_score(y_test, y_pred_lstm)

print(f"LSTM Model - MSE: {mse_lstm}, Accuracy: {accuracy_lstm}")



Epoch 1/20


  super().__init__(**kwargs)


[1m527/527[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.7936 - loss: 0.4605 - val_accuracy: 0.8797 - val_loss: 0.2614
Epoch 2/20
[1m527/527[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8735 - loss: 0.2737 - val_accuracy: 0.8812 - val_loss: 0.2560
Epoch 3/20
[1m527/527[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8744 - loss: 0.2639 - val_accuracy: 0.8817 - val_loss: 0.2502
Epoch 4/20
[1m527/527[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8694 - loss: 0.2673 - val_accuracy: 0.8817 - val_loss: 0.2450
Epoch 5/20
[1m527/527[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8763 - loss: 0.2598 - val_accuracy: 0.8838 - val_loss: 0.2446
Epoch 6/20
[1m527/527[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8739 - loss: 0.2593 - val_accuracy: 0.8864 - val_loss: 0.2398
Epoch 7/20
[1m527/527[0m [32m━━━━━━━