In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from keras.layers import LSTM, Dense
from keras.models import Sequential
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler

In [None]:
# Read the weather CSV into a DataFrame (the path is relative to the notebook's working directory)
df = pd.read_csv(filepath_or_buffer='../Data/csv/WashingtonWeather.csv')


In [None]:
# Remove the columns that are not used by the rest of the notebook.
# The labels are passed directly via `columns=`; the previous form handed a
# DataFrame slice to `drop`, which only worked because iterating a DataFrame
# yields its column names.
df = df.drop(columns=['Unnamed: 0', 'WBAN', 'LBL', 'YEARMODA', 'DAY', 'CTRY', 'STATE', 'LAT', 'LON'])

In [None]:
# Chronological hold-out: rows with YEAR before 2019 become the training set,
# rows from 2019 onward become the test set
train_data = df.loc[df['YEAR'].lt(2019)]
test_data = df.loc[df['YEAR'].ge(2019)]

In [None]:
# Separate the regression target (PRCP) from the remaining training columns
features_train = train_data.drop(columns='PRCP')
target_train = train_data.loc[:, 'PRCP']

In [None]:
# Fit the standardiser on the training features only, then apply it to them
scaler = StandardScaler().fit(features_train)
scaled_features = scaler.transform(features_train)

In [None]:
# Project the standardised training features onto 5 principal components
# (the component count is a tunable choice)
pca = PCA(n_components=5)
principal_components = pca.fit_transform(X=scaled_features)
principal_df = pd.DataFrame(principal_components)

In [None]:
# Attach the target to the principal components of the training data.
# `principal_df` carries a fresh 0..n-1 index while `target_train` still has the
# row labels of the original frame; concat(axis=1) aligns on the index, so the
# target's index is reset first to pair the rows positionally and avoid NaN rows.
final_train_df = pd.concat([principal_df, target_train.reset_index(drop=True)], axis=1)

In [None]:
# Hold out 20% of the training rows for validation (fixed seed for a repeatable split)
X_train, X_val, y_train, y_val = train_test_split(
    final_train_df.drop(columns='PRCP'),
    final_train_df.loc[:, 'PRCP'],
    test_size=0.2,
    random_state=42,
)

In [None]:
# Shape the inputs for the LSTM: (samples, features) -> (samples, 1, features).
# np.asarray accepts both the DataFrame produced by the split and an array that
# has already been reshaped, and the last axis is always the feature axis, so
# re-running this cell is a no-op instead of failing on `.values`.
X_train = np.asarray(X_train).reshape(X_train.shape[0], 1, X_train.shape[-1])
X_val = np.asarray(X_val).reshape(X_val.shape[0], 1, X_val.shape[-1])

In [None]:
# Single LSTM layer (50 units) followed by a one-unit dense regression head;
# the input shape is taken from the (timesteps, features) axes of X_train
model = Sequential([
    LSTM(50, input_shape=(X_train.shape[1], X_train.shape[2])),
    Dense(1),
])
model.compile(optimizer='adam', loss='mean_squared_error')

In [None]:
# Fit for 10 epochs in mini-batches of 32, reporting the loss on the validation split
model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    batch_size=32,
    epochs=10,
)

In [None]:
# Split the test rows into target and features, then standardise the features
# with the scaler that was fitted on the training data
features_test = test_data.drop(columns='PRCP')
target_test = test_data.loc[:, 'PRCP']
scaled_features_test = scaler.transform(features_test)

In [None]:
# Project the test features with the PCA that was fitted on the training data.
# Fitting a second PCA on the test set would yield a different basis (different
# axes, order and signs), so the model would receive inputs unrelated to the
# components it was trained on, and test statistics would leak into the transform.
pca_test = pca  # alias kept so any later reference to `pca_test` still resolves
principal_components_test = pca_test.transform(scaled_features_test)
principal_df_test = pd.DataFrame(data=principal_components_test)

In [None]:
# Attach the target to the principal components of the test data.
# `target_test` keeps the row labels of the original frame (rows with YEAR >= 2019),
# whereas `principal_df_test` is indexed 0..n-1; concat(axis=1) aligns on the index,
# so the target's index is reset first. Without this the result is padded with NaN
# rows, which later break the reshape and the error metric.
final_test_df = pd.concat([principal_df_test, target_test.reset_index(drop=True)], axis=1)

In [None]:
# Pull the target out of the test frame and add the length-1 time axis the LSTM expects:
# (samples, features) -> (samples, 1, features)
y_test = final_test_df.loc[:, 'PRCP']
final_test_df = final_test_df.drop(columns='PRCP')
X_test = final_test_df.values.reshape(final_test_df.shape[0], 1, final_test_df.shape[1])

In [None]:
# Run the trained model on the held-out test inputs
predicted_values = model.predict(x=X_test)

In [None]:
# Inspect the model's predictions for the test inputs
predicted_values

In [None]:
# Score the test predictions against the true PRCP values
mse = mean_squared_error(y_true=y_test, y_pred=predicted_values)
print(f"Mean Squared Error (MSE): {mse}")

In [None]:
# List the columns that remain in df after the earlier drop
df.columns

In [None]:
# Second experiment: a hand-picked feature set scaled to [0, 1].
# NOTE: this cell rebinds `train_data`, `scaler`, `X_train`, `X_test`, `y_train`
# and `y_test`, which the LSTM cells above also use; re-run those cells from
# the top before evaluating the LSTM again.

# Select relevant columns for training
columns_to_use = ['USAF', 'YEAR', 'MONTH', 'TEMP', 'DEWP', 'WDSP', 'MAX', 'MIN', 'PRCP']

# Keep only the rows before 2019, restricted to the selected columns
train_data = df.loc[df['YEAR'] < 2019, columns_to_use]

# Splitting data into features (X) and target (y)
X = train_data.drop(columns='PRCP')  # Features
y = train_data['PRCP']  # Target

# Normalizing data using Min-Max Scaler
scaler = MinMaxScaler()
X = scaler.fit_transform(X)

# Random 80/20 split of the pre-2019 rows. The data from 2019 onward is NOT
# part of this split: X_test / y_test here are a random subset of the training years.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [None]:
# Separate the target variable from the features of the current `train_data`
target = train_data['PRCP']
features = train_data.drop(columns='PRCP')

# Standardize the features
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)

# Apply PCA
pca = PCA(n_components=5)  # Specify the number of components you want to keep
principal_components = pca.fit_transform(scaled_features)

# Create a dataframe with the principal components (indexed 0..n-1)
principal_df = pd.DataFrame(data=principal_components, columns=['PC1', 'PC2', 'PC3', 'PC4', 'PC5'])

# Concatenate the principal components with the target variable.
# `target` still carries the row labels of the filtered frame, while `principal_df`
# has a fresh RangeIndex; concat(axis=1) aligns on the index, so the target's index
# is reset to pair the rows positionally instead of producing NaN-padded rows.
final_df = pd.concat([principal_df, target.reset_index(drop=True)], axis=1)


In [None]:
# Preview the first rows of the principal components joined with the PRCP target
final_df.head()