In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive so that
# files stored on Drive can be read through ordinary file paths.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd

# Location of the training CSV on the mounted Google Drive
file_path = '/content/drive/My Drive/Conference paper/dataset/train_dataset.csv'

# Load the CSV contents into a DataFrame
df = pd.read_csv(filepath_or_buffer=file_path)

# Preview the top five rows as the cell output
df.head(5)


Unnamed: 0,Present_Tmax,Present_Tmin,LDAPS_RHmin,LDAPS_RHmax,LDAPS_Tmax_lapse,LDAPS_Tmin_lapse,LDAPS_WS,LDAPS_LH,LDAPS_CC1,LDAPS_CC2,...,LDAPS_PPT1,LDAPS_PPT2,LDAPS_PPT3,LDAPS_PPT4,lat,lon,DEM,Slope,Solar radiation,Next_Tmax
0,30.3,21.1,61.741207,87.262192,26.62943,23.075077,4.263528,21.399026,0.297776,0.616711,...,0.0,0.0,0.000479,0.0,37.5776,126.938,75.0924,1.7678,5131.66748,26.4
1,29.3,20.9,85.943954,97.694801,25.380395,23.275475,8.697955,53.165713,0.901123,0.86735,...,2.642577,0.261794,0.288428,2.315489,37.5507,126.988,132.118,0.5931,5918.901367,25.6
2,29.5,22.2,32.008099,69.96637,31.99167,21.475995,10.4615,37.582673,0.012838,0.08609,...,0.0,0.0,0.0,0.0,37.5507,126.937,30.0464,0.8552,5222.419434,30.6
3,32.6,21.2,60.070614,91.244827,27.043616,23.338373,15.686361,95.842065,0.62258,0.388476,...,0.285723,0.335796,0.0,0.0,37.6046,127.032,44.7624,0.5141,5558.630371,27.2
4,33.0,27.6,62.508846,86.983963,31.529577,27.307177,6.493082,49.887497,0.438076,0.458994,...,0.0,0.0,0.0,0.0,37.5102,127.042,54.6384,0.1457,5635.460449,32.8


In [None]:
# Train and compare three regressors (Random Forest, XGBoost, LSTM) that
# predict `next_tmax` from the remaining columns of `df`.
#
# Inputs : `df` (DataFrame loaded by an earlier cell).
# Outputs: X_train/X_test/y_train/y_test, the scaled arrays, the three fitted
#          models and their test predictions, plus `metrics` (dict),
#          `metrics_df` and `results` (DataFrames with MAE / RMSE / R2).
import subprocess
import sys

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor

# Install TensorFlow if not installed
try:
    import tensorflow as tf
except ImportError:
    # Install with the interpreter that runs this kernel so the package lands
    # in the environment that is about to import it.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "tensorflow"])
    import tensorflow as tf

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout
from sklearn.metrics import accuracy_score, classification_report, mean_absolute_error, mean_squared_error, r2_score

SEED = 42
TARGET = "next_tmax"

# Seed Python, NumPy and TensorFlow so the LSTM weights/shuffling are
# repeatable between runs (GPU kernels may still introduce small differences).
tf.keras.utils.set_random_seed(SEED)

# Standardize column names (strip spaces & make lowercase)
df.columns = df.columns.str.strip().str.lower()

# Print column names to check actual names
print("Dataset Columns:", df.columns.tolist())

# Define features and target.
# Rows without a target are dropped instead of being given an invented label:
# imputing the target would let fabricated values into both training and scoring.
labelled = df.dropna(subset=[TARGET])
X = labelled.drop(columns=[TARGET])  # still contains missing values; imputed after the split
y = labelled[TARGET]

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED)

# Impute missing feature values with medians learned from the training rows
# only, so no statistic of the held-out rows leaks into preprocessing.
train_medians = X_train.median()
X_train = X_train.fillna(train_medians)
X_test = X_test.fillna(train_medians)

# Standardize features (scaler is fitted on the training rows only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train & Evaluate Random Forest Model
rf_regressor = RandomForestRegressor(n_estimators=100, random_state=SEED)
rf_regressor.fit(X_train_scaled, y_train)
rf_preds = rf_regressor.predict(X_test_scaled)

# Train & Evaluate XGBoost Model
xgb_regressor = XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=5, random_state=SEED)
xgb_regressor.fit(X_train_scaled, y_train)
xgb_preds = xgb_regressor.predict(X_test_scaled)

# Train & Evaluate LSTM Model (Deep Learning)
# Each sample is presented as a sequence of n_features steps with one channel.
X_train_lstm = X_train_scaled.reshape((X_train_scaled.shape[0], X_train_scaled.shape[1], 1))
X_test_lstm = X_test_scaled.reshape((X_test_scaled.shape[0], X_test_scaled.shape[1], 1))

# An explicit Input layer replaces the `input_shape=` layer argument, which
# recent Keras versions warn about when used inside a Sequential model.
lstm_model = Sequential([
    Input(shape=(X_train_lstm.shape[1], 1)),
    LSTM(50, activation='relu', return_sequences=True),
    Dropout(0.2),
    LSTM(25, activation='relu'),
    Dense(1)
])

lstm_model.compile(optimizer='adam', loss='mse', metrics=['mae'])
lstm_model.fit(X_train_lstm, y_train, epochs=20, batch_size=32, verbose=0)
lstm_preds = lstm_model.predict(X_test_lstm).flatten()

# Compute Regression Metrics (one entry per model, in insertion order)
predictions = {
    "Random Forest": rf_preds,
    "XGBoost": xgb_preds,
    "LSTM": lstm_preds,
}
metrics = {
    "Model": list(predictions),
    "MAE": [mean_absolute_error(y_test, preds) for preds in predictions.values()],
    "RMSE": [mean_squared_error(y_test, preds) ** 0.5 for preds in predictions.values()],
    "R2 Score": [r2_score(y_test, preds) for preds in predictions.values()],
}
metrics_df = pd.DataFrame(metrics)

# Compare Model Performance (independent copy of the same table)
results = metrics_df.copy()

# Display results
print("\nModel Performance Comparison:\n")
print(results)


Dataset Columns: ['present_tmax', 'present_tmin', 'ldaps_rhmin', 'ldaps_rhmax', 'ldaps_tmax_lapse', 'ldaps_tmin_lapse', 'ldaps_ws', 'ldaps_lh', 'ldaps_cc1', 'ldaps_cc2', 'ldaps_cc3', 'ldaps_cc4', 'ldaps_ppt1', 'ldaps_ppt2', 'ldaps_ppt3', 'ldaps_ppt4', 'lat', 'lon', 'dem', 'slope', 'solar radiation', 'next_tmax']


  super().__init__(**kwargs)


[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step

Model Performance Comparison:

           Model       MAE      RMSE  R2 Score
0  Random Forest  0.747809  1.002083  0.895146
1        XGBoost  0.764131  1.011842  0.893093
2           LSTM  1.410579  1.839480  0.646678


In [None]:
# 📌 "Accuracy" here is defined as 1 - MAE / mean(y_test)
# NOTE(review): this ratio depends on where the target's zero point lies, so it
# is not comparable across units — presumably the target is a temperature;
# confirm that this is the measure you want to report.
mean_target = np.mean(y_test)


def _normalized_accuracy(preds):
    """Return 1 minus the test-set MAE of `preds` divided by the mean of `y_test`."""
    return 1 - (mean_absolute_error(y_test, preds) / mean_target)


accuracy_rf = _normalized_accuracy(rf_preds)
accuracy_xgb = _normalized_accuracy(xgb_preds)
accuracy_lstm = _normalized_accuracy(lstm_preds)

# 📌 Append the new column to the shared metrics dictionary
metrics["Accuracy"] = [accuracy_rf, accuracy_xgb, accuracy_lstm]

# 📌 Rebuild the results table from the dictionary and show it
metrics_df = pd.DataFrame(metrics)

print("\n📊 Model Performance with Accuracy:\n")
print(metrics_df)



📊 Model Performance with Accuracy:

           Model       MAE      RMSE  R2 Score  Accuracy
0  Random Forest  0.747809  1.002083  0.895146  0.975254
1        XGBoost  0.764131  1.011842  0.893093  0.974714
2           LSTM  1.410579  1.839480  0.646678  0.953323
