In [2]:
# Imports
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.ensemble import RandomForestRegressor

In [3]:
# Load the competition CSVs
TRAIN_CSV = "/kaggle/input/weather-prediction-2025/train.csv"
TEST_CSV = "/kaggle/input/weather-prediction-2025/test.csv"

train_df, test_df = pd.read_csv(TRAIN_CSV), pd.read_csv(TEST_CSV)

# Report (rows, columns) for the training frame first, then the test frame.
for frame in (train_df, test_df):
    print(frame.shape)

# Preview the first rows of the training data.
train_df.head()


(1935, 6)
(154, 6)


Unnamed: 0,Date,Maximum Temperature degrees (F),Minimum Temperature degrees (F),Precipitation (inches),Snow (inches),Snow Depth (inches)
0,2020-01-01,36.0,13.0,0.00,0.00,5.0
1,2020-01-02,37.0,31.0,0.00,0.00,5.0
2,2020-01-03,36.0,24.0,T,T,4.0
3,2020-01-04,30.0,24.0,0.00,0.00,4.0
4,2020-01-05,38.0,24.0,0.01,T,4.0


In [4]:
# Target column and feature matrix
TARGET = "Minimum Temperature degrees (F)"

# Both objects are taken from train_df as it exists at this point in the
# notebook, i.e. before the cleaning cells further down have run.
y = train_df[TARGET]
X = train_df.drop(TARGET, axis="columns")

In [6]:
# Hold out 20% of the rows for validation (fixed seed for a repeatable split)
split = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = split

In [8]:
# Parse the Date column and derive calendar features from it.
# The Date column itself is kept here; it is still needed to compute the parts.
date_parts = ("year", "month", "day", "dayofweek")

for frame in (train_df, test_df):
    frame["Date"] = pd.to_datetime(frame["Date"])
    for part in date_parts:
        # e.g. frame["Date"].dt.year -> frame["year"]
        frame[part] = getattr(frame["Date"].dt, part)


In [9]:
# Make the weather measurement columns numeric.
# Entries that cannot be parsed as numbers (the preview above shows "T" in the
# precipitation and snow columns) become NaN because of errors="coerce".
weather_cols = [
    "Precipitation (inches)",
    "Snow (inches)",
    "Snow Depth (inches)",
]

for frame in (train_df, test_df):
    for name in weather_cols:
        frame[name] = pd.to_numeric(frame[name], errors="coerce")


In [10]:
# Replace every NaN in both frames with 0 so the model receives fully
# populated inputs (this covers the values coerced to NaN above).
train_df, test_df = (frame.fillna(0) for frame in (train_df, test_df))

In [11]:
# Remove the raw Date column now that year/month/day/dayofweek have been
# extracted from it.
train_df = train_df.drop("Date", axis="columns")
test_df = test_df.drop("Date", axis="columns")


In [14]:
# Train model
# Rebuild the feature matrix and the split from the cleaned frame. The X / y
# pair created right after loading was taken before the date features were
# added, the weather columns were made numeric, NaNs were filled and the raw
# "Date" column was dropped, so it still contains string columns that the
# forest cannot be fitted on.
X = train_df.drop(columns=[TARGET])
y = train_df[TARGET]
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)

model = RandomForestRegressor(
    n_estimators=200,
    max_depth=10,
    random_state=42,  # fixed seed so the fitted forest is repeatable
    n_jobs=-1,        # use all available cores
)

# Fit on the training split only; the validation rows stay unseen.
model.fit(X_train, y_train)


In [19]:
# epsilon-stabilized MAPE
def safe_mape(y_true, y_pred, epsilon=1e-3):
    """Mean absolute percentage error with a stabilised denominator.

    Parameters
    ----------
    y_true : array-like
        Observed values.
    y_pred : array-like
        Predicted values, in the same order as ``y_true``.
    epsilon : float, default 1e-3
        Added to ``|y_true|`` so that zero targets do not divide by zero.

    Returns
    -------
    float
        Mean of ``|y_true - y_pred| / (|y_true| + epsilon)``, as a fraction
        (not multiplied by 100).
    """
    # Work on plain float arrays: this accepts lists as well as Series and
    # compares the values position by position.
    actual = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)

    # Use the magnitude of the target. With `y_true + epsilon` a negative
    # target moves the denominator towards zero (exactly zero at -epsilon).
    # For non-negative targets the value is unchanged.
    denominator = np.abs(actual) + epsilon
    return np.mean(np.abs(actual - predicted) / denominator)

# Predict the hold-out rows with the model fitted on the training split, then
# score them. val_preds has to be created here: no earlier cell defines it.
val_preds = model.predict(X_val)
safe_mape(y_val, val_preds)


7.503704043833911

In [16]:
# Train on full data
# Fit on every labelled row of the cleaned frame rather than on the X / y
# snapshot taken right after loading, so the final model is trained on the
# engineered date features and the numeric weather columns.
model.fit(train_df.drop(columns=[TARGET]), train_df[TARGET])

In [22]:
# Test features: every column of the test frame except the target
# (nothing is removed if the target column is absent).
test_X = test_df[[name for name in test_df.columns if name != TARGET]]

In [24]:
# Run the fitted model over the test features
test_predictions = model.predict(X=test_X)

In [25]:
# Assemble the submission table: one row per test prediction
submission = pd.DataFrame({"Minimum Temperature degrees (F)": test_predictions})

# IDs are generated positionally (0 .. n-1) and placed as the first column.
# NOTE(review): confirm the competition expects positional IDs rather than an
# identifier taken from the test file.
submission.insert(0, "ID", np.arange(len(submission)))

submission.head()


Unnamed: 0,ID,Minimum Temperature degrees (F)
0,0,43.653193
1,1,41.173931
2,2,40.691165
3,3,39.766689
4,4,44.843831


In [26]:
# Write the submission to disk without the DataFrame index
submission.to_csv(path_or_buf="submission.csv", index=False)