### Import the Libraries and Load the Data

In [1]:
import numpy as np
import pandas as pd

# Read both splits (training first, then test) from CSV files in the working directory
train_data, test_data = (pd.read_csv(csv_path) for csv_path in ('train.csv', 'test.csv'))

# Show the first five training rows as a quick sanity check of columns and values
preview = train_data.head()
print(preview)


                  date   HUFL   HULL   MUFL   MULL   LUFL   LULL         OT
0  2016-07-01 00:00:00  5.827  2.009  1.599  0.462  4.203  1.340  30.531000
1  2016-07-01 00:15:00  5.760  2.076  1.492  0.426  4.264  1.401  30.459999
2  2016-07-01 00:30:00  5.760  1.942  1.492  0.391  4.234  1.310  30.038000
3  2016-07-01 00:45:00  5.760  1.942  1.492  0.426  4.234  1.310  27.013000
4  2016-07-01 01:00:00  5.693  2.076  1.492  0.426  4.142  1.371  27.787001


### Data Preprocessing

In [2]:
# Count the missing entries in every column of the training frame
missing_per_column = train_data.isna().sum()
print(missing_per_column)

date    0
HUFL    0
HULL    0
MUFL    0
MULL    0
LUFL    0
LULL    0
OT      0
dtype: int64


### Feature Engineering

In [3]:
# Autoregressive features built from *past* OT readings only.
# The rows previewed at load time are 15 minutes apart, so 24 lags cover the
# previous 6 hours of readings (24 steps), not 24 hours.
N_LAGS = 24

for lag in range(1, N_LAGS + 1):
    train_data[f'OT_lag_{lag}'] = train_data['OT'].shift(lag)

# Rolling mean of the N_LAGS readings that precede each row.
# Shifting by one step first keeps the row's own OT (the prediction target) out of
# the window; without the shift the target could be reconstructed from this column
# together with the lag columns, and the feature could not be computed at forecast
# time, when the current OT is not yet known.
train_data['OT_roll_mean_24'] = train_data['OT'].shift(1).rolling(window=N_LAGS).mean()

# Drop the warm-up rows at the start that have no complete lag history (NaN features).
# Reassigning instead of using inplace=True keeps the step explicit.
train_data = train_data.dropna()


### Data Exploration and Insights (EDA)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Plot the oil temperature over time.
# 'date' is loaded from CSV as text; plotting text on the x-axis makes matplotlib
# treat every timestamp as its own category (one tick per row). Converting to
# datetimes gives a real, sparsely ticked time axis. pd.to_datetime is a no-op
# if the column is already datetime.
timestamps = pd.to_datetime(train_data['date'])

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(timestamps, train_data['OT'], label='Oil Temperature')
ax.set_title('Oil Temperature Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Oil Temperature (OT)')
ax.legend()
fig.autofmt_xdate()  # slant the date labels so they do not overlap
plt.show()


### Model Building

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Feature matrix: every column except the timestamp and the target itself
X = train_data.drop(['date', 'OT'], axis=1)
y = train_data['OT']

# Chronological hold-out: shuffle=False keeps row order, so the validation set is
# the final 20% of the rows rather than a random sample.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, shuffle=False)

# Random Forest Regressor (fixed random_state for repeatable results)
model = RandomForestRegressor(n_estimators=100, random_state=0)
model.fit(X_train, y_train)

# Predict on validation set
y_pred = model.predict(X_val)

# Evaluation
mae = mean_absolute_error(y_val, y_pred)
# RMSE is taken as the square root of the MSE instead of passing squared=False:
# that keyword was deprecated and then removed from mean_squared_error in newer
# scikit-learn releases, while this form works on every version.
rmse = mean_squared_error(y_val, y_pred) ** 0.5

print(f'MAE: {mae}, RMSE: {rmse}')


### Model Evaluation

In [None]:
# Calculate MAPE (mean absolute percentage error, in percent) on the validation set.
# Rows whose actual value is exactly 0 are excluded: the percentage error is
# undefined there and a single such row would turn the whole metric into inf.
actual = y_val.to_numpy()
nonzero = actual != 0
mape = np.mean(np.abs((actual[nonzero] - y_pred[nonzero]) / actual[nonzero])) * 100

skipped = int((~nonzero).sum())
if skipped:
    print(f'MAPE computed without {skipped} rows whose actual OT is 0')

print(f'MAPE: {mape}')


### Forecasting the Next 24 Hours

In [None]:
# Prepare the test frame for the fitted model.
# No scaling is applied: no scaler is fitted in the cells shown in this notebook,
# and the forest was trained on the unscaled columns of X, so the test inputs must
# stay on the same scale.
# The model expects exactly the training columns (raw load readings plus the
# engineered OT lag / rolling columns), in the same order.
feature_columns = list(X.columns)
missing_columns = [col for col in feature_columns if col not in test_data.columns]
if missing_columns:
    raise ValueError(
        f'test_data lacks columns the model was trained on: {missing_columns}. '
        'Create the same lag/rolling features on the test frame before predicting.'
    )

# Predict oil temperature for every row of the test frame
predictions = model.predict(test_data[feature_columns])
print(predictions)
