In [2]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.svm import SVR 
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score

In [3]:
df = pd.read_csv('Steel_industry_data.csv')
df.head()

Unnamed: 0,date,Usage_kWh,Lagging_Current_Reactive.Power_kVarh,Leading_Current_Reactive_Power_kVarh,CO2(tCO2),Lagging_Current_Power_Factor,Leading_Current_Power_Factor,NSM,WeekStatus,Day_of_week,Load_Type
0,01/01/2018 00:15,3.17,2.95,0.0,0.0,73.21,100.0,900,Weekday,Monday,Light_Load
1,01/01/2018 00:30,4.0,4.46,0.0,0.0,66.77,100.0,1800,Weekday,Monday,Light_Load
2,01/01/2018 00:45,3.24,3.28,0.0,0.0,70.28,100.0,2700,Weekday,Monday,Light_Load
3,01/01/2018 01:00,3.31,3.56,0.0,0.0,68.09,100.0,3600,Weekday,Monday,Light_Load
4,01/01/2018 01:15,3.82,4.5,0.0,0.0,64.72,100.0,4500,Weekday,Monday,Light_Load


In [4]:
print("Original Shape of the DATA")
print(f"Original data shape: {df.shape}")

Original Shape of the DATA
Original data shape: (35040, 11)


In [5]:
# from the svr.ipynb notebook, we know that our columns have no null or missing values. therefore we skip the step where we check for null values 

In [6]:
target = 'Usage_kWh'
X = df.drop(target, axis = 1)
y = df[target]
print(y)

0        3.17
1        4.00
2        3.24
3        3.31
4        3.82
         ... 
35035    3.85
35036    3.74
35037    3.78
35038    3.78
35039    3.67
Name: Usage_kWh, Length: 35040, dtype: float64


In [7]:
X = X.drop('date', axis=1)
numeric_features = X.select_dtypes(include = np.number).columns
categorical_features = X.select_dtypes(include = 'object').columns
print(numeric_features)
print('-' * 60)
print(categorical_features)

Index(['Lagging_Current_Reactive.Power_kVarh',
       'Leading_Current_Reactive_Power_kVarh', 'CO2(tCO2)',
       'Lagging_Current_Power_Factor', 'Leading_Current_Power_Factor', 'NSM'],
      dtype='object')
------------------------------------------------------------
Index(['WeekStatus', 'Day_of_week', 'Load_Type'], dtype='object')


In [8]:
X = pd.get_dummies(X, columns=categorical_features, drop_first=True)
print(X.shape)
# we performed one hot encoding here

(35040, 15)


In [9]:
# test_size=0.2 means 20% of data is saved for testing
# random_state=42 ensures you get the same split every time you run the code
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(f"X_train shape: {X_train.shape}")
print(f"X_test shape: {X_test.shape}")

X_train shape: (28032, 15)
X_test shape: (7008, 15)


In [11]:
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

single_svr = SVR(
    C=100, 
    kernel='rbf', 
    gamma='scale'
)

# 2. Train the model ONCE using your training data
# This is the single 'fit' operation.
print("Starting single SVR fit...")
# Use your actual training data variables here (X_train, y_train)
single_svr.fit(X_train, y_train) 
print("Fit complete.")

# 3. Use the trained model to make predictions
y_pred = single_svr.predict(X_test)




Starting single SVR fit...
Fit complete.


In [12]:
# 4. Evaluate the performance
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

print(f"\nMSE: {mse:.2f}")
print(f"R2 Score: {r2:.2f}")

print(f"RMSE score : {rmse:.4f}")
print(f"MAE Score: {mae}")


MSE: 800.09
R2 Score: 0.30
RMSE score : 28.2858
MAE Score: 18.08140275985619
