In [881]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

In [770]:
df = pd.read_csv("DataSet.csv")
df.head(5)

Unnamed: 0,Voltage_RMS,Voltage_Mean,Voltage_Peak_Value,Voltage_ShapeFactor,Duty_Peak_Value,Duty_RMS,Duty_Mean,Duty_ShapeFactor,Duty_Clearance_factor,Ilf_Mean
0,140.066695,140.064692,142.098841,1.000014,0.406643,0.400259,0.400253,1.000014,1.01595,70.372059
1,140.10259,140.102044,140.979457,1.000004,0.404295,0.391036,0.39055,1.001243,1.033908,56.311013
2,140.102709,140.102163,141.021557,1.000004,0.404372,0.390926,0.390429,1.001273,1.034394,46.920518
3,140.105313,140.104762,140.969869,1.000004,0.404298,0.390953,0.390459,1.001267,1.034134,40.222416
4,140.103499,140.102951,140.965729,1.000004,0.404417,0.390975,0.390483,1.001259,1.034383,35.187581


In [772]:
# 1. Remove low-variance columns except 'ilf_mean'
low_variance_cols = ["Voltage_ShapeFactor", "Duty_ShapeFactor"]
df.drop(columns=[col for col in low_variance_cols if col != 'Ilf_Mean'], inplace=True)

In [774]:
df.head()

Unnamed: 0,Voltage_RMS,Voltage_Mean,Voltage_Peak_Value,Duty_Peak_Value,Duty_RMS,Duty_Mean,Duty_Clearance_factor,Ilf_Mean
0,140.066695,140.064692,142.098841,0.406643,0.400259,0.400253,1.01595,70.372059
1,140.10259,140.102044,140.979457,0.404295,0.391036,0.39055,1.033908,56.311013
2,140.102709,140.102163,141.021557,0.404372,0.390926,0.390429,1.034394,46.920518
3,140.105313,140.104762,140.969869,0.404298,0.390953,0.390459,1.034134,40.222416
4,140.103499,140.102951,140.965729,0.404417,0.390975,0.390483,1.034383,35.187581


In [778]:
df.head(5)

Unnamed: 0,Voltage_RMS,Voltage_Mean,Voltage_Peak_Value,Duty_Peak_Value,Duty_RMS,Duty_Mean,Duty_Clearance_factor,Ilf_Mean
0,140.066695,140.064692,142.098841,0.406643,0.400259,0.400253,1.01595,70.372059
1,140.10259,140.102044,140.979457,0.404295,0.391036,0.39055,1.033908,56.311013
2,140.102709,140.102163,141.021557,0.404372,0.390926,0.390429,1.034394,46.920518
3,140.105313,140.104762,140.969869,0.404298,0.390953,0.390459,1.034134,40.222416
4,140.103499,140.102951,140.965729,0.404417,0.390975,0.390483,1.034383,35.187581


In [780]:
# 2. MinMax Scaling (excluding 'ilf_mean')
columns_to_scale = [col for col in df.columns if col != 'Ilf_Mean']
scaler = MinMaxScaler()
df_scaled = df.copy()
df_scaled[columns_to_scale] = scaler.fit_transform(df[columns_to_scale])

In [782]:
df_scaled.head(5)

Unnamed: 0,Voltage_RMS,Voltage_Mean,Voltage_Peak_Value,Duty_Peak_Value,Duty_RMS,Duty_Mean,Duty_Clearance_factor,Ilf_Mean
0,0.0,0.0,0.029627,0.021338,0.080553,0.08464,0.0,70.372059
1,0.000899,0.000936,0.001456,0.000715,0.001104,0.001216,0.952833,56.311013
2,0.000902,0.000939,0.002516,0.00139,0.000162,0.000174,0.978611,46.920518
3,0.000968,0.001004,0.001215,0.00074,0.000393,0.000426,0.964797,40.222416
4,0.000922,0.000959,0.001111,0.00179,0.000576,0.000634,0.978043,35.187581


In [784]:
# 3. Remove highly correlated features (excluding 'ilf_mean')
correlation_matrix = df_scaled.corr().abs()
upper_triangle = correlation_matrix.where(
    np.triu(np.ones(correlation_matrix.shape), k=1).astype(bool)
)
high_corr_features = [column for column in upper_triangle.columns if any(upper_triangle[column] > 0.95) and column != 'Ilf_Mean']
df_scaled.drop(columns=high_corr_features, inplace=True)

In [786]:
df_scaled.head(5)

Unnamed: 0,Voltage_RMS,Duty_Clearance_factor,Ilf_Mean
0,0.0,0.0,70.372059
1,0.000899,0.952833,56.311013
2,0.000902,0.978611,46.920518
3,0.000968,0.964797,40.222416
4,0.000922,0.978043,35.187581


In [788]:
# 4. Remove low-variation rows
df_scaled["row_std"] = df_scaled.std(axis=1)
df_filtered = df_scaled[df_scaled["row_std"] > 0.01].copy()  # Ensure it's a copy
df_filtered.drop(columns=["row_std"], inplace=True)


In [790]:
df_filtered.head(5)

Unnamed: 0,Voltage_RMS,Duty_Clearance_factor,Ilf_Mean
0,0.0,0.0,70.372059
1,0.000899,0.952833,56.311013
2,0.000902,0.978611,46.920518
3,0.000968,0.964797,40.222416
4,0.000922,0.978043,35.187581


In [792]:
# 5. Outlier Removal (Using IQR)
Q1 = df_filtered.quantile(0.25)
Q3 = df_filtered.quantile(0.75)
IQR = Q3 - Q1
df_final = df_filtered[~((df_filtered < (Q1 - 1.5 * IQR)) | (df_filtered > (Q3 + 1.5 * IQR))).any(axis=1)]

In [794]:
df_final.head(5)

Unnamed: 0,Voltage_RMS,Duty_Clearance_factor,Ilf_Mean
20,0.000926,0.971921,11.728514
21,0.000915,0.968267,11.2613
22,0.000974,0.984986,10.827457
23,0.000846,0.97831,10.428067
24,0.000966,0.96657,10.054172


In [796]:
# Safely modify the DataFrame using .loc[] to avoid the warning
df_final.loc[:, 'Voltage_RMS'] = df_final['Voltage_RMS'] * 100000  # Multiply Voltage_RMS by 100000
df_final.loc[:, 'Duty_Clearance_factor'] = df_final['Duty_Clearance_factor'] * 100 # Multiply Duty_Clearance_factor by 10


In [798]:
df_final.head(5)

Unnamed: 0,Voltage_RMS,Duty_Clearance_factor,Ilf_Mean
20,92.615733,97.192071,11.728514
21,91.545071,96.826741,11.2613
22,97.4375,98.498551,10.827457
23,84.630219,97.830988,10.428067
24,96.648509,96.657001,10.054172


In [800]:
df_final.size

4485

In [803]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score

In [805]:
X = df_final.drop(columns=['Ilf_Mean'])  # Use the preprocessed features
y = df_final['Ilf_Mean']

In [807]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [809]:
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

In [811]:
y_pred = model.predict(X_test)

In [813]:
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"R² score: {r2}")
print(f"Mean Squared Error: {mse}")

R² score: -0.09355987677792377
Mean Squared Error: 6.2941063240804045


In [817]:
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

# Example dataset (Replace with actual data)
X = df_final.drop(columns=['Ilf_Mean'])
y = df_final['Ilf_Mean']

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:
from sklearn.model_selection import GridSearchCV

param_grid = {
    'C': [0.1, 1, 10, 100],
    'epsilon': [0.001, 0.01, 0.1, 1],
    'degree': [2, 3, 4]
}

grid_search = GridSearchCV(SVR(kernel='poly'), param_grid, cv=5, scoring='r2')
grid_search.fit(X_train_scaled, y_train)

print("Best Parameters:", grid_search.best_params_)

# Train best model
best_model = grid_search.best_estimator_
y_pred_best = best_model.predict(X_test_scaled)

r2_best = r2_score(y_test, y_pred_best)
print(f"Optimized R² Score: {r2_best:.4f}")
