In [14]:
import pandas as pd
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import StandardScaler

In [15]:
# Step 1: Load the training and testing datasets
train_data = pd.read_csv("p1_train.csv")
test_data = pd.read_csv("p1_test.csv")

# Extract features and target variable from the datasets
X_train = train_data.iloc[:, :-1]  # Features (all columns except the last one)
y_train = train_data.iloc[:, -1]   # Target variable (last column)

X_test = test_data.iloc[:, :-1]    # Features in the test set
y_test = test_data.iloc[:, -1]     # Target variable in the test set

In [16]:
print("Training dataset columns:", X_train.columns)
print("Test dataset columns:", X_test.columns)

Training dataset columns: Index(['-7.262173392018990370e+00', '9.572603824406265005e+00'], dtype='object')
Test dataset columns: Index(['1.589300268390259419e+01', '1.171282902260990966e+01'], dtype='object')


In [17]:
# Rename the columns in the test dataset to match those in the training dataset
X_test.columns = X_train.columns

# Now scale the test dataset using the same scaler
scaler = StandardScaler()
scaler.fit(X_train)
X_test_scaled = scaler.transform(X_test)

In [18]:
# Step 2: Preprocess the data if necessary
# You may want to scale the features for SVR
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [19]:
# Step 3: Train linear regression model
linear_model = LinearRegression()
linear_model.fit(X_train, y_train)

# List down the coefficients of the linear regression model
coefficients = linear_model.coef_
intercept = linear_model.intercept_

print("Coefficients:", coefficients)
print("Intercept:", intercept)

Coefficients: [-1.99966328 -0.99729226]
Intercept: 2.9791572990622712


In [20]:
# Step 4: Train Support Vector Regression (SVR) model
svr_model = SVR(kernel='linear')  # You can try different kernels like 'rbf' or 'poly' as well
svr_model.fit(X_train_scaled, y_train)

# List down the support vectors of the SVR model
support_vectors = svr_model.support_vectors_

# List down the coefficients of the SVR model (if applicable)
if svr_model.kernel == 'linear':
    coefficients = svr_model.coef_
else:
    coefficients = None

# List down the intercept of the SVR model (if applicable)
intercept = svr_model.intercept_

print("Support Vectors:", support_vectors)
print("Coefficients:", coefficients)
print("Intercept:", intercept)

Support Vectors: [[ 0.3093122   0.92220586]
 [-1.1156724  -0.30885702]
 [ 0.62284599 -1.23476492]
 ...
 [ 1.53236038  0.97809193]
 [ 0.05560497 -1.33192606]
 [ 0.94148424 -0.24564666]]
Coefficients: [[-23.09760648 -11.44225397]]
Intercept: [3.12286914]


In [21]:
# Step 5: Evaluate the trained models
# For Linear Regression
y_pred_linear = linear_model.predict(X_test)
mse_linear = mean_squared_error(y_test, y_pred_linear)
mae_linear = mean_absolute_error(y_test, y_pred_linear)

# For SVR
y_pred_svr = svr_model.predict(X_test_scaled)
mse_svr = mean_squared_error(y_test, y_pred_svr)
mae_svr = mean_absolute_error(y_test, y_pred_svr)

In [22]:
# Step 6: Report the evaluation metrics
print("Linear Regression:")
print("Mean Squared Error (MSE):", mse_linear)
print("Mean Absolute Error (MAE):", mae_linear)

print("\nSupport Vector Regression (SVR):")
print("Mean Squared Error (MSE):", mse_svr)
print("Mean Absolute Error (MAE):", mae_svr)

Linear Regression:
Mean Squared Error (MSE): 5.045760259110822
Mean Absolute Error (MAE): 1.7988557623211767

Support Vector Regression (SVR):
Mean Squared Error (MSE): 5.044466794186341
Mean Absolute Error (MAE): 1.7985159176744716


In [23]:
# Step 5: Evaluate the trained models using test data
# For Linear Regression
y_pred_linear_test = linear_model.predict(X_test)
mse_linear_test = mean_squared_error(y_test, y_pred_linear_test)
mae_linear_test = mean_absolute_error(y_test, y_pred_linear_test)

# For SVR
y_pred_svr_test = svr_model.predict(X_test_scaled)
mse_svr_test = mean_squared_error(y_test, y_pred_svr_test)
mae_svr_test = mean_absolute_error(y_test, y_pred_svr_test)

# Report the evaluation metrics for test data
print("\nEvaluation Metrics on Test Data:")
print("Linear Regression:")
print("Mean Squared Error (MSE):", mse_linear_test)
print("Mean Absolute Error (MAE):", mae_linear_test)

print("\nSupport Vector Regression (SVR):")
print("Mean Squared Error (MSE):", mse_svr_test)
print("Mean Absolute Error (MAE):", mae_svr_test)


Evaluation Metrics on Test Data:
Linear Regression:
Mean Squared Error (MSE): 5.045760259110822
Mean Absolute Error (MAE): 1.7988557623211767

Support Vector Regression (SVR):
Mean Squared Error (MSE): 5.044466794186341
Mean Absolute Error (MAE): 1.7985159176744716
