# Q1

In [17]:
from sklearn.model_selection import train_test_split,GridSearchCV ,cross_val_score
import numpy as np
import pandas as pd
from sklearn.svm import SVC ,SVR
from sklearn.metrics import accuracy_score, mean_absolute_error, r2_score

data = pd.read_csv('Prediction of students performance.csv')

labels = data['Target']



In [4]:
X = data.drop('Target', axis=1)
y = data['Target']

# Split the data into training, validation, and test sets
train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.2, random_state=42)
train_X, valid_X, train_y, valid_y = train_test_split(train_X, train_y, test_size=0.25, random_state=42)

## A


In [3]:
# Train the linear SVM model
svm_model = SVC(kernel='linear')
svm_model.fit(train_X, train_y)

# Predictions on training and test sets
train_predictions = svm_model.predict(train_X)
test_predictions = svm_model.predict(test_X)

# Calculate accuracy on training and test sets
train_accuracy = accuracy_score(train_y, train_predictions)
test_accuracy = accuracy_score(test_y, test_predictions)

# Number of support vectors
support_vectors_count = len(svm_model.support_vectors_)

# Print the results
print(f"Accuracy on training set: {train_accuracy:.2f}")
print(f"Accuracy on test set: {test_accuracy:.2f}")
print(f"Number of support vectors: {support_vectors_count}")

Accuracy on training set: 0.77
Accuracy on test set: 0.74
Number of support vectors: 1014


## B


In [6]:
svm_model = SVC(kernel='rbf', probability=True)

# Define the parameter grid to search
param_grid = {
    'C': [1000 * 10**n for n in range(5)],
    'gamma': [1e-10 * 10**n for n in range(3)]
}

# Use GridSearchCV to find the best parameters
grid_search = GridSearchCV(svm_model, param_grid, cv=5, scoring='accuracy')
grid_search.fit(valid_X, valid_y)

# Get the best parameters
best_params = grid_search.best_params_


In [5]:
# Train the model with the best parameters on the entire training set
best_svm_model = SVC(kernel='rbf', C=best_params['C'], gamma=best_params['gamma'], probability=True)
best_svm_model.fit(train_X, train_y)

# Predictions on training and test sets
train_predictions = best_svm_model.predict(train_X)
test_predictions = best_svm_model.predict(test_X)

# Calculate accuracy on training and test sets
train_accuracy = accuracy_score(train_y, train_predictions)
test_accuracy = accuracy_score(test_y, test_predictions)

# Number of support vectors
support_vectors_count = len(best_svm_model.support_vectors_)

# Print the results
print(f"Best Parameters: {best_params}")
print(f"Accuracy on training set: {train_accuracy:.2f}")
print(f"Accuracy on test set: {test_accuracy:.2f}")
print(f"Number of support vectors: {support_vectors_count}")

Best Parameters: {'C': 10000000, 'gamma': 1e-08}
Accuracy on training set: 0.79
Accuracy on test set: 0.75
Number of support vectors: 1060


## C

In [7]:
poly_svm_model = SVC(kernel='poly', probability=True)

# Define the parameter grid for polynomial kernel
poly_param_grid = {
    'C': [1000 * 10**n for n in range(5)],
    'gamma': [1e-10 * 10**n for n in range(3)],
    'degree': [2, 3]
}

# Use GridSearchCV to find the best parameters for polynomial kernel
poly_grid_search = GridSearchCV(poly_svm_model, poly_param_grid, cv=5, scoring='accuracy')
poly_grid_search.fit(valid_X, valid_y)

# Get the best parameters for polynomial kernel
best_poly_params = poly_grid_search.best_params_


In [9]:
best_poly_svm_model = SVC(kernel='poly', C=best_poly_params['C'], degree=best_poly_params['degree'], probability=True)
best_poly_svm_model.fit(train_X, train_y)

train_poly_predictions = best_poly_svm_model.predict(train_X)
test_poly_predictions = best_poly_svm_model.predict(test_X)


# Calculate accuracy on training and test sets for both models
train_poly_accuracy = accuracy_score(train_y, train_poly_predictions)
test_poly_accuracy = accuracy_score(test_y, test_poly_predictions)


# Number of support vectors for both models
support_vectors_poly_count = len(best_poly_svm_model.support_vectors_)


# Print the results
print("Results for Polynomial Kernel:")
print(f"Best Parameters: {best_poly_params}")
print(f"Accuracy on training set: {train_poly_accuracy:.2f}")
print(f"Accuracy on test set: {test_poly_accuracy:.2f}")
print(f"Number of support vectors: {support_vectors_poly_count}\n")

print("Results for RBF Kernel:")
print(f"Best Parameters: {best_params}")
print(f"Accuracy on training set: {train_accuracy:.2f}")
print(f"Accuracy on test set: {test_accuracy:.2f}")
print(f"Number of support vectors: {support_vectors_count}")


Results for Polynomial Kernel:
Best Parameters: {'C': 10000000, 'degree': 3, 'gamma': 1e-08}
Accuracy on training set: 0.77
Accuracy on test set: 0.74
Number of support vectors: 1064

Results for RBF Kernel:
Best Parameters: {'C': 10000000, 'gamma': 1e-08}
Accuracy on training set: 0.79
Accuracy on test set: 0.75
Number of support vectors: 1060


## D

In [7]:
# Train the model with the best parameters on the entire training set
best_svm_model = SVC(kernel='rbf', C=best_params['C'], gamma=best_params['gamma'], probability=True)
best_svm_model.fit(train_X, train_y)

# Predictions on training and test sets
train_predictions = best_svm_model.predict(train_X)
test_predictions = best_svm_model.predict(test_X)

# Calculate accuracy on training and test sets
train_accuracy = accuracy_score(train_y, train_predictions)
test_accuracy = accuracy_score(test_y, test_predictions)

# Number of support vectors
support_vectors_count = len(best_svm_model.support_vectors_)

print("Results for RBF Kernel:")
print(f"Best Parameters: {best_params}")
print(f"Accuracy on training set: {train_accuracy:.2f}")
print(f"Accuracy on test set: {test_accuracy:.2f}")
print(f"Number of support vectors: {support_vectors_count}")

Results for RBF Kernel:
Best Parameters: {'C': 10000000, 'gamma': 1e-08}
Accuracy on training set: 0.79
Accuracy on test set: 0.75
Number of support vectors: 1060


## E

In [9]:


# Train the model with the best parameters on the entire training set
best_svm_model = SVC(C=best_params['C'], gamma=best_params['gamma'], probability=True)
best_svm_model.fit(train_X, train_y)

# Predictions on training and test sets
train_predictions = best_svm_model.predict(train_X)
test_predictions = best_svm_model.predict(test_X)

# Calculate accuracy on training and test sets
train_accuracy = accuracy_score(train_y, train_predictions)
test_accuracy = accuracy_score(test_y, test_predictions)

# Number of support vectors
support_vectors_count = len(best_svm_model.support_vectors_)

# Print the results
print(f"Best Parameters: {best_params}")
print(f"Accuracy on training set: {train_accuracy:.2f}")
print(f"Accuracy on test set: {test_accuracy:.2f}")
print(f"Number of support vectors: {support_vectors_count}")

# Cross-validation scores
cv_scores = cross_val_score(best_svm_model, X, y, cv=3)
print(f"Cross-Validation Scores: {cv_scores}")


Best Parameters: {'C': 10000000, 'gamma': 1e-08}
Accuracy on training set: 0.79
Accuracy on test set: 0.75
Number of support vectors: 1060
Cross-Validation Scores: [0.76135593 0.77559322 0.73405699]


# Q2

## A

In [18]:

df = pd.read_csv('Fuel Consumption.csv')

train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)
train_df, val_df = train_test_split(train_df, test_size=0.25, random_state=42)

X_train, y_train = train_df[['ENGINESIZE']], train_df['FUELCONSUMPTION_COMB']
X_val, y_val = val_df[['ENGINESIZE']], val_df['FUELCONSUMPTION_COMB']
X_test, y_test = test_df[['ENGINESIZE']], test_df['FUELCONSUMPTION_COMB']

linear_svr_model = SVR(kernel='linear')
linear_svr_model.fit(X_train, y_train)

y_train_pred = linear_svr_model.predict(X_train)
y_val_pred = linear_svr_model.predict(X_val)
y_test_pred = linear_svr_model.predict(X_test)

mae_train = mean_absolute_error(y_train, y_train_pred)
mae_val = mean_absolute_error(y_val, y_val_pred)
mae_test = mean_absolute_error(y_test, y_test_pred)

r2_train = r2_score(y_train, y_train_pred)
r2_val = r2_score(y_val, y_val_pred)
r2_test = r2_score(y_test, y_test_pred)

# گزارش دقت مدل
print("Performance on Train Set:")
print(f"Mean Absolute Error (MAE): {mae_train:.2f}")
print(f"R-squared (R2): {r2_train:.2f}")

print("\nPerformance on Validation Set:")
print(f"Mean Absolute Error (MAE): {mae_val:.2f}")
print(f"R-squared (R2): {r2_val:.2f}")

print("\nPerformance on Test Set:")
print(f"Mean Absolute Error (MAE): {mae_test:.2f}")
print(f"R-squared (R2): {r2_test:.2f}")

Performance on Train Set:
Mean Absolute Error (MAE): 1.34
R-squared (R2): 0.69

Performance on Validation Set:
Mean Absolute Error (MAE): 1.39
R-squared (R2): 0.62

Performance on Test Set:
Mean Absolute Error (MAE): 1.44
R-squared (R2): 0.65


## B

In [19]:
nonlinear_svr_model = SVR(kernel='rbf')
nonlinear_svr_model.fit(X_train, y_train)

y_train_pred = nonlinear_svr_model.predict(X_train)
y_val_pred = nonlinear_svr_model.predict(X_val)
y_test_pred = nonlinear_svr_model.predict(X_test)

mae_train = mean_absolute_error(y_train, y_train_pred)
mae_val = mean_absolute_error(y_val, y_val_pred)
mae_test = mean_absolute_error(y_test, y_test_pred)

r2_train = r2_score(y_train, y_train_pred)
r2_val = r2_score(y_val, y_val_pred)
r2_test = r2_score(y_test, y_test_pred)

print("Performance on Train Set:")
print(f"Mean Absolute Error (MAE): {mae_train:.2f}")
print(f"R-squared (R2): {r2_train:.2f}")

print("\nPerformance on Validation Set:")
print(f"Mean Absolute Error (MAE): {mae_val:.2f}")
print(f"R-squared (R2): {r2_val:.2f}")

print("\nPerformance on Test Set:")
print(f"Mean Absolute Error (MAE): {mae_test:.2f}")
print(f"R-squared (R2): {r2_test:.2f}")

Performance on Train Set:
Mean Absolute Error (MAE): 1.31
R-squared (R2): 0.69

Performance on Validation Set:
Mean Absolute Error (MAE): 1.38
R-squared (R2): 0.64

Performance on Test Set:
Mean Absolute Error (MAE): 1.43
R-squared (R2): 0.67
