In [16]:
# Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score


In [17]:
# Load dataset
df = pd.read_csv('Populated UK UNI.csv')


In [18]:
df = df[df['Student_enrollment'] != '']
df = df.dropna(subset=['Student_enrollment'])

In [19]:
import re
df['Student_enrollment'] = df['Student_enrollment'].str.extract('(\d+)')
df['Student_enrollment'] = df['Student_enrollment'].astype(float)

In [20]:
# Convert percentage columns to numbers 
df['Student_satisfaction'] = df['Student_satisfaction'].str.rstrip('%').astype('float') 


In [21]:
# Select features 
features = ['Region', 'PG_average_fees_(in_pounds)', 'UK_rank', 'Student_enrollment', 'Student_satisfaction']
X = df[features]
y = df['University_name']
X


Unnamed: 0,Region,PG_average_fees_(in_pounds),UK_rank,Student_enrollment,Student_satisfaction
0,East of England,34920,1,20.0,85.5
1,East of England,35494,1,20.0,85.5
2,East of England,27000,1,20.0,85.5
3,East of England,35525,1,20.0,85.5
4,South East England,43600,2,25.0,86.5
...,...,...,...,...,...
519,Wales,12000,130,5.0,74.3
520,London,0,131,2.0,66.1
521,London,17000,131,2.0,66.1
522,London,0,131,2.0,66.1


In [22]:
# Encode categorical data
X = pd.get_dummies(X, columns=['Region']) 
X

Unnamed: 0,PG_average_fees_(in_pounds),UK_rank,Student_enrollment,Student_satisfaction,Region_East Midlands,Region_East of England,Region_London,Region_North East England,Region_North West England,Region_Northern Ireland,Region_Scotland,Region_South East England,Region_South West England,Region_Wales,Region_West Midlands,Region_Yorkshire and the Humber
0,34920,1,20.0,85.5,False,True,False,False,False,False,False,False,False,False,False,False
1,35494,1,20.0,85.5,False,True,False,False,False,False,False,False,False,False,False,False
2,27000,1,20.0,85.5,False,True,False,False,False,False,False,False,False,False,False,False
3,35525,1,20.0,85.5,False,True,False,False,False,False,False,False,False,False,False,False
4,43600,2,25.0,86.5,False,False,False,False,False,False,False,True,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
519,12000,130,5.0,74.3,False,False,False,False,False,False,False,False,False,True,False,False
520,0,131,2.0,66.1,False,False,True,False,False,False,False,False,False,False,False,False
521,17000,131,2.0,66.1,False,False,True,False,False,False,False,False,False,False,False,False
522,0,131,2.0,66.1,False,False,True,False,False,False,False,False,False,False,False,False


In [23]:
# Split data into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [24]:
# Scale data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
X_test

array([[-0.35129561,  1.62012971, -0.55389153, ..., -0.27770636,
        -0.30190368, -0.30658568],
       [ 2.47832675, -1.60547802,  0.01829918, ..., -0.27770636,
        -0.30190368, -0.30658568],
       [-0.30313183,  1.28196116, -1.46939667, ..., -0.27770636,
         3.31231468, -0.30658568],
       ...,
       [-0.35129561,  0.60562405, -0.66832967, ..., -0.27770636,
        -0.30190368, -0.30658568],
       [-0.11047669,  0.65764999,  0.01829918, ..., -0.27770636,
        -0.30190368, -0.30658568],
       [-1.85641389, -0.59097236, -1.35495852, ..., -0.27770636,
        -0.30190368, -0.30658568]])

In [25]:
# Train SVM model
svm = SVC(kernel='rbf')
svm.fit(X_train, y_train)

In [26]:
# Evaluate model 
y_pred = svm.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))

Accuracy: 0.1619047619047619


In [27]:
# Recommend universities for new student
new_data = [['London', 25000, 5, 10000, 90]]  

new_data = pd.DataFrame(new_data, columns=['Region', 'PG_average_fees_in_pounds', 'UK_Rank', 'Student_enrollment', 'Student_satisfaction'])
new_data

Unnamed: 0,Region,PG_average_fees_in_pounds,UK_Rank,Student_enrollment,Student_satisfaction
0,London,25000,5,10000,90


In [None]:


new_data = pd.get_dummies(new_data, columns=['Region'])
new_data

In [None]:


new_data = scaler.transform(new_data)
new_data

In [None]:
# Get top 5 recommendations 
indices = scores.argsort()[-5:][::-1]
recommendations = df['University Name'].iloc[indices]

print("Recommended Universities:")
print(recommendations)