In [2]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split, cross_validate, cross_val_score
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.svm import SVC
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load the sampled postcode records from disk.
df = pd.read_csv('postcodes_sampled.csv')

# Target: the column we want to predict.
y = df['riskLabel']

# Features: everything except the columns listed here (the target itself is one of them).
non_feature_columns = ['postcode', 'sector', 'localAuthority', 'riskLabel', 'medianPrice']
X = df.drop(columns=non_feature_columns)

# Keep 75% of the rows for training; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.75,
    random_state=42,
)

# Choose the columns for each branch from the raw (not yet transformed) training frame.
numeric_columns = X_train.select_dtypes(include=np.number).columns
categorical_columns = X_train.select_dtypes(exclude=np.number).columns

# Min-max scale the numeric columns and one-hot encode every other column.
# - handle_unknown='ignore': a category that appears only in the held-out data is
#   encoded as all zeros instead of raising when preproc.transform() is called later.
# - sparse_threshold=0: makes the ColumnTransformer always return a dense array.
#   This replaces OneHotEncoder(sparse=False), whose `sparse` keyword was deprecated
#   in scikit-learn 1.2 and removed in 1.4.
preproc = ColumnTransformer(
    [
        ('num_transformer', MinMaxScaler(), numeric_columns),
        ('cat_transformer', OneHotEncoder(handle_unknown='ignore'), categorical_columns),
    ],
    sparse_threshold=0,
)

# Fit on the training split only, then replace X_train with its transformed array.
X_train = preproc.fit_transform(X_train)

# One classifier and one regressor per model family, all trained on the same target.
# NOTE(review): the regressors treat y_train as a numeric quantity — confirm that is
# intended for this target.
svc = SVC(C=0.03, kernel='rbf')
svc_reg = SVR()

knn = KNeighborsClassifier()
knn_reg = KNeighborsRegressor()

# Decision trees permute the candidate features at every split, so an unseeded tree can
# give different scores from run to run. Seed them (same value as the train/test split)
# so that a Restart & Run All reproduces the reported numbers.
tree = DecisionTreeClassifier(random_state=42)
tree_reg = DecisionTreeRegressor(random_state=42)

# Fit every estimator on the preprocessed training data.
for estimator in (svc, knn, tree, knn_reg, tree_reg, svc_reg):
    estimator.fit(X_train, y_train)

# Apply the preprocessing fitted on the training split to the held-out rows
# (transform only, no re-fitting).
X_test = preproc.transform(X_test)

# Class predictions of each classifier on the held-out rows.
y_predsvc = svc.predict(X_test)
y_predtree = tree.predict(X_test)
y_predknn = knn.predict(X_test)

# Classification accuracy on the held-out split.
for clf_name, clf_preds in (
    ('SVC', y_predsvc),
    ('Decision Tree', y_predtree),
    ('KNN', y_predknn),
):
    print(f'accuracy of {clf_name} {accuracy_score(y_test, clf_preds)}')

# Regressor .score() is the R^2 coefficient of determination. Each score is computed
# once, here; the earlier stand-alone .score() calls discarded their results and only
# repeated the prediction work.
# The 'SVC' label on the last line actually reports the SVR model (svc_reg); the text
# is left unchanged so the printed output stays comparable with earlier runs.
for reg_name, reg_model in (
    ('knn', knn_reg),
    ('Dec_tree', tree_reg),
    ('SVC', svc_reg),
):
    print(f'Regression of {reg_name} {reg_model.score(X_test, y_test)}')

accuracy of SVC 0.9048
accuracy of Decision Tree 0.9008
accuracy of KNN 0.9264
Regression of knn 0.30043432051766017
Regression of Dec_tree -0.04051061886165819
Regression of SVC -0.035691518923697485
