In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split, KFold, cross_val_score, GridSearchCV

from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn import neighbors
from sklearn import svm
from sklearn.svm import SVC

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [None]:
import warnings
warnings.filterwarnings("ignore")

# Data

In [None]:
# Read the training and test tables from the working directory, in that order.
train, test = (
    pd.read_csv(csv_name) for csv_name in ("location_train.csv", "location_test.csv")
)
# Show the first training rows as the cell output.
train.head()

In [None]:
# Print a summary of the training frame (columns, dtypes, non-null counts).
train.info()

In [None]:
# Features: every training column except the "ID" and "class" columns.
X = train.drop(columns=["ID", "class"])
# Target: the "class" column of the training frame.
y = train.loc[:, "class"]

# Test features: only the "ID" column is removed.
X_test = test.drop(columns=["ID"])

In [None]:
# Preview the first rows of the feature matrix.
X.head()

In [None]:
# Preview the first values of the target column.
y.head()

# Exploratory data analysis

In [None]:
# Count missing values in each feature column.
X.isnull().sum()

In [None]:
# Number of occurrences of each class, ordered by class label
y.value_counts().sort_index()

# Model selection

In [None]:
# Shared settings passed to every GridSearchCV call below:
# `cv` is forwarded as the cross-validation argument, `verbose` as the logging level.
cv, verbose = 5, 3

## KNN

In [None]:
# Candidate neighbourhood sizes and vote weightings for k-nearest neighbours.
parameters = {
    "n_neighbors": [1, 5, 10, 20, 30, 40, 50],
    "weights": ["uniform", "distance"],
}
# Exhaustive search over the grid above using the shared cv / verbose settings.
knn = GridSearchCV(
    estimator=neighbors.KNeighborsClassifier(),
    param_grid=parameters,
    cv=cv,
    verbose=verbose,
)
knn.fit(X, y)

In [None]:
# Keep only the searched parameters and the mean cross-validated score,
# then display the candidates from best to worst.
knn_results = pd.DataFrame(knn.cv_results_).loc[
    :, ["param_n_neighbors", "param_weights", "mean_test_score"]
]
knn_results.sort_values(by="mean_test_score", ascending=False)

## Decision Tree

In [None]:
parameters = {"n_estimators":[100, 200, 300, 400, 500], "criterion":["gini", "entropy"], "max_depth":["None", 10, 20]}
dt = GridSearchCV(RandomForestClassifier(), parameters, cv=cv, verbose=verbose)
dt.fit(X, y)

In [None]:
dt_results = pd.DataFrame(dt.cv_results_)
dt_results = dt_results[["param_n_estimators", "param_criterion", "param_max_depth", "mean_test_score"]]
dt_results.sort_values(["mean_test_score"], ascending=False)

## Random Forest

In [None]:
# Grid for the random forest search.
# `max_depth` takes an int or the None object (no depth limit). The string
# "None" is not a valid value: every candidate using it fails to fit and is
# scored as NaN instead of being evaluated.
parameters = {
    "n_estimators": [100, 200, 300, 400, 500],
    "criterion": ["gini", "entropy"],
    "max_depth": [None, 10, 20],
}
rf = GridSearchCV(RandomForestClassifier(), parameters, cv=cv, verbose=verbose)
rf.fit(X, y)

In [None]:
# Reduce the search output to the tuned parameters plus the mean CV score,
# and display the rows ordered from highest to lowest score.
rf_results = pd.DataFrame(rf.cv_results_).loc[
    :, ["param_n_estimators", "param_criterion", "param_max_depth", "mean_test_score"]
]
rf_results.sort_values(by="mean_test_score", ascending=False)

## SVM

In [None]:
parameters = {"kernel":["sigmoid", "rbf"], "C":[1, 5, 10, 20], "gamma":["scale", "auto"]}
svm = GridSearchCV(svm.SVC(), parameters, cv=cv, verbose=verbose)
svm.fit(X, y)

In [None]:
svm_results = pd.DataFrame(svm.cv_results_)
svm_results = svm_results[["param_C", "param_kernel", "param_gamma", "mean_test_score"]]
svm_results.sort_values(["mean_test_score"], ascending=False)

## Logistic Regression

In [None]:
# Grid for the logistic regression search, written as a list of sub-grids.
# penalty="elasticnet" needs an explicit `l1_ratio`; without one those
# candidates fail to fit and are scored as NaN. It therefore gets its own
# sub-grid so `l1_ratio` is only set where it is used.
# NOTE(review): newer scikit-learn releases spell the unpenalised option as
# the None object rather than the string "none" — confirm against the
# installed version.
parameters = [
    {"penalty": ["l1", "l2", "none"], "C": [1, 5, 10, 20], "solver": ["saga"]},
    {
        "penalty": ["elasticnet"],
        "l1_ratio": [0.5],  # equal mix of the L1 and L2 terms
        "C": [1, 5, 10, 20],
        "solver": ["saga"],
    },
]
logreg = GridSearchCV(LogisticRegression(), parameters, cv=cv, verbose=verbose)
logreg.fit(X, y)

In [None]:
# Keep the penalty, C and mean CV score of each candidate,
# then list the candidates from best to worst.
logreg_results = pd.DataFrame(logreg.cv_results_).loc[
    :, ["param_penalty", "param_C", "mean_test_score"]
]
logreg_results.sort_values(by="mean_test_score", ascending=False)

# Final model

In [None]:
# Fit the final classifier on all training rows.
# The class is referenced through its own import so this cell does not depend
# on what the notebook-level name `svm` happens to be bound to at this point.
# NOTE(review): the hyperparameters are hard-coded — presumably the best row of
# the SVM search results; confirm before submitting.
model = SVC(C=5, kernel="rbf", gamma="scale")
model.fit(X, y)

In [None]:
# Two-column table: the test identifiers next to the model's predicted class.
predictions = pd.DataFrame({"ID": test["ID"], "class": model.predict(X_test)})

In [None]:
# Write the predictions table to "submission.csv" without the DataFrame index.
predictions.to_csv("submission.csv", index=False)