In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV

In [None]:
import warnings

# Silence every warning category for the rest of the session.
# NOTE(review): this also hides any solver convergence / failed-fit warnings
# raised during the grid search further down — confirm that is intended.
warnings.simplefilter("ignore")

# Data

In [None]:
# Read the training and test tables from the working directory (train first).
train, test = (
    pd.read_csv(csv_name)
    for csv_name in ("location_train.csv", "location_test.csv")
)

In [None]:
# Summary of the training frame: column names, dtypes and non-null counts.
train.info()

In [None]:
# Preview the first rows of the training frame.
train.head()

In [None]:
# Summary of the test frame: column names, dtypes and non-null counts.
test.info()

In [None]:
# Preview the first rows of the test frame.
test.head()

In [None]:
# Target vector: the "class" column of the training frame.
y_train = train["class"]

# Feature matrices: everything except "ID" (and, for train, the target).
# "ID" is kept out of the features; it is only carried into the submission.
X_train = train.drop(columns=["ID", "class"])
X_test = test.drop(columns=["ID"])

# Model selection

In [None]:
# Grid Search — settings shared by the model-selection cells

verbose = 3     # verbosity level forwarded to GridSearchCV while fitting
cv = 5          # number of cross-validation folds

## Logistic Regression

In [None]:
# Hyper-parameter search for logistic regression.
#
# The grid is a list of sub-grids so that only solver/penalty pairs supported
# by scikit-learn are evaluated:
#   * liblinear -> l1, l2
#   * saga      -> l1, l2, elasticnet (needs l1_ratio), no penalty
# A single flat grid would also generate liblinear+elasticnet,
# liblinear+no-penalty and saga+elasticnet without l1_ratio; each of those fits
# fails and is scored NaN, wasting CV time and cluttering the results table.
C_values = [0.001, 0.01, 0.1, 1, 10]
class_weights = ["balanced"]

parameters = [
    {
        "penalty": ["l1", "l2"],
        "C": C_values,
        "class_weight": class_weights,
        "solver": ["liblinear", "saga"],
    },
    {
        # Elastic-net is only available with saga and requires a mixing ratio.
        # 0.5 weights L1 and L2 equally; add more values to widen the search.
        "penalty": ["elasticnet"],
        "l1_ratio": [0.5],
        "C": C_values,
        "class_weight": class_weights,
        "solver": ["saga"],
    },
    {
        # Unpenalised model: C has no effect, so it is not varied here.
        # `None` replaces the string "none", which scikit-learn deprecated in
        # 1.2 and removed in 1.4.
        "penalty": [None],
        "class_weight": class_weights,
        "solver": ["saga"],
    },
]

# saga and liblinear shuffle the data internally; a fixed random_state keeps
# the CV scores (and therefore the selected model) reproducible across runs.
# NOTE(review): max_iter is left at its default; if saga does not converge,
# consider raising it or scaling the features.
model = GridSearchCV(
    LogisticRegression(random_state=0),
    parameters,
    cv=cv,
    verbose=verbose,
    scoring="accuracy",
)
model.fit(X_train, y_train)

# Ten best candidates by mean cross-validated accuracy.
results = pd.DataFrame(model.cv_results_)
results = results[
    [
        "param_penalty",
        "param_C",
        "param_l1_ratio",
        "param_class_weight",
        "param_solver",
        "mean_test_score",
    ]
]
results.sort_values(["mean_test_score"], ascending=False).head(10)

# Final model

In [None]:
# GridSearchCV refits the winning configuration on the whole training set by
# default (refit=True), so best_estimator_ is already trained. Fitting it a
# second time only repeats that work and retrains the object owned by `model`.
best_model = model.best_estimator_
best_model

In [None]:
# Two-column submission frame: the test IDs next to the predicted class.
predictions = pd.DataFrame(
    {
        "ID": test["ID"],
        "class": best_model.predict(X_test),
    }
)

In [None]:
# Write the predictions to submission.csv, omitting the DataFrame index.
predictions.to_csv("submission.csv", index=False)