# Baseline Model

In [None]:
import pandas as pd
import numpy as np
import geopandas as geopd
import warnings
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn import metrics
import visualization as vi

First, we load the final dataframe from the shapefile.

In [None]:
# Read the prepared modelling shapefile into a GeoDataFrame.
shapefile_path = "../data/final_shapefiles/foxes_modelling_all.shp"
df_all = geopd.read_file(shapefile_path)

In [None]:
# Number of rows per target class in the loaded data.
df_all["target"].value_counts()

Our dataset is very imbalanced. Since we want to use logistic regression as a baseline model, we downsample class 0 to the size of class 1 to obtain a balanced dataset.

In [None]:
# Balance the classes by random undersampling: keep every row of class 1 and
# draw an equally sized random subset of class 0.
class_0 = df_all[df_all["target"] == 0]
class_1 = df_all[df_all["target"] == 1]
# Fixed seed so the drawn subset -- and with it the fitted model and its
# scores -- is reproducible across kernel restarts and re-runs.
# NOTE(review): sampling without replacement raises ValueError if class 0 has
# fewer rows than class 1; this assumes class 0 is the majority class.
class_0_under = class_0.sample(n=class_1.shape[0], random_state=1)
df_all = pd.concat([class_0_under, class_1], axis=0)

In [None]:
# Number of rows per target class at this point in the notebook.
df_all["target"].value_counts()

For the baseline model, we use only elevation as a feature.

In [None]:
# Feature matrix (kept two-dimensional by selecting with a list) and target.
feature_columns = ["elev"]
X = df_all[feature_columns]
y = df_all["target"]

In [None]:
# Seeded hold-out split; test_size=0.25 is scikit-learn's default, spelled
# out here so the train/test proportions are visible.
(X_train, X_test, y_train, y_test) = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=1,
)

In [None]:
# Fit a logistic regression with default hyperparameters on the training
# split; fit() returns the estimator itself, so the call can be chained.
lr = LogisticRegression().fit(X_train, y_train)
lr  # display the fitted estimator as the cell output

In [None]:
# Score the fitted model on the held-out test split.
y_pred_test = lr.predict(X_test)

print("Test data:")
for label, scorer in (
    ('Accuracy: ', metrics.accuracy_score),
    ('F1-Score: ', metrics.f1_score),
    ('Precision: ', metrics.precision_score),
    ('Recall: ', metrics.recall_score),
):
    print(label, scorer(y_test, y_pred_test))

# Confusion matrix of true vs. predicted labels, drawn with the project's
# plotting helper.
cm_test = confusion_matrix(y_test, y_pred_test)
vi.plot_confusion_matrix(
    cm_test,
    classes=['No fox', 'Fox'],
    title='Confusion Matrix Test Data',
)

In [None]:
# Score the fitted model on the training split it was fitted on.
y_pred_train = lr.predict(X_train)

print("Train data:")
for label, scorer in (
    ('Accuracy: ', metrics.accuracy_score),
    ('F1-Score: ', metrics.f1_score),
    ('Precision: ', metrics.precision_score),
    ('Recall: ', metrics.recall_score),
):
    print(label, scorer(y_train, y_pred_train))

# Confusion matrix of true vs. predicted labels, drawn with the project's
# plotting helper.
cm_train = confusion_matrix(y_train, y_pred_train)
vi.plot_confusion_matrix(
    cm_train,
    classes=['No fox', 'Fox'],
    title='Confusion Matrix Train Data',
)