# Call landlord with logistic regression

In [3]:
import pandas as pd
import numpy as np

In [4]:
# Read the header-less CSV: every column but the last is a feature, the last one is the label.
data = pd.read_csv('call.csv', header=None)
X, y = data.iloc[:, :-1].to_numpy(), data.iloc[:, -1].to_numpy()
print(f'{X.shape} {y.shape}')

(4048, 17) (4048,)


In [5]:
def transform(X):
    """Turn rows of card values into per-value count vectors.

    Each row of ``X`` is treated as a collection of card values drawn from
    1..15. The result has one row per input row and 15 columns, where column
    ``k`` (0-based) holds how many times the value ``k + 1`` occurs in that row.

    Parameters
    ----------
    X : array-like of shape (n_rows, n_cards)
        Card values; every entry must equal one of the integers 1..15.

    Returns
    -------
    numpy.ndarray of shape (n_rows, 15)
        Integer occurrence counts. An input with zero rows yields an empty
        ``(0, 15)`` array instead of raising.

    Raises
    ------
    KeyError
        If any entry is not one of 1..15 (same exception type the earlier
        dict-based implementation produced for an unknown value).
    """
    card_values = np.arange(1, 16)
    cards = np.asarray(X)

    # Reject unknown values explicitly; the comparison below would otherwise
    # silently drop them from the counts.
    known = np.isin(cards, card_values)
    if not known.all():
        offending = cards[~known][0]
        raise KeyError(f'card value {offending!r} is outside the supported range 1..15')

    # Broadcast (n_rows, n_cards, 1) against (15,) to get a one-hot cube, then
    # sum over the card axis to obtain the per-value counts for every row.
    return (cards[:, :, np.newaxis] == card_values).sum(axis=1)

In [6]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score

# Training and validation.
# Encode every row as card-value counts, then hold out 25% of the rows for validation.
transformed_X = transform(X)
# A fixed random_state makes the shuffle deterministic, so the split and the
# accuracy printed below are the same on every Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(
    transformed_X, y, test_size=0.25, random_state=42
)
model = LogisticRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print(f'Accuracy score: {accuracy_score(y_test, y_pred)}')

Accuracy score: 0.9169960474308301


In [7]:
# Final model: refit from scratch on every row (training and held-out rows together)
model = LogisticRegression()
model.fit(X=transformed_X, y=y)

LogisticRegression()

In [8]:
import pickle

# Serialize the fitted model; the relative name resolves against the current working directory
pkl_filename = "call_landlord_model.pkl"
with open(pkl_filename, mode='wb') as model_file:
    pickle.dump(model, model_file)

In [10]:
# In-sample class predictions: transformed_X is the same matrix passed to model.fit in the
# retraining cell, so this is a quick sanity check rather than an estimate of generalization.
model.predict(transformed_X)

array([0, 0, 0, ..., 0, 1, 0])

In [11]:
# Per-row class probabilities for the same in-sample rows (one column per class).
# NOTE(review): column order presumably follows model.classes_ -- confirm before indexing by position.
model.predict_proba(transformed_X)

array([[9.98673632e-01, 1.32636754e-03],
       [9.91752847e-01, 8.24715264e-03],
       [8.13177742e-01, 1.86822258e-01],
       ...,
       [6.94572193e-01, 3.05427807e-01],
       [1.20624390e-04, 9.99879376e-01],
       [9.85212636e-01, 1.47873637e-02]])