In [1]:
from pathlib import Path

import pandas as pd

# Single place to repoint the notebook at another copy of the workshop data.
# The default is the original author's local Windows folder.
DATA_DIR = Path(r'C:\Data\DeepLearningWorkshop')

# Features and target are stored in two separate CSV files.
# NOTE(review): the coefficient listing at the end of this notebook contains
# names such as `Browser_1.1` whose coefficients equal those of `Browser_1`.
# pandas appends `.1` to repeated header names, so the feature file probably
# holds duplicated columns -- confirm against whatever produced the CSV.
feats = pd.read_csv(DATA_DIR / 'OSI_feats_e3.csv')
target = pd.read_csv(DATA_DIR / 'OSI_target_e2.csv')

# Fail here, with a clear message, if the two files do not describe the same
# number of rows, instead of failing later inside the train/test split.
assert len(feats) == len(target), \
    'feature and target files have different row counts'

In [2]:
from sklearn.model_selection import train_test_split

# Split settings: fraction of rows held out for testing, and the seed that
# makes the shuffle repeatable.
test_size = 0.2
random_state = 42

# Features and target are split together so their rows stay matched.
(X_train, X_test,
 y_train, y_test) = train_test_split(
    feats,
    target,
    test_size=test_size,
    random_state=random_state,
)

In [3]:
# Report the (rows, columns) of each of the four splits, one per line.
print(
    f'Shape of X_train: {X_train.shape}',
    f'Shape of y_train: {y_train.shape}',
    f'Shape of X_test: {X_test.shape}',
    f'Shape of y_test: {y_test.shape}',
    sep='\n',
)

Shape of X_train: (9864, 84)
Shape of y_train: (9864, 1)
Shape of X_test: (2466, 84)
Shape of y_test: (2466, 1)


In [12]:
from sklearn.linear_model import LogisticRegression

# Logistic-regression classifier; every option other than the two below is
# left at the library default.
model = LogisticRegression(
    max_iter=100000,   # generous iteration cap for the solver
    random_state=42,   # fixed seed for repeatability
)

In [13]:
# Train on the 'Revenue' column selected from the one-column target frame.
# The fit call stays the last expression so the notebook displays the
# fitted estimator.
train_labels = y_train['Revenue']
model.fit(X_train, train_labels)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100000,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=42, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [14]:
# Predicted class label for every row of the held-out feature set.
y_pred = model.predict(X=X_test)

In [15]:
from sklearn import metrics

# Fraction of held-out rows whose predicted label equals the true label,
# printed as a percentage.
accuracy = metrics.accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy of the model is {accuracy*100:.4f}%')

Accuracy of the model is 87.0641%


In [16]:
# Precision, recall and F-score on the held-out set using binary averaging.
# The fourth returned element (support) is not used here.
precision, recall, fscore, _ = metrics.precision_recall_fscore_support(
    y_true=y_test,
    y_pred=y_pred,
    average='binary',
)
print(
    f'Precision: {precision:.4f}\n'
    f'Recall: {recall:.4f}\n'
    f'fscore: {fscore:.4f}'
)

Precision: 0.7323
Recall: 0.3528
fscore: 0.4762


In [17]:
# Pair each fitted coefficient with its feature name and sort ascending.
# Tuples are compared coefficient first, so ties fall back to the name.
feature_names = X_train.columns.values.tolist()
ranked_pairs = sorted(zip(model.coef_[0], feature_names))

# One "name: coefficient" line per feature, most negative first.
coef_list = [f'{feature}: {coef}' for coef, feature in ranked_pairs]
for item in coef_list:
    print(item)

TrafficType_13: -0.6064504858411706
Month_Dec: -0.6048318503617278
TrafficType_3: -0.5666450710838646
VisitorType_Returning_Visitor: -0.5141014457086867
Month_Mar: -0.49349487605406545
OperatingSystems_1: -0.45769401045666064
OperatingSystems_3: -0.43798427783650123
Month_May: -0.39375177372062725
ExitRates: -0.3658376266629792
SpecialDay: -0.35085488209271376
Region_1: -0.29329263983567366
Region_3: -0.27527811614858466
BounceRates: -0.2746030298189121
Region_4: -0.26415037300485666
TrafficType_1: -0.25412806699110124
Month_June: -0.24631545717260267
Browser_1: -0.2047073350269136
Browser_1.1: -0.2047073350269136
OperatingSystems: -0.17983667139789394
Browser_2: -0.17890877861689722
Browser_2.1: -0.17890877861689722
Browser_4: -0.1692713842448985
Browser_4.1: -0.1692713842448985
OperatingSystems_2: -0.16845730804832879
Browser_6: -0.16111948700427103
Browser_6.1: -0.16111948700427103
Browser_3: -0.15493701651547126
Browser_3.1: -0.15493701651547126
TrafficType_6: -0.14526715009581445
