In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix

In [2]:
# Load the ads dataset and print a preview followed by summary statistics.
dataset = pd.read_csv("../datasets/Social_Network_Ads.csv")
print(dataset.head(), dataset.describe(), sep="\n")

# Features are the columns at positions 2 and 3 (Age and EstimatedSalary in the
# preview above); the target is the last column (Purchased).
feature_positions = [2, 3]
x = dataset.iloc[:, feature_positions].to_numpy()
y = dataset.iloc[:, -1].to_numpy()

    User ID  Gender   Age  EstimatedSalary  Purchased
0  15624510    Male  19.0          19000.0          0
1  15810944    Male  35.0          20000.0          0
2  15668575  Female  26.0          43000.0          0
3  15603246  Female  27.0          57000.0          0
4  15804002    Male  19.0          76000.0          0
            User ID         Age  EstimatedSalary   Purchased
count  4.000000e+02  400.000000       400.000000  400.000000
mean   1.569154e+07   37.655000     69742.500000    0.357500
std    7.165832e+04   10.482877     34096.960282    0.479864
min    1.556669e+07   18.000000     15000.000000    0.000000
25%    1.562676e+07   29.750000     43000.000000    0.000000
50%    1.569434e+07   37.000000     70000.000000    0.000000
75%    1.575036e+07   46.000000     88000.000000    1.000000
max    1.581524e+07   60.000000    150000.000000    1.000000


In [3]:
# Hold out 25% of the rows for evaluation; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=0,
)

In [4]:
# Fit the scaler on the training split only, then apply that same
# transformation to both the training and the test features.
scaler = StandardScaler().fit(x_train)
x_train, x_test = scaler.transform(x_train), scaler.transform(x_test)

In [5]:
# Logistic-regression model trained on the training features and labels.
# The fit call stays as the last expression so the estimator repr is displayed.
classifier = LogisticRegression(random_state=0)
classifier.fit(X=x_train, y=y_train)

LogisticRegression(random_state=0)

In [6]:
# Predicted labels for the held-out features; the bare name displays the array.
y_pred = classifier.predict(X=x_test)
y_pred

array([0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,
       0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1], dtype=int64)

In [7]:
# Confusion matrix of true vs. predicted test labels
# (scikit-learn convention: rows are true labels, columns are predictions).
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
cm

array([[65,  3],
       [ 8, 24]], dtype=int64)

In [20]:
# Heatmap of the confusion matrix.
# scikit-learn stores true labels on rows and predicted labels on columns, and
# px.imshow draws rows along the y axis and columns along the x axis, so the
# x axis is "predicted" and the y axis is "true".
fig = px.imshow(cm, labels=dict(x='Valores Predichos', y='Valores Reales'),
        x=['0','1'], y=['0','1'], title='Matriz de confusion', color_continuous_scale='blues')

# Write each count on top of its own cell. Dedicated loop names are used so the
# notebook-level `x` (features) and `y` (labels) are not overwritten.
for row_idx, row_counts in enumerate(cm):
    for col_idx, count in enumerate(row_counts):
        # A cell at matrix position (row, col) is drawn at x=col, y=row.
        fig.add_annotation(x=col_idx, y=row_idx, text=f'{count}', showarrow=False,
            font=dict(size=16, color='#808080'))
fig.update_layout(template='plotly_dark')
fig.show()

In [21]:
# Plot the model: build the grid on which the classifier will be evaluated
mesh_size = .02   # spacing between neighbouring grid nodes
margin = 0.25     # padding added around the range of the training features

# Column-wise bounds of the (two-column) training features, widened by the margin
x_min, y_min = x_train.min(axis=0) - margin
x_max, y_max = x_train.max(axis=0) + margin

# 1-D axes of the grid and the 2-D coordinate matrices derived from them
xrange = np.arange(x_min, x_max, mesh_size)
yrange = np.arange(y_min, y_max, mesh_size)
xx, yy = np.meshgrid(xrange, yrange)

In [22]:
# Evaluate the classifier on every grid node: flatten the coordinate matrices
# into an (n_nodes, 2) array, keep the probability column at index 1 (the
# second class, i.e. label 1 for this 0/1 target) and restore the grid layout.
grid_points = np.c_[xx.ravel(), yy.ravel()]
z = classifier.predict_proba(grid_points)[:, 1].reshape(xx.shape)
z

array([[4.78005796e-04, 4.98267191e-04, 5.19386967e-04, ...,
        8.70724716e-01, 8.75328355e-01, 8.79790686e-01],
       [4.88733869e-04, 5.09449767e-04, 5.31043281e-04, ...,
        8.73203782e-01, 8.77731535e-01, 8.82119433e-01],
       [4.99702597e-04, 5.20883182e-04, 5.42961049e-04, ...,
        8.75642098e-01, 8.80094737e-01, 8.84408995e-01],
       ...,
       [5.70811920e-02, 5.93582282e-02, 6.17201524e-02, ...,
        9.98828486e-01, 9.98876093e-01, 9.98921768e-01],
       [5.82882049e-02, 6.06102953e-02, 6.30187027e-02, ...,
        9.98854184e-01, 9.98900748e-01, 9.98945422e-01],
       [5.95191297e-02, 6.18870351e-02, 6.43427001e-02, ...,
        9.98879319e-01, 9.98924863e-01, 9.98968558e-01]])

In [66]:
# Plot the figure: training points drawn together with the probability surface

# Training samples, coloured by their label
train_points = go.Scatter(
    x=x_train[:, 0], y=x_train[:, 1], name='Train', mode='markers',
    marker=dict(color=y_train, size=8, colorscale='Bluered'),
)

# Labelled contour lines of the grid probabilities
proba_contour = go.Contour(
    x=xrange, y=yrange, z=z, line_width=0.2, line_smoothing=0.5,
    contours=dict(coloring='heatmap', showlabels=True,
                  labelfont=dict(size=12, color='white')),
)

# Heatmap of the same grid probabilities
proba_heatmap = go.Heatmap(x=xrange, y=yrange, z=z)

fig1 = go.Figure(data=[train_points, proba_contour, proba_heatmap])
fig1.update_layout(template='plotly_dark', height=600)
fig1.show()
