In [3]:
import pandas as pd
import numpy as np
import datetime as dt

In [7]:
df = pd.read_csv('rutas_ejemplo/ejemplo_info.csv')
df

Unnamed: 0,hora,fecha,ruta,longitud,latitud,carga
0,4.0,2020-10-10,1002.0,-75.568834,6.342525,2.0
1,22.0,2020-10-07,1001.0,-75.571189,6.283906,44.0
2,5.0,2020-10-06,1001.0,-75.575995,6.246103,27.0
3,19.0,2020-10-10,1002.0,-75.558036,6.326629,51.0
4,8.0,2020-10-10,1003.0,-75.568366,6.257081,21.0
5,20.0,2020-10-08,1001.0,-75.571730,6.257121,54.0
6,7.0,2020-10-02,1003.0,-75.568266,6.346123,54.0
7,20.0,2020-10-07,1001.0,-75.562857,6.342874,25.0
8,9.0,2020-10-01,1001.0,-75.574094,6.270815,38.0
9,17.0,2020-10-01,1002.0,-75.566823,6.297592,43.0


# Primer enfoque (Regresión Poisson)

In [8]:
import matplotlib.pyplot as plt
%matplotlib inline

import numpy as np
import pandas as pd
from statsmodels.genmod.generalized_estimating_equations import GEE
from statsmodels.genmod.cov_struct import (Exchangeable, Independence,Autoregressive)
from statsmodels.genmod.families import Poisson

In [9]:
fam = Poisson()
ind = Independence()
model1 = GEE.from_formula("carga ~ latitud + longitud + hora", "ruta", data = df, cov_struct=ind, family=fam)
result1 = model1.fit()
print(result1.summary())

                               GEE Regression Results                              
Dep. Variable:                       carga   No. Observations:               200000
Model:                                 GEE   No. clusters:                        3
Method:                        Generalized   Min. cluster size:               61523
                      Estimating Equations   Max. cluster size:               73127
Family:                            Poisson   Mean cluster size:             66666.7
Dependence structure:         Independence   Num. iterations:                    33
Date:                     Fri, 23 Oct 2020   Scale:                           1.000
Covariance type:                    robust   Time:                         15:39:14
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     28.6035     17.191      1.664      0.096      -5.090      62.297
latitud

In [10]:
result1.scale

1.0

# Segundo Enfoque (knn) 

In [11]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import matplotlib.patches as mpatches
import seaborn as sb
 
%matplotlib inline
plt.rcParams['figure.figsize'] = (16, 9)
plt.style.use('ggplot')
 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

In [12]:
X = df[['hora', 'longitud', 'latitud']].values
y = df['carga'].values
 
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [13]:
n_neighbors = 3
 
knn = KNeighborsClassifier(n_neighbors)
knn.fit(X_train, y_train)
print('Accuracy of K-NN classifier on training set: {:.2f}'
     .format(knn.score(X_train, y_train)))
print('Accuracy of K-NN classifier on test set: {:.2f}'
     .format(knn.score(X_test, y_test)))

Accuracy of K-NN classifier on training set: 0.07
Accuracy of K-NN classifier on test set: 0.02


# Tercer enfoque (Análisis Discrimante)

In [14]:
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

In [15]:
clf = QuadraticDiscriminantAnalysis()
clf.fit(X, y)
print(clf.predict([[7, -58.6, 6.2]]))

[ 7.]


In [16]:
clf.predict_proba(X_test)

array([[ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       ..., 
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.]])

# Cuarto Enfoque (Red Neuronal) 

In [17]:
from sklearn.neural_network import MLPClassifier

In [18]:
# Red neuronal con 5 capas. 
clf = MLPClassifier(solver='lbfgs', alpha=1e-5,hidden_layer_sizes=(5, 2), random_state=1)
clf.fit(X, y)

MLPClassifier(activation='relu', alpha=1e-05, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(5, 2), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=1, shuffle=True, solver='lbfgs', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False)

In [19]:
print(clf.predict([[588, -58.8, 6.2]]))

[ 39.]
