# Aprendizaje Automático en Grafos

### - LECTURA DE FICHEROS

In [17]:
import pandas
import numpy
#Usamos el método read_csv para leer  ENGB_target.csv, y para ello necesitamos pandas.
twitch_users = pandas.read_csv('ENGB_target.csv', skiprows=1 , header=None,
                       names=['id2', 'days', 'mature', 'views',
                              'partner', 'id'], 
                        usecols = ['id', 'days', 'mature', 'views',
                              'partner'] )


In [18]:
cols = ['id', 'days', 'views','partner', 'mature']
twitch_users [['partner']] *= 1 #codificamos la columna de partner a 0=False y 1=True
twitch_users=twitch_users[cols]

In [19]:
twitch_users.head(20)


Unnamed: 0,id,days,views,partner,mature
0,2299,1459,9528,0,False
1,153,1629,3615,0,True
2,397,411,46546,0,True
3,5623,953,5863,0,True
4,5875,741,5594,0,True
5,3346,1820,4594,0,False
6,6930,2070,18460,0,False
7,6367,846,204,0,True
8,1434,936,1064,0,True
9,4470,973,313084,0,False


### - CREACIÓN DEL GRAFO

In [20]:
#usando la librería networkx (como nx) creamos un grafo vacío
import networkx as nx
G=nx.Graph()

In [21]:
#recorremos las aristas
aristas = pandas.read_csv('ENGB_edges.csv')

In [22]:
aristas1 = aristas[['from', 'to']]

In [23]:
G = nx.from_pandas_edgelist(aristas1, 'from', 'to')

In [24]:
print(G.number_of_edges())
G.number_of_nodes()

35324


7126

In [25]:
print("""from matplotlib.pyplot import figure
figure(figsize=(100, 80))
nx.draw_spring(G, with_labels=True)
#Asi se dibuja el grafo pero es demasiado grande, por lo que no recomendamos ejecutarlo""")
#Comentado para que no se ejecute con "run all"

from matplotlib.pyplot import figure
figure(figsize=(100, 80))
nx.draw_spring(G, with_labels=True)
#Asi se dibuja el grafo pero es demasiado grande, por lo que no recomendamos ejecutarlo


### - MODELO DE CLASIFICACIÓN _KNN_

In [26]:
from sklearn import preprocessing

atributos = twitch_users.loc[:, 'days': 'partner']  # selección de las columnas de atributos
objetivo = twitch_users['mature']  # selección de la columna objetivo

In [27]:
print(pandas.Series(objetivo).value_counts(normalize=True))

True     0.545608
False    0.454392
Name: mature, dtype: float64


In [28]:
from sklearn import model_selection
from sklearn import neighbors
cv_scores_KNN = {}
for k in range (1,11):                          #EXPLICAR POR QUE USAMOS HAMMING
    clasif_KNN = neighbors.KNeighborsClassifier(n_neighbors=k, metric='hamming')  
    cv_scores = model_selection.cross_val_score(clasif_KNN,
                                               atributos,
                                               objetivo,
                                               cv=10)
    cv_scores_KNN[k]= cv_scores.mean()
print(cv_scores_KNN)
mejor_k_KNN = max(cv_scores_KNN, key=cv_scores_KNN.get)
print(f'Mejor suavizado: {mejor_k_KNN}')

{1: 0.49915868225727655, 2: 0.47684495012370587, 3: 0.48667168318704, 4: 0.47109243267094253, 5: 0.48947752021053625, 6: 0.4696895141591944, 7: 0.47306108861118557, 8: 0.4584643932111508, 9: 0.4640764612257119, 10: 0.46126924531572566}
Mejor suavizado: 1


### - MODELO DE CLASIFICACIÓN _Naive Bayes_

In [29]:
from sklearn import naive_bayes

In [38]:

objetivo2 = objetivo * 1 #codificamos la columna de partner a 0=False y 1=True
print(objetivo2)

0       0
1       1
2       1
3       1
4       1
       ..
7121    0
7122    1
7123    1
7124    1
7125    0
Name: mature, Length: 7126, dtype: int32


In [42]:
cv_scores_NB = {}
for k in range (1,2):
    clasif_NB = naive_bayes.CategoricalNB(alpha=k)  # alpha es el parámetro de suavizado
    cv_scores = model_selection.cross_val_score(clasif_NB,
                                               atributos,
                                               objetivo2,
                                               cv=3)
    cv_scores_NB[k]= cv_scores.mean()
print(cv_scores_NB)
mejor_k_NB = max(cv_scores_NB, key=cv_scores_NB.get)
print(f'Mejor suavizado: {mejor_k_NB}')

{1: nan}
Mejor suavizado: 1


Traceback (most recent call last):
  File "C:\Users\pacor\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\model_selection\_validation.py", line 761, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\pacor\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\metrics\_scorer.py", line 105, in __call__
    score = scorer(estimator, *args, **kwargs)
  File "C:\Users\pacor\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\metrics\_scorer.py", line 418, in _passthrough_scorer
    return estimator.score(*args, **kwargs)
  File "C:\Users\pacor\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\base.py", line 651, in score
    return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
  File "C:\Users\pacor\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\naive_bayes.py", line 83, in predict
    jll = self._joint_log_likelihood(X)
  File "C:\Users\pacor\AppData\Local\Programs\Pytho

### - MODELO DE CLASIFICACIÓN _Redes Neuronales_

In [31]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

from tensorflow import get_logger
get_logger().setLevel('ERROR')

In [32]:
from tensorflow import random as tensorflow_random

tensorflow_random.set_seed(394867)

In [33]:
from sklearn import model_selection

numpy.random.seed(43958734)
numpy.set_printoptions(threshold=10)

In [34]:
from tensorflow import keras

In [67]:
atributos_numpy = atributos.to_numpy()*1
print(atributos)
print(atributos_numpy)

      days  views  partner
0     1459   9528        0
1     1629   3615        0
2      411  46546        0
3      953   5863        0
4      741   5594        0
...    ...    ...      ...
7121  2624   3174        0
7122  2035   3158        0
7123  1418   3839        0
7124  2046   6208        0
7125  1797   3545        0

[7126 rows x 3 columns]
[[ 1459  9528     0]
 [ 1629  3615     0]
 [  411 46546     0]
 ...
 [ 1418  3839     0]
 [ 2046  6208     0]
 [ 1797  3545     0]]


In [68]:
objetivo_numpy = objetivo.to_numpy().astype(float)
print(objetivo)
print(objetivo_numpy)

0       False
1        True
2        True
3        True
4        True
        ...  
7121    False
7122     True
7123     True
7124     True
7125    False
Name: mature, Length: 7126, dtype: bool
[0. 1. 1. ... 1. 1. 0.]


In [69]:
#normalizador.adapt(atributos_numpy)

In [70]:
(atributos_entrenamiento, atributos_prueba,
 objetivo_entrenamiento, objetivo_prueba) = model_selection.train_test_split(
    atributos_numpy, objetivo_numpy, test_size=.25)

In [94]:
red_twitch = keras.Sequential()

red_twitch.add(keras.Input(shape=(4,)))
red_twitch.add(keras.layers.Dense(1))
#red_twitch.add(keras.layers.Dense(60, input_dim=(174), activation='relu'))
#función de activación (por defecto, la identidad), Dense(2, activation='softmax')


In [95]:
red_twitch.weights

[<tf.Variable 'dense_8/kernel:0' shape=(4, 1) dtype=float32, numpy=
 array([[ 0.2352897 ],
        [ 0.10924065],
        [-0.9683454 ],
        [-0.8860172 ]], dtype=float32)>,
 <tf.Variable 'dense_8/bias:0' shape=(1,) dtype=float32, numpy=array([0.], dtype=float32)>]

In [96]:
red_twitch.compile(optimizer='SGD', loss='mean_squared_error')
#red_twitch.compile(optimizer='SGD', loss='binary_crossentropy', metrics=['accuracy'])

In [97]:
red_twitch.fit(atributos_entrenamiento, objetivo_entrenamiento,
                batch_size=256, epochs=10)

Epoch 1/10


ValueError: in user code:

    File "C:\Users\pacor\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 1021, in train_function  *
        return step_function(self, iterator)
    File "C:\Users\pacor\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 1010, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\pacor\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 1000, in run_step  **
        outputs = model.train_step(data)
    File "C:\Users\pacor\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 859, in train_step
        y_pred = self(x, training=True)
    File "C:\Users\pacor\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "C:\Users\pacor\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\input_spec.py", line 264, in assert_input_compatibility
        raise ValueError(f'Input {input_index} of layer "{layer_name}" is '

    ValueError: Input 0 of layer "sequential_8" is incompatible with the layer: expected shape=(None, 4), found shape=(None, 3)
