In [1]:
import pandas as pd
import numpy as np
import networkx as nx

In [2]:
# Path of the CSV input; the loading cells read its `from_address` and `to_address` columns.
filename = "3days.csv"

In [3]:
# Load the two address columns of the whole CSV into a DataFrame.
# NOTE(review): `edges` is not referenced by any later cell visible here — confirm it is still
# needed, since it reads the complete file while the graph below uses only a fraction of it.
edges = pd.read_csv(filename, usecols=['from_address', 'to_address'])

In [4]:
# Build the directed graph from the first `percentage_to_read` percent of the CSV's data rows.
percentage_to_read = 33  # share of the data rows to load, in percent

# Count the lines inside a context manager so the file handle is closed right away
# (the bare `open(...)` inside a generator left the handle open until garbage collection).
with open(filename, 'r') as csv_file:
    total_rows = sum(1 for _ in csv_file)

# `total_rows` counts every line, including the header that `read_csv` consumes for the
# column names; exclude it so the percentage is taken over data rows only.
data_rows = max(total_rows - 1, 0)
rows_to_read = int(data_rows * percentage_to_read / 100)

df = pd.read_csv(filename, nrows=rows_to_read, usecols=['from_address', 'to_address'])

# One directed edge per distinct (from_address, to_address) pair; a DiGraph keeps no parallel edges.
G = nx.from_pandas_edgelist(df, source='from_address', target='to_address', create_using=nx.DiGraph())

In [5]:
# Undirected copy of G: edge direction is dropped, so two nodes linked in both directions
# end up sharing a single undirected edge.
non_directed_g = G.to_undirected()

# Cantidad de nodos

In [6]:
# Number of distinct addresses (nodes) in the directed graph.
total_nodes = G.number_of_nodes()
total_nodes

545345

# Cantidad de aristas

In [7]:
# Number of directed edges in G.
total_edges = G.number_of_edges()
total_edges

699007

In [8]:
# Number of edges once direction is ignored (counted on the undirected copy).
non_directed_total_edges = non_directed_g.number_of_edges()
non_directed_total_edges

690265

In [9]:
# Ratio of undirected to directed edge counts. The shortfall from 1 comes from node pairs
# connected in both directions, which collapse into one undirected edge.
non_directed_total_edges / total_edges

0.9874936874738022

#### Al ignorar la dirección se conserva el 98,75% de las aristas: solo 8.742 pares de nodos (≈1,27% de los pares conectados) tienen aristas en ambos sentidos


In [10]:
# Materialise every clique of the undirected graph (not only the maximal ones) into a list.
# NOTE(review): the result includes each single node and each edge as a clique, so the list
# is at least nodes + edges long — consider consuming the generator lazily if memory is tight.
cliques = list(nx.enumerate_all_cliques(non_directed_g))

In [11]:
# Preview only the first entries: `cliques` holds every clique in the graph (single nodes
# included), so displaying the whole list floods the notebook output.
cliques[:20]

[['0x5f67ffa4b3f77dd16c9c34a1a82cab8daea03191'],
 ['0xeb3a9a839dfeeaf71db1b4ed6a8ae0ccb171b227'],
 ['0x451844bfe5699c70031ab1fd236593a4ed3f4f30'],
 ['0x4c9af439b1a6761b8e549d8d226a468a6b2803a8'],
 ['0xdac17f958d2ee523a2206206994597c13d831ec7'],
 ['0x9fd0d228d9afedf44f262be67217fadf0531152a'],
 ['0x03df236eacfcef4457ff7d6b88e8f00823014bcd'],
 ['0xc21785dc23f9c6ba623c1b7eef2b2facaaaf858c'],
 ['0xa67b8e40111a0edd30c3210b77aadb86ad234c43'],
 ['0xc7ae720f169dd0e962093ea7999593249371814f'],
 ['0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2'],
 ['0x748c5fe4d3b6b53b5200ebb5adeed55dccf01b92'],
 ['0x781a0ba563ce67f04de5ac9b31d5ef929f9f8632'],
 ['0x8f9cf48e154e76a09f175e17d1cd43ecb57513ce'],
 ['0x68b3465833fb72a70ecdf485e0e4c7bd8665fc45'],
 ['0x116895e720d659fc8ce61855bdb78c87ca73875f'],
 ['0xdf2e711304604a3cfc56bd7d4b35b82b82ed6f5b'],
 ['0xf0a9b722de8996fa9670a3a84c85c7ffb2fe77c9'],
 ['0x881d40237659c251811cec9c364ef91dc08d300c'],
 ['0x56be9fd9dfc9f6ff984f52d4eab9945ac8a5e2c7'],
 ['0x9ca8887d13bc459

In [12]:
# Keep only the cliques with more than four members (five or more nodes), then display them.
non_single_clique = [members for members in cliques if len(members) > 4]
non_single_clique

[['0x68b3465833fb72a70ecdf485e0e4c7bd8665fc45',
  '0x392efe34a716ab510e0b89c8766b5788c03adc7e',
  '0x26ea609211f83f9be9078f3790a0b39b03150477',
  '0x4ac29f990aa21098b97990d8852cfc4239503547',
  '0xbdaeff3a007989b9358a004c8f6f415493bcfed5'],
 ['0x283af0b28c62c092c9727f1ee09c02ca627eb7f5',
  '0x20a6d8d7dcc7109e747b8ca7cc98ff0c4cd5c78a',
  '0x575545afea337f74199c488b8213f5a19e26d5dd',
  '0x690e1537d5a8a1189f0ccd33750ed1b65226e78f',
  '0x3945cb4944b5123de007e8966636fa060d4c6887'],
 ['0x283af0b28c62c092c9727f1ee09c02ca627eb7f5',
  '0x20a6d8d7dcc7109e747b8ca7cc98ff0c4cd5c78a',
  '0x575545afea337f74199c488b8213f5a19e26d5dd',
  '0x3945cb4944b5123de007e8966636fa060d4c6887',
  '0x2f05b3d71e5ae4aa449659fbaca1badd71214fd3'],
 ['0x7f268357a8c2552623316e2562d90e642bb538e5',
  '0xa9b364b49a50482b67fc8b032a15fc9768ebb726',
  '0xf51da429c16a49b0decfe81281dc09b0d034f454',
  '0xc23906b2c112e1081fdde8d8cfc24b236f5795f4',
  '0xe3a56b33545dbe01b5a112f2b0714eaa192159c4'],
 ['0xd311bdacb151b72bddfee9cbdc414af

In [13]:
# How many cliques passed the size filter applied to `non_single_clique`.
len(non_single_clique)

8

In [14]:
# Score every node of the directed graph with PageRank and report the 15 highest.
pagerank = nx.pagerank(G)

# (node, score) pairs ordered from the highest score to the lowest.
sorted_pagerank = sorted(
    pagerank.items(),
    key=lambda pair: pair[1],
    reverse=True,
)
top_15_pagerank = sorted_pagerank[:15]

for node, value in top_15_pagerank:
    print(f"Nodo: {node}, PageRank: {value}")

Nodo: 0x7f268357a8c2552623316e2562d90e642bb538e5, PageRank: 0.022708448549030192
Nodo: 0xdac17f958d2ee523a2206206994597c13d831ec7, PageRank: 0.018658989519013177
Nodo: 0x34d85c9cdeb23fa97cb08333b511ac86e1c4e258, PageRank: 0.017931939921801927
Nodo: 0x283af0b28c62c092c9727f1ee09c02ca627eb7f5, PageRank: 0.011505087860588428
Nodo: 0x68b3465833fb72a70ecdf485e0e4c7bd8665fc45, PageRank: 0.008580736443214176
Nodo: 0x4d224452801aced8b2f0aebe155379bb5d594381, PageRank: 0.006649978099172729
Nodo: 0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48, PageRank: 0.006563649307952833
Nodo: 0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2, PageRank: 0.00578834988808716
Nodo: 0xc098b2a3aa256d2140208c3de6543aaef5cd3a94, PageRank: 0.0033002952680057595
Nodo: 0x6dfc34609a05bc22319fa4cce1d1e2929548c0d7, PageRank: 0.002935563680851925
Nodo: 0xa5409ec958c83c3f309868babaca7c86dcb077c1, PageRank: 0.002569303009903347
Nodo: 0xfbddadd80fe7bda00b901fbaf73803f2238ae655, PageRank: 0.00216296180713029
Nodo: 0x6b175474e89094c44da98

## Find cliques

###### https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.clique.find_cliques.html#networkx.algorithms.clique.find_cliques

In [15]:
# Collect the maximal cliques of the undirected graph (cliques that cannot be extended
# by adding another node) into a list.
maximal_cliques = list(nx.find_cliques(non_directed_g))

In [25]:
# Maximal cliques with more than three members (four or more nodes).
# NOTE(review): the variable name says "more than 2" but the filter is `> 3` — confirm
# which threshold is intended.
more_than_2_cliques = [members for members in maximal_cliques if len(members) > 3]

len(more_than_2_cliques)

329

In [27]:
# Not sure what this number is useful for; it just seemed interesting.
# It is the count of filtered maximal cliques divided by the number of nodes in G.
len(more_than_2_cliques) / total_nodes

0.0006032878269719168

**TODO:** Investigar si puede ser interesante armar un grafo de cliques con el método `nx.make_max_clique_graph` y, a partir de ahí, buscar caminos, otras subcomunidades, etc.