# JN07 - Build a Bipartite Network
---
In this notebook, we build a bipartite network from a .csv file.

In [1]:
### Import libraries

import pandas as pd
import igraph as ig
import auxiliar_path

In [4]:
### Global variables

# Dataset selector; the options listed by the author are AMZ, HC, PM, UN and TOY.
DATASET = "TOY"
# Flag forwarded to auxiliar_path.get_path_topbot; its meaning is defined by that helper.
NODE_TYPE = False

# Path fragments resolved by the project-local helper module.
PATH_DATASET = auxiliar_path.get_path_dataset(DATASET)
# NOTE(review): PATH_NODETYPE is not used by any cell visible in this notebook.
PATH_NODETYPE = auxiliar_path.get_path_topbot(NODE_TYPE)

# NOTE(review): absolute, machine-specific root; it must be adapted before
# running the notebook on another machine.
GLOBAL_PATH = "/Users/ddiaz/Documents/code/phd-thesis-lab/"

# Input CSV file for the selected dataset
FILENAME = "".join([
    GLOBAL_PATH,
    "12-third_year/00-Data/",
    PATH_DATASET,
    "/01-DistributionsCSV/",
    DATASET,
    "-Rw.csv",
])

In [42]:
### Read CSV

df = pd.read_csv(FILENAME)

# Remove the noisy "Unnamed: 0" column (presumably an index saved by an earlier
# export). errors="ignore" keeps the cell working when the file does not
# contain that column.
df = df.drop(columns=["Unnamed: 0"], errors="ignore")
# DataFrame.info() prints the report itself and returns None, so wrapping it in
# print() would emit an extra "None" line.
df.info()
print()

# Unique identifiers per node type, sorted so the new IDs are deterministic
unique_source = sorted(df['uname'].unique())  # Type-1 nodes
unique_target = sorted(df['rname'].unique())  # Type-2 nodes

# Build the new ID mapping: sources take 0..|U|-1 and targets continue right
# after the last source ID, so both ID ranges are disjoint.
source_mapping = {old_id: new_id for new_id, old_id in enumerate(unique_source)}
start_target_id = len(unique_source)
target_mapping = {old_id: new_id for new_id, old_id in enumerate(unique_target, start=start_target_id)}

# Apply the mapping to the dataframe. Series.map performs one dictionary lookup
# per value; every value is a key of its mapping by construction, so no value
# is left unmapped. This replaces DataFrame.replace, which is slow for large
# dictionaries and has to cope with old and new IDs overlapping.
df_mapped = df.assign(
    uname=df['uname'].map(source_mapping),
    rname=df['rname'].map(target_mapping),
)

# Count how often each node appears after the mapping
source_counts = df_mapped['uname'].value_counts().to_dict()
target_counts = df_mapped['rname'].value_counts().to_dict()

# Merge both frequency tables into one dictionary indexed by the new node ID.
node_frequencies = {node: source_counts.get(node, 0) + target_counts.get(node, 0) for node in range(len(source_mapping) + len(target_mapping))}

# Count how often each (uname, rname) pair appears after the mapping
edge_counts = df_mapped.groupby(['uname', 'rname']).size().to_dict()

# Some information about access requests
n_user = len(df_mapped.uname.drop_duplicates())
n_rscs = len(df_mapped.rname.drop_duplicates())
print(f"|U| = {n_user}")
print(f"|R| = {n_rscs}")
print(f"|U+R| = {n_user+n_rscs}")
print()

# Possible edges. All three counts de-duplicate on the (uname, rname) pair so
# that they stay comparable even if the CSV carries additional columns.
n_acc_res = len(df_mapped.drop_duplicates(["uname", "rname"]))
df_pos = df_mapped[df_mapped.ACTION == 1]
n_ar_pos = len(df_pos.drop_duplicates(["uname", "rname"]))
n_ar_neg = len(df_mapped[df_mapped.ACTION == 0].drop_duplicates(["uname", "rname"]))

print(f"|L| = {n_acc_res}")
print(f"|L+| = {n_ar_pos}")
print(f"|L-| = {n_ar_neg}")
print()

if n_acc_res == n_ar_pos+n_ar_neg:
    print("*"*43)
    print("** CORRECT FLAG: Same number L = L+ + L- **")
    print("*"*43)
else:
    # The counts differ when a pair appears with both ACTION values or when
    # ACTION holds something other than 0/1; make that visible instead of
    # staying silent.
    print("** WARNING: L != L+ + L- (check ACTION values and conflicting pairs) **")

# To generate a new .CSV file with the clean data
filename_csv = GLOBAL_PATH + "12-third_year/00-Data/"+PATH_DATASET+"/01-DistributionsCSV/"+DATASET+"-MOD.csv"
df_mapped.to_csv(filename_csv, index=False)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   uname   100 non-null    int64
 1   rname   100 non-null    int64
 2   ACTION  100 non-null    int64
dtypes: int64(3)
memory usage: 2.5 KB
None

|U| = 8
|R| = 4
|U+R| = 12

|L| = 14
|L+| = 14
|L-| = 0

*******************************************
** CORRECT FLAG: Same number L = L+ + L- **
*******************************************


In [50]:
### Generate bipartite graph

# Build the undirected igraph graph from the distinct (uname, rname) pairs
edges = list(edge_counts)
g = ig.Graph(edges=edges, directed=False)

# Explicit identifier attribute, equal to the igraph vertex index
g.vs['id'] = list(range(g.vcount()))

# Node type label: 0 for the source (uname) IDs, then 1 for the target (rname) IDs
n_sources = len(source_mapping)
n_targets = len(target_mapping)
g.vs['type'] = [0] * n_sources + [1] * n_targets

# Node frequency attribute, looked up by vertex index
g.vs['freq'] = [node_frequencies[vertex_id] for vertex_id in range(g.vcount())]

# Edge weight attribute: number of rows observed for each pair, in edge order
g.es['weight'] = [edge_counts[pair] for pair in edges]

# Summary of the resulting graph
print(g.summary())
print(f"|V| = {g.vcount()}")
print(f"|U| = {len(g.vs.select(type_eq=0))}")
print(f"|R| = {len(g.vs.select(type_eq=1))}")
print(f"|E| = {g.ecount()}")
print(f"Is bipartite = {g.is_bipartite()}")

IGRAPH U-WT 12 14 -- 
+ attr: freq (v), id (v), type (v), weight (e)
|V| = 12
|U| = 8
|R| = 4
|E| = 14
Is bipartite = True


In [51]:
# Inspect every vertex with its attributes (only practical for small graphs)
list(g.vs)

[igraph.Vertex(<igraph.Graph object at 0x11a107740>, 0, {'id': 0, 'type': 0, 'freq': 11}),
 igraph.Vertex(<igraph.Graph object at 0x11a107740>, 1, {'id': 1, 'type': 0, 'freq': 8}),
 igraph.Vertex(<igraph.Graph object at 0x11a107740>, 2, {'id': 2, 'type': 0, 'freq': 27}),
 igraph.Vertex(<igraph.Graph object at 0x11a107740>, 3, {'id': 3, 'type': 0, 'freq': 22}),
 igraph.Vertex(<igraph.Graph object at 0x11a107740>, 4, {'id': 4, 'type': 0, 'freq': 12}),
 igraph.Vertex(<igraph.Graph object at 0x11a107740>, 5, {'id': 5, 'type': 0, 'freq': 8}),
 igraph.Vertex(<igraph.Graph object at 0x11a107740>, 6, {'id': 6, 'type': 0, 'freq': 2}),
 igraph.Vertex(<igraph.Graph object at 0x11a107740>, 7, {'id': 7, 'type': 0, 'freq': 10}),
 igraph.Vertex(<igraph.Graph object at 0x11a107740>, 8, {'id': 8, 'type': 1, 'freq': 23}),
 igraph.Vertex(<igraph.Graph object at 0x11a107740>, 9, {'id': 9, 'type': 1, 'freq': 39}),
 igraph.Vertex(<igraph.Graph object at 0x11a107740>, 10, {'id': 10, 'type': 1, 'freq': 13}),


In [34]:
# Sanity check: re-read the exported CSV and recompute the node and edge counts
new_df = pd.read_csv(filename_csv)
# DataFrame.info() prints the report itself and returns None, so it is not
# wrapped in print() (that would emit an extra "None" line).
new_df.info()

# Per-node frequencies, sorted by node ID (sorted() already returns a list)
print(sorted(new_df['uname'].value_counts().to_dict().items()))
print(sorted(new_df['rname'].value_counts().to_dict().items()))
# Number of rows per (uname, rname) pair, in group order
print(list(new_df.groupby(['uname', 'rname']).size().to_dict().values()))


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   uname   100 non-null    int64
 1   rname   100 non-null    int64
 2   ACTION  100 non-null    int64
dtypes: int64(3)
memory usage: 2.5 KB
None
[(0, 11), (1, 8), (2, 27), (3, 22), (4, 12), (5, 8), (6, 2), (7, 10)]
[(8, 23), (9, 39), (10, 13), (11, 25)]
[11, 3, 5, 7, 20, 2, 10, 5, 5, 4, 8, 8, 2, 10]


In [35]:
# Edge weights stored in the graph, shown so they can be compared with the
# per-pair row counts recomputed from the exported CSV.
g.es["weight"]

[11, 3, 5, 7, 20, 2, 10, 5, 5, 4, 8, 8, 2, 10]

In [18]:
### Save the graph

# Destination GraphML file for the selected dataset
FILE_GRAPH = "".join([
    GLOBAL_PATH,
    "12-third_year/00-Data/",
    PATH_DATASET,
    "/02-Graphs/binet-",
    DATASET,
    "-Rw.graphml",
])
g.write_graphml(FILE_GRAPH)

In [None]:
##