In [1]:
import networkx as nx
import numpy as np
import pickle
import random
import matplotlib.pyplot as plt
from matplotlib.pyplot import cm
import scipy
from collections import deque
import seaborn as sns

In [2]:
# Load the per-day input files into `ls`, keyed by day number (1..30).
# NOTE: pickle.load can execute arbitrary code embedded in the file, so these
# inputs must come from a trusted source.
ls = {}

for day in range(1, 31):
    day_path = f'./input/yahoo-{day}.pickle'
    with open(day_path, 'rb') as fh:
        ls[day] = pickle.load(fh)


In [3]:
# Build the instance for a single day of input.
#   nodes      - one node per (keyword, impression index) pair
#   tasks      - every account id that appears in a record of that day
#   c          - c[(node, account)] is a random preference drawn from [0, 1]
#   capacities - capacity of each task (the same constant for all tasks)
MAX_IMPRESSIONS = 100  # cap on the number of nodes generated per record
TASK_CAPACITY = 120    # capacity assigned to every task
PREF_SEED = 0          # fixed seed so the random preferences are reproducible

tasks = set()
nodes = set()
c = dict() # task preferences
capacities = dict()

# A dedicated generator keeps the draw reproducible without touching the
# state of the global `random` module.
pref_rng = random.Random(PREF_SEED)

day = 1
imp_list = ls[day]
for x in imp_list:
    keyword = str(x[0])
    account_ids = x[1].keys()
    impressions = min(int(x[2]), MAX_IMPRESSIONS)

    # The '_' separator keeps node ids unique. Plain concatenation maps
    # different (keyword, index) pairs to the same string, e.g.
    # ('1', 23) and ('12', 3) would both become '123'.
    new_nodes = [f'{keyword}_{imp}' for imp in range(impressions)]
    nodes.update(new_nodes)
    tasks.update(account_ids)

    for n in new_nodes:
        for acc in account_ids:
            c[(n, acc)] = pref_rng.uniform(0, 1)

# Setup capacities
for t in tasks:
    capacities[t] = TASK_CAPACITY

In [4]:
# Add conflicts
# Every unordered pair of nodes becomes a conflict edge independently with
# probability p. Each conflict edge gets weight 1.
p = 1e-3
CONFLICT_SEED = 0  # fixed seed so the conflict graph is reproducible

# Local generator: reproducible and independent of the global `random` state.
conflict_rng = random.Random(CONFLICT_SEED)

G = nx.Graph()
# Sorting fixes the iteration order (set order of strings varies between
# interpreter runs), and pairing each node only with the nodes after it
# visits every unordered pair exactly once, with u < v.
ordered_nodes = sorted(nodes)
for i, u in enumerate(ordered_nodes):
    for v in ordered_nodes[i + 1:]:
        if conflict_rng.uniform(0, 1) < p:
            G.add_edge(u, v)

# `edges` and the keys of `w` come from the same listing, so they use the
# same (u, v) orientation for every edge.
edges = list(G.edges())
w = {e: 1 for e in edges}

In [None]:
# Draw the conflict graph G with networkx's default settings.
# G is only ever populated through add_edge, so nodes that have no conflict
# edge are not part of G and will not be drawn.
# NOTE(review): with the edge count printed below this is likely slow and hard
# to read; the cell appears not to have been executed - confirm it is needed.
nx.draw(G)

In [5]:
# Summary statistics of the generated instance.
node_count = len(nodes)
task_count = len(tasks)
conflict_count = G.number_of_edges()
capacity_total = sum(capacities.values())

print(f'# of nodes = {node_count}')
print(f'# of tasks = {task_count}')
print(f'# of conflict edges = {conflict_count}')
print(f'total project capacities = {capacity_total}')

# of nodes = 8441
# of tasks = 4772
# of conflict edges = 35154
total project capacities = 572640


In [6]:
# Persist the generated instance to ./yahoo.pickle.
# The five objects are written as consecutive pickle records in one file, so a
# reader has to call pickle.load five times, in exactly this order.
# The `with` block closes the file on exit; no explicit close() is needed.
with open('./yahoo.pickle', 'wb') as file:
    pickle.dump(tasks, file) # project names
    pickle.dump(capacities, file) # max capacity for each project
    pickle.dump(c, file) # project preferences
    pickle.dump(w, file) # weights of conflict edges
    pickle.dump(edges, file) # conflict edges