## Personalized Recommendation Model

### Interfaces: DataIn
- CRM Data Connection
- Twitter Crawler

### Interfaces: DataOut

#### API Services
- What to work/learn/observe
- Whom to follow
- Partnership: With whom on what

### Data Objects
- SkillComplementarityModel
    - training/forming
    - given two nodes returning complementarity score
    - given a node returns the list of complementing nodes
- ExpertiseSeekingNet
- PotentialSocialNet
- UserBehavioralModel


### Backend Incremental Training Processes
- Update complementarity model from the profiles/projects.
- Update expertise net in terms of similarity and difference scores
- Update social similarity from localilty and twitter 

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pprint
pp = pprint.PrettyPrinter(indent=0)
import csv
from os import listdir
from os.path import isfile, join
import math

In [2]:
def normalize(tag):
    tags = list()
    #print(tag)
    for t in tag.split(','):
        tag = t.lower().strip()
        tags.append(tag)
    return(tags)

In [3]:
def extend_alist_entry(adict, key, new_values):
    if key in adict.keys():
        values = set(adict[key])
        values.extend(new_values)
    else:
        values = set(new_values)
    return(values)

In [4]:
attr_file = "./in/attr.csv"
attr_data = dict()
with open(attr_file, mode='r') as infile:
    reader = csv.reader(infile)
    header = reader.__next__()
    print(header)
    for row in reader:
        mid = row[0]
        attr_data[mid] = {
            'Gender': row[1],
            'Type': row[2],
            'Location': row[3],
            'Twitter':'',
            'Tags': set()
        }


['ID', 'Gender', 'Type', 'Country']


In [5]:
pp.pprint(attr_data)

{'135377966': {'Gender': 'M',
             'Location': 'IT',
             'Tags': set(),
             'Twitter': '',
             'Type': 'P'},
'135574293': {'Gender': '',
             'Location': 'x',
             'Tags': set(),
             'Twitter': '',
             'Type': 'O'},
'135574294': {'Gender': '',
             'Location': 'x',
             'Tags': set(),
             'Twitter': '',
             'Type': 'P'},
'135574317': {'Gender': 'M',
             'Location': 'TR',
             'Tags': set(),
             'Twitter': '',
             'Type': 'P'},
'137845895': {'Gender': '',
             'Location': 'x',
             'Tags': set(),
             'Twitter': '',
             'Type': 'O'},
'139195417': {'Gender': 'M',
             'Location': 'x',
             'Tags': set(),
             'Twitter': '',
             'Type': 'P'},
'139195418': {'Gender': 'F',
             'Location': 'x',
             'Tags': set(),
             'Twitter': '',
             'Type': 'P'},
'13919

             'Tags': set(),
             'Twitter': '',
             'Type': 'P'},
'141907533': {'Gender': 'M',
             'Location': 'x',
             'Tags': set(),
             'Twitter': '',
             'Type': 'P'},
'141907534': {'Gender': 'M',
             'Location': 'x',
             'Tags': set(),
             'Twitter': '',
             'Type': 'P'},
'141907535': {'Gender': '',
             'Location': 'x',
             'Tags': set(),
             'Twitter': '',
             'Type': 'O'},
'141907536': {'Gender': 'M',
             'Location': 'x',
             'Tags': set(),
             'Twitter': '',
             'Type': 'P'},
'141907537': {'Gender': 'M',
             'Location': 'x',
             'Tags': set(),
             'Twitter': '',
             'Type': 'P'},
'141907539': {'Gender': 'M',
             'Location': 'x',
             'Tags': set(),
             'Twitter': '',
             'Type': 'P'},
'141907540': {'Gender': 'F',
             'Location': 'UK',
      

'155229330': {'Gender': 'F',
             'Location': 'SP',
             'Tags': set(),
             'Twitter': '',
             'Type': 'P'},
'155236164': {'Gender': 'M',
             'Location': 'SP',
             'Tags': set(),
             'Twitter': '',
             'Type': 'P'},
'155241805': {'Gender': 'F',
             'Location': 'UK',
             'Tags': set(),
             'Twitter': '',
             'Type': 'P'},
'155243456': {'Gender': 'F',
             'Location': 'IT',
             'Tags': set(),
             'Twitter': '',
             'Type': 'P'},
'155250009': {'Gender': 'M',
             'Location': 'IT',
             'Tags': set(),
             'Twitter': '',
             'Type': 'P'},
'155257392': {'Gender': 'X',
             'Location': 'SK',
             'Tags': set(),
             'Twitter': '',
             'Type': 'P'},
'155259995': {'Gender': 'M',
             'Location': 'SP',
             'Tags': set(),
             'Twitter': '',
             'Type': 'P'},

'155743049': {'Gender': 'M',
             'Location': 'SK',
             'Tags': set(),
             'Twitter': '',
             'Type': 'P'},
'155743075': {'Gender': 'F',
             'Location': 'UK',
             'Tags': set(),
             'Twitter': '',
             'Type': 'P'},
'155743181': {'Gender': 'M',
             'Location': 'SK',
             'Tags': set(),
             'Twitter': '',
             'Type': 'P'},
'155743223': {'Gender': 'M',
             'Location': 'IT',
             'Tags': set(),
             'Twitter': '',
             'Type': 'P'},
'155743320': {'Gender': 'M',
             'Location': 'SK',
             'Tags': set(),
             'Twitter': '',
             'Type': 'P'},
'155743363': {'Gender': 'M',
             'Location': 'SK',
             'Tags': set(),
             'Twitter': '',
             'Type': 'P'},
'155743697': {'Gender': 'M',
             'Location': 'SK',
             'Tags': set(),
             'Twitter': '',
             'Type': 'P'},

In [6]:
len(attr_data)

526

In [7]:
csv_file = "./in/OM.csv"
tags_data = dict()
with open(csv_file, mode='r') as infile:
    reader = csv.reader(infile)
    reader.__next__()
    for row in reader:
        mid = row[0]
        tags = row[1:]
        #print(tags)
        tags = [t.strip() for t in tags if t]
        #print(tags)
        cTags = list()
        for t in tags:
            if not t: continue
            newtags = normalize(t)
            cTags.extend(newtags)
        cTags = [t.strip() for t in cTags if t]
        if not cTags:continue
        tags_data[mid] = extend_alist_entry(tags_data, mid, cTags)

In [8]:
pp.pprint(tags_data)

{'135377966': {'open', 'enabling', 'eu', 'openmaker team member'},
'135574294': {'team', 'member', 'openmaker'},
'135574317': {'machine learning',
             'openmaker team member',
             'python',
             'research and development',
             'sofware development'},
'139195439': {'3dprint',
             'ecology',
             'make it easier',
             'opensource',
             'socialinnovation'},
'139195444': {'creative', 'economist'},
'140411415': {'collaborativeeconomy',
             'innovation',
             'openmaker team member',
             'socialinnovation'},
'140411418': {'development', 'innovation', 'socialinnovation'},
'140411420': {'prototypes', '3d', '3dmodel', '3dprint', 'project'},
'140411756': {'design',
             'explore',
             'innovation',
             'makers',
             'openmaker team member',
             'systemic'},
'141446497': {'3d', 'entrepreneur', 'innovation', 'education'},
'141446498': {'innovation'},
'14144665

In [9]:
print(len(tags_data))

376


In [10]:
print(len(attr_data))
attr_data_persons = {i:v for i,v in  attr_data.items() if v['Type'] == 'P'}
print(len(attr_data_persons))

526
452


In [11]:
twitter_file = "./in/members.txt"
twitter_names = dict()
with open(twitter_file, mode='r') as infile:
    reader = csv.reader(infile)
    for row in reader:
        mid = row[0]
        tname = row[1]
        twitter_names[mid] = tname

In [12]:
pp.pprint(twitter_names)

{'135377966': 'dariomarmo_lama',
'135574293': 'UniBogazici',
'135574294': 'akbayraksemih',
'135574317': 'arman_boyaci',
'139195417': 'theopenshoes',
'139195418': 'Bihurgunea',
'139195419': 'ibatuz',
'139195420': 'sharebot3dstore',
'139195422': 'andrecatta',
'139195428': 'aitorssm',
'139195429': 'jabiluengo',
'139195434': 'jaarzallus',
'139195435': 'italiaCamp',
'139195436': 'cicli',
'139195439': 'Felfilsrl',
'139195440': 'GeonZee',
'139195444': 'erika_rushton',
'139195446': 'Leonardo_Zampi',
'139195447': 'EsseGi67',
'140411414': 'tommaspagnoli',
'140411415': 'LaMartelloni',
'140411417': 'FABtotum',
'140411418': 'gekonavsat',
'140411419': '_intech3d',
'140411756': 'luastorta',
'140411758': 'ditemibag',
'141446497': 'josu_oleaga',
'141446498': 'wordsfromliam',
'141446649': 'LJMUFablab',
'141446650': 'jamieisboss',
'141446651': 'makeliverpool',
'141446653': 'winterandkurth',
'141446656': 'AngelaOrigami',
'141907524': 'playwd',
'141907526': 'MukaLab',
'141907528': 'eiderina',
'141907531': 

In [13]:
len(twitter_names)

215

In [14]:
for mid, tname in twitter_names.items():
    if mid in attr_data.keys():
        attr_data[mid]['Twitter'] = tname 
    else:
        print(mid,tname)
        attr_data[mid] = {
            'Gender': '',
            'Type': '',
            'Location': '',
            'Tags':set(),
            'Twitter':tname
        }

159252146 top_ix
159435533 KayhanYavuz
159448765 mugnai_matteo
159548949 AMMINISTRAZIONE
159991559 bussimauripro


In [15]:
pp.pprint(attr_data)

{'135377966': {'Gender': 'M',
             'Location': 'IT',
             'Tags': set(),
             'Twitter': 'dariomarmo_lama',
             'Type': 'P'},
'135574293': {'Gender': '',
             'Location': 'x',
             'Tags': set(),
             'Twitter': 'UniBogazici',
             'Type': 'O'},
'135574294': {'Gender': '',
             'Location': 'x',
             'Tags': set(),
             'Twitter': 'akbayraksemih',
             'Type': 'P'},
'135574317': {'Gender': 'M',
             'Location': 'TR',
             'Tags': set(),
             'Twitter': 'arman_boyaci',
             'Type': 'P'},
'137845895': {'Gender': '',
             'Location': 'x',
             'Tags': set(),
             'Twitter': '',
             'Type': 'O'},
'139195417': {'Gender': 'M',
             'Location': 'x',
             'Tags': set(),
             'Twitter': 'theopenshoes',
             'Type': 'P'},
'139195418': {'Gender': 'F',
             'Location': 'x',
             'Tags': set()

             'Type': 'P'},
'141907534': {'Gender': 'M',
             'Location': 'x',
             'Tags': set(),
             'Twitter': '',
             'Type': 'P'},
'141907535': {'Gender': '',
             'Location': 'x',
             'Tags': set(),
             'Twitter': '',
             'Type': 'O'},
'141907536': {'Gender': 'M',
             'Location': 'x',
             'Tags': set(),
             'Twitter': 'basquecook',
             'Type': 'P'},
'141907537': {'Gender': 'M',
             'Location': 'x',
             'Tags': set(),
             'Twitter': '',
             'Type': 'P'},
'141907539': {'Gender': 'M',
             'Location': 'x',
             'Tags': set(),
             'Twitter': 'remoteinsightuk',
             'Type': 'P'},
'141907540': {'Gender': 'F',
             'Location': 'UK',
             'Tags': set(),
             'Twitter': 'fifigibbs',
             'Type': 'P'},
'141907541': {'Gender': '',
             'Location': 'x',
             'Tags': set(),
 

             'Location': 'SK',
             'Tags': set(),
             'Twitter': '',
             'Type': 'P'},
'154622651': {'Gender': 'F',
             'Location': 'SK',
             'Tags': set(),
             'Twitter': '',
             'Type': 'P'},
'154645224': {'Gender': 'M',
             'Location': 'UK',
             'Tags': set(),
             'Twitter': '',
             'Type': 'P'},
'154651876': {'Gender': 'M',
             'Location': 'SK',
             'Tags': set(),
             'Twitter': '',
             'Type': 'P'},
'154653833': {'Gender': 'F',
             'Location': 'SP',
             'Tags': set(),
             'Twitter': 'LookAtMePl_',
             'Type': 'P'},
'154670192': {'Gender': 'F',
             'Location': 'SK',
             'Tags': set(),
             'Twitter': 'lpkova',
             'Type': 'P'},
'154671114': {'Gender': 'M',
             'Location': 'IT',
             'Tags': set(),
             'Twitter': 'GigiBattista',
             'Type': 'P'},

'155512743': {'Gender': 'M',
             'Location': 'SP',
             'Tags': set(),
             'Twitter': '',
             'Type': 'P'},
'155513206': {'Gender': 'F',
             'Location': 'SK',
             'Tags': set(),
             'Twitter': '',
             'Type': 'P'},
'155513804': {'Gender': 'F',
             'Location': 'IT',
             'Tags': set(),
             'Twitter': '',
             'Type': 'P'},
'155514121': {'Gender': 'M',
             'Location': 'SK',
             'Tags': set(),
             'Twitter': '',
             'Type': 'P'},
'155518876': {'Gender': 'F',
             'Location': 'IT',
             'Tags': set(),
             'Twitter': 'ilariamade',
             'Type': 'P'},
'155519905': {'Gender': 'M',
             'Location': 'FR',
             'Tags': set(),
             'Twitter': 'aamonnz',
             'Type': 'P'},
'155521926': {'Gender': 'M',
             'Location': 'IT',
             'Tags': set(),
             'Twitter': '',
         

'155710762': {'Gender': 'M',
             'Location': 'SK',
             'Tags': set(),
             'Twitter': '',
             'Type': 'P'},
'155714083': {'Gender': 'M',
             'Location': 'SK',
             'Tags': set(),
             'Twitter': '',
             'Type': 'P'},
'155714637': {'Gender': 'M',
             'Location': 'SP',
             'Tags': set(),
             'Twitter': '',
             'Type': 'P'},
'155714799': {'Gender': 'F',
             'Location': 'UK',
             'Tags': set(),
             'Twitter': 'flipparini',
             'Type': 'P'},
'155715549': {'Gender': 'M',
             'Location': 'UK',
             'Tags': set(),
             'Twitter': 'Info_SpringArch',
             'Type': 'P'},
'155716025': {'Gender': 'F',
             'Location': 'SK',
             'Tags': set(),
             'Twitter': '',
             'Type': 'P'},
'155716355': {'Gender': 'M',
             'Location': 'SK',
             'Tags': set(),
             'Twitter': '',
 

             'Location': 'UK',
             'Tags': set(),
             'Twitter': 'socialnaomi',
             'Type': 'P'},
'155768715': {'Gender': 'F',
             'Location': 'UK',
             'Tags': set(),
             'Twitter': '',
             'Type': 'P'},
'155771445': {'Gender': 'M',
             'Location': 'IT',
             'Tags': set(),
             'Twitter': 'marco.cassino',
             'Type': 'P'},
'155790083': {'Gender': 'M',
             'Location': 'SK',
             'Tags': set(),
             'Twitter': 'JakubVonkomer',
             'Type': 'P'},
'155791622': {'Gender': 'M',
             'Location': 'SK',
             'Tags': set(),
             'Twitter': '',
             'Type': 'P'},
'155791845': {'Gender': 'M',
             'Location': 'SK',
             'Tags': set(),
             'Twitter': '',
             'Type': 'P'},
'155791981': {'Gender': 'X',
             'Location': 'IT',
             'Tags': set(),
             'Twitter': '',
             'Type

             'Type': 'O'},
'157623900': {'Gender': '',
             'Location': 'x',
             'Tags': set(),
             'Twitter': '',
             'Type': 'O'},
'157624151': {'Gender': '',
             'Location': 'x',
             'Tags': set(),
             'Twitter': '',
             'Type': 'O'},
'157624177': {'Gender': '',
             'Location': 'x',
             'Tags': set(),
             'Twitter': '',
             'Type': 'O'},
'157641208': {'Gender': '',
             'Location': 'x',
             'Tags': set(),
             'Twitter': '',
             'Type': 'O'},
'157664140': {'Gender': '',
             'Location': 'x',
             'Tags': set(),
             'Twitter': '',
             'Type': 'O'},
'157664504': {'Gender': '',
             'Location': 'x',
             'Tags': set(),
             'Twitter': '',
             'Type': 'O'},
'157664550': {'Gender': '',
             'Location': 'x',
             'Tags': set(),
             'Twitter': '',
             

In [16]:
for mid, tags in tags_data.items():
    if mid in attr_data.keys():
        attr_data[mid]['Tags'] = tags
    else:
        print(mid,tags)
        attr_data[mid] = {
            'Gender': '',
            'Type': '',
            'Location': '',
            'Tags':tags,
            'Twitter':''
        }

In [17]:
pp.pprint(attr_data)

{'135377966': {'Gender': 'M',
             'Location': 'IT',
             'Tags': {'open', 'enabling', 'eu', 'openmaker team member'},
             'Twitter': 'dariomarmo_lama',
             'Type': 'P'},
'135574293': {'Gender': '',
             'Location': 'x',
             'Tags': set(),
             'Twitter': 'UniBogazici',
             'Type': 'O'},
'135574294': {'Gender': '',
             'Location': 'x',
             'Tags': {'team', 'member', 'openmaker'},
             'Twitter': 'akbayraksemih',
             'Type': 'P'},
'135574317': {'Gender': 'M',
             'Location': 'TR',
             'Tags': {'machine learning',
                     'openmaker team member',
                     'python',
                     'research and development',
                     'sofware development'},
             'Twitter': 'arman_boyaci',
             'Type': 'P'},
'137845895': {'Gender': '',
             'Location': 'x',
             'Tags': set(),
             'Twitter': '',
         

'141907535': {'Gender': '',
             'Location': 'x',
             'Tags': set(),
             'Twitter': '',
             'Type': 'O'},
'141907536': {'Gender': 'M',
             'Location': 'x',
             'Tags': set(),
             'Twitter': 'basquecook',
             'Type': 'P'},
'141907537': {'Gender': 'M',
             'Location': 'x',
             'Tags': set(),
             'Twitter': '',
             'Type': 'P'},
'141907539': {'Gender': 'M',
             'Location': 'x',
             'Tags': set(),
             'Twitter': 'remoteinsightuk',
             'Type': 'P'},
'141907540': {'Gender': 'F',
             'Location': 'UK',
             'Tags': {'collaborator',
                     'educator',
                     'enterpreneurship',
                     'innovation',
                     'maker'},
             'Twitter': 'fifigibbs',
             'Type': 'P'},
'141907541': {'Gender': '',
             'Location': 'x',
             'Tags': set(),
             'Twitte

'153838985': {'Gender': 'M',
             'Location': 'SP',
             'Tags': {'science', 'citizens', 'laboratory'},
             'Twitter': 'fsanzgarcia',
             'Type': 'P'},
'153860336': {'Gender': '',
             'Location': 'x',
             'Tags': set(),
             'Twitter': '',
             'Type': 'P'},
'153862027': {'Gender': 'M',
             'Location': 'SP',
             'Tags': {'research', 'digital', 'sts'},
             'Twitter': 'FARAONDEMETAL',
             'Type': 'P'},
'153889543': {'Gender': 'F',
             'Location': 'DK',
             'Tags': {'3d', 'material', 'design', 'education', 'share'},
             'Twitter': '',
             'Type': 'P'},
'153891508': {'Gender': 'M',
             'Location': 'SK',
             'Tags': {'design',
                     'development',
                     'innovation',
                     'interaction',
                     'science'},
             'Twitter': '',
             'Type': 'P'},
'153941328': {'Ge

                     'digitalfabrication',
                     'enterpreneurship',
                     'open',
                     'socialinnovation'},
             'Twitter': '',
             'Type': 'P'},
'155060253': {'Gender': 'M',
             'Location': 'IT',
             'Tags': {'bigdata',
                     'computerscience',
                     'doityourself',
                     'opensource',
                     'socialinnovation'},
             'Twitter': '',
             'Type': 'P'},
'155083636': {'Gender': 'M',
             'Location': 'SP',
             'Tags': {'digitalfabrication',
                     'enterpreneurship',
                     'fablab',
                     'innovation',
                     'maker'},
             'Twitter': 'verayle',
             'Type': 'P'},
'155085145': {'Gender': 'M',
             'Location': 'SP',
             'Tags': {'3d',
                     'design',
                     'empowering',
                     'enterpre

             'Type': 'P'},
'155306256': {'Gender': 'F',
             'Location': 'UK',
             'Tags': {'teacher', 'computing', 'maker', 'opensource', 'future'},
             'Twitter': 'Lmcunderwood',
             'Type': 'P'},
'155308986': {'Gender': 'F',
             'Location': 'IT',
             'Tags': {'creative',
                     'innovation',
                     'law',
                     'positivity',
                     'solutions'},
             'Twitter': '',
             'Type': 'P'},
'155335294': {'Gender': 'X',
             'Location': 'UK',
             'Tags': {'designer',
                     'education',
                     'electronics',
                     'engineer',
                     'machinist'},
             'Twitter': '',
             'Type': 'P'},
'155339362': {'Gender': 'M',
             'Location': 'UK',
             'Tags': {'social', 'design', 'innovation'},
             'Twitter': 'N1ckbro',
             'Type': 'P'},
'155341416': {'Gen

'155521973': {'Gender': 'M',
             'Location': 'SP',
             'Tags': {'maker', 'digitalfabrication', 'design', 'fablab'},
             'Twitter': 'mmtoran',
             'Type': 'P'},
'155524507': {'Gender': 'M',
             'Location': 'UK',
             'Tags': {'biological',
                     'cities',
                     'immersive',
                     'innovation',
                     'technologies'},
             'Twitter': 'UrbanBiosPro',
             'Type': 'P'},
'155525266': {'Gender': 'F',
             'Location': 'SK',
             'Tags': {'design',
                     'ecology',
                     'eu',
                     'innovation',
                     'sustainability'},
             'Twitter': '_MariaKadlecova',
             'Type': 'P'},
'155525797': {'Gender': 'F',
             'Location': 'SK',
             'Tags': {'creative', 'craft', 'design', 'innovation'},
             'Twitter': '',
             'Type': 'P'},
'155546218': {'Gender': 

                     'research'},
             'Twitter': '',
             'Type': 'P'},
'155730349': {'Gender': 'M',
             'Location': 'UK',
             'Tags': {'creativity'},
             'Twitter': '',
             'Type': 'P'},
'155730815': {'Gender': 'M',
             'Location': 'SK',
             'Tags': {'business strategy',
                     'disruption',
                     'industrial design',
                     'partnership',
                     'trust'},
             'Twitter': 'BystrikMicek',
             'Type': 'P'},
'155730904': {'Gender': 'M',
             'Location': 'SK',
             'Tags': {'innovation'},
             'Twitter': '',
             'Type': 'P'},
'155731312': {'Gender': 'M',
             'Location': 'IT',
             'Tags': {'community',
                     'computation',
                     'data',
                     'digitalfabrication',
                     'robotics'},
             'Twitter': 'NOUMENA_bcn',
             'Typ

             'Location': 'SP',
             'Tags': {'biology',
                     'enterpreneurship',
                     'innovation',
                     'open',
                     'share'},
             'Twitter': 'OpenLabMadrid',
             'Type': 'P'},
'157496701': {'Gender': 'M',
             'Location': 'UK',
             'Tags': {'creativity',
                     'optimization',
                     'people',
                     'quality',
                     'respect'},
             'Twitter': '',
             'Type': 'P'},
'157496705': {'Gender': 'M',
             'Location': 'IT',
             'Tags': {'craft',
                     'creative',
                     'design',
                     'manufacturing',
                     'socialinnovation'},
             'Twitter': '',
             'Type': 'P'},
'157496711': {'Gender': 'M',
             'Location': 'SP',
             'Tags': {'prototypes', 'design', 'fablab', 'innovation', 'maker'},
             'Twit

In [18]:
len(attr_data)

531

In [19]:
attr_TT = dict()
for mid, vals in attr_data.items():
    if not vals['Tags']: continue
    if not vals['Twitter']: continue
    attr_TT[mid] = vals

In [20]:
len(attr_TT)

173

In [21]:
attr_TT['155399752']['Gender'] = 'F'
attr_TT['135574294']['Gender'] = 'M'
attr_TT['135574294']['Location'] = 'TR'
attr_TT['153757086']['Gender'] = 'F'

In [22]:
for mid, vals in attr_TT.items():
    if vals['Gender'] in ['X','','x']:
        print(mid)
        pp.pprint(vals)

158526065
{'Gender': 'X',
'Location': 'SE',
'Tags': {'open'},
'Twitter': 'Dcuartielles',
'Type': 'P'}
154683766
{'Gender': 'X',
'Location': 'SP',
'Tags': {'embedded', 'research', 'iot', 'maker'},
'Twitter': 'kahache7',
'Type': 'P'}
157496737
{'Gender': 'X',
'Location': 'UK',
'Tags': {'designers', 'developers', 'opensource', 'discoverers', 'honest'},
'Twitter': 'igoouk',
'Type': 'P'}
155736202
{'Gender': 'X',
'Location': 'UK',
'Tags': {'creativity', 'software', 'innovation', 'hardware', 'development'},
'Twitter': 'neris_io',
'Type': 'P'}
153753368
{'Gender': 'X',
'Location': 'IT',
'Tags': {'industry40', 'design', 'digitalfabrication'},
'Twitter': 'glitch_factory',
'Type': 'P'}
157701519
{'Gender': 'X',
'Location': 'IT',
'Tags': {'coriandoli', 'prova'},
'Twitter': '983275932857nfd',
'Type': 'P'}
154868197
{'Gender': 'X',
'Location': 'UK',
'Tags': {'fatigue', 'workers', 'solution', 'healthcare', 'awareness'},
'Twitter': 'fmi_fatigue',
'Type': 'P'}
153643752
{'Gender': 'X',
'Location': 'IT

In [23]:
mypath = './in/fIDs/'
csvfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]
len(csvfiles)

193

In [24]:
def get_CRM_Id(twitterName, AttrDataSet):
    for crm_id, data in AttrDataSet.items():
        if data['Twitter'] == twitterName:
            return(crm_id)
    return(None)

In [25]:
OMNetData = dict()
for f in csvfiles:
    ids = f.split("_fIDs.csv")[0].split('_')
    if len(ids) == 2:
        twitterId = ids[1]
        twitterName = ids[0]
    elif len(ids) > 2:
        twitterId = ids[-1]
        twitterName = '_'.join(ids[0:-1])
    else:
        continue
    df = pd.read_csv(join(mypath, f))
    fids = df.to_dict()[twitterId].values()
    crm_id = get_CRM_Id(twitterName,attr_TT)
    
    if not crm_id: continue
        
    OMNetData[crm_id] = {
        'TwitterName':twitterName,
        'nf':len(fids),
        'flist':set(fids),
        'TwitterId': twitterId
       }

In [26]:
pp.pprint(OMNetData)

{'135377966': {'TwitterId': '818381158715113472',
             'TwitterName': 'dariomarmo_lama',
             'flist': {633,
                      48903,
                      742143,
                      767285,
                      1051171,
                      1578141,
                      1913181,
                      2195241,
                      4970411,
                      5690612,
                      5736262,
                      5794752,
                      5893702,
                      6564562,
                      8170492,
                      8271262,
                      9224352,
                      10689892,
                      11382292,
                      11402552,
                      11406742,
                      11687672,
                      11980142,
                      12807362,
                      12855572,
                      13452672,
                      14060262,
                      14116114,
                      14162405,

                      278449667,
                      281081965,
                      283944220,
                      286233279,
                      287690893,
                      288780236,
                      288930081,
                      288959978,
                      292261462,
                      292844413,
                      294027322,
                      294225947,
                      295243200,
                      295320165,
                      295827042,
                      295857238,
                      296226684,
                      296281164,
                      297510240,
                      297755861,
                      299282839,
                      299663310,
                      299714415,
                      300253836,
                      300461616,
                      300492884,
                      302032526,
                      302574149,
                      304423552,
                      304852634,
          

                      730224961,
                      793625496,
                      811023366,
                      813749467,
                      814943232,
                      817192542,
                      833764070,
                      834991242,
                      845114430,
                      869617855,
                      893487462,
                      904244798,
                      937541844,
                      938867852,
                      962099900,
                      978098106,
                      1003738332,
                      1013265296,
                      1058077820,
                      1063752036,
                      1079909342,
                      1081269368,
                      1092235729,
                      1101218922,
                      1111268454,
                      1123067078,
                      1141086356,
                      1162911950,
                      1168303580,
                      11700022

                      2547763752,
                      2559213636,
                      2608003748,
                      2682587883,
                      2685334362,
                      2717823820,
                      2722074224,
                      2817192308,
                      2837008555,
                      2862161764,
                      2871059239,
                      2893165683,
                      2912907795,
                      2968593772,
                      2991847313,
                      3003828657,
                      3004279653,
                      3034898531,
                      3042302206,
                      3044804031,
                      3062778839,
                      3086389385,
                      3092363416,
                      3104296671,
                      3120694953,
                      3128609386,
                      3131492927,
                      3159341054,
                      3200045434,
              

                      1261520766,
                      1264688389,
                      1315071697,
                      1354508563,
                      1374160837,
                      1457669144,
                      1472969677,
                      1489959858,
                      1497949304,
                      1498342616,
                      1504002042,
                      1566070052,
                      1570204890,
                      1571270053,
                      1582740925,
                      1611332490,
                      1617603914,
                      1626294277,
                      1632913393,
                      1662547956,
                      1678092444,
                      1689075116,
                      1704085728,
                      1707259346,
                      1730577278,
                      1851668641,
                      1870138352,
                      1891806212,
                      1905365502,
              

                      4599641,
                      5930552,
                      6721492,
                      7849082,
                      12855572,
                      13982132,
                      14073331,
                      14159148,
                      14180907,
                      14599918,
                      14683580,
                      14683731,
                      14763706,
                      14898380,
                      15022225,
                      15139314,
                      15442630,
                      15996830,
                      16128396,
                      16363481,
                      17309892,
                      17772404,
                      17835020,
                      17937555,
                      18033071,
                      18161115,
                      18427095,
                      18544024,
                      20590331,
                      21056175,
                      21185883,
            

                      120596672,
                      124936827,
                      127478553,
                      131949006,
                      132654265,
                      133893663,
                      134447383,
                      134706910,
                      137119240,
                      138838089,
                      141707288,
                      142929759,
                      143392516,
                      145834859,
                      146601627,
                      146963767,
                      147525417,
                      147716280,
                      158363292,
                      166389845,
                      166956706,
                      171153605,
                      172629496,
                      175853436,
                      176597595,
                      176694321,
                      177905120,
                      179124989,
                      180404146,
                      185131936,
          

                      589082342,
                      596843142,
                      603220855,
                      605511043,
                      608121212,
                      609435239,
                      611780022,
                      613276712,
                      618098044,
                      619129274,
                      632826378,
                      636912791,
                      705082591,
                      718167032,
                      728312064,
                      754926919,
                      759823321,
                      800466343,
                      803149483,
                      803165588,
                      817729093,
                      823061456,
                      823344925,
                      830687221,
                      839477826,
                      851776567,
                      871622132,
                      872675798,
                      881928014,
                      884082554,
          

                      254967080,
                      255123777,
                      255199851,
                      255204102,
                      255479904,
                      255669531,
                      255938834,
                      256409027,
                      256634278,
                      256908102,
                      257982455,
                      258013563,
                      258245192,
                      258909166,
                      259227873,
                      259688069,
                      260022048,
                      260257518,
                      260285898,
                      261391429,
                      262354411,
                      262671228,
                      263405696,
                      264162545,
                      264996627,
                      265297428,
                      265474492,
                      265868109,
                      266077484,
                      266400754,
          

                      12184602,
                      12725692,
                      12855572,
                      13052462,
                      13257922,
                      13426312,
                      13595172,
                      13598262,
                      13784592,
                      13790232,
                      13812642,
                      14079975,
                      14082799,
                      14157408,
                      14161973,
                      14169364,
                      14174091,
                      14191640,
                      14241791,
                      14297863,
                      14301374,
                      14313180,
                      14330524,
                      14337718,
                      14349894,
                      14372143,
                      14389093,
                      14428116,
                      14442119,
                      14509390,
                      14549442,
        

                      405948588,
                      406885867,
                      408079737,
                      410921229,
                      411387448,
                      412080599,
                      413492524,
                      415116770,
                      415717240,
                      415748066,
                      417373768,
                      420351046,
                      421043917,
                      421118996,
                      422903446,
                      424263485,
                      426592460,
                      429114249,
                      430961436,
                      432436712,
                      435690687,
                      436842808,
                      443933998,
                      444724633,
                      448028442,
                      455624557,
                      456563035,
                      457310245,
                      457806910,
                      459558301,
          

                      1947195954,
                      1949322637,
                      1952365308,
                      1953857605,
                      1954830398,
                      1954879488,
                      1962300223,
                      1966770860,
                      1967109241,
                      1969101578,
                      1973860994,
                      1976066425,
                      2152526270,
                      2153578298,
                      2153750863,
                      2155260961,
                      2156720618,
                      2158497152,
                      2163252210,
                      2164487202,
                      2164985827,
                      2172838014,
                      2172912684,
                      2176812007,
                      2177816161,
                      2177934391,
                      2177937882,
                      2178279834,
                      2181861888,
              

                      212452691,
                      213375277,
                      213591716,
                      214476599,
                      214859312,
                      216015584,
                      217030924,
                      217565503,
                      218438767,
                      219427536,
                      219846823,
                      222479693,
                      223114623,
                      223419787,
                      223881230,
                      223887283,
                      224589531,
                      226662318,
                      228040559,
                      229211585,
                      229385617,
                      230117819,
                      232603412,
                      233884120,
                      234618247,
                      235120460,
                      236443764,
                      237243281,
                      239335032,
                      239760107,
          

                      13921242,
                      14075441,
                      14161973,
                      14174897,
                      14189263,
                      14337718,
                      14436030,
                      14607140,
                      14992855,
                      15107896,
                      15232782,
                      15253440,
                      15351715,
                      15438079,
                      15492359,
                      15492883,
                      15572919,
                      15581283,
                      15901999,
                      16135409,
                      16415492,
                      16575488,
                      16576356,
                      16838106,
                      16843780,
                      16900852,
                      17155094,
                      17229160,
                      17233135,
                      17305888,
                      17615549,
        

             'nf': 382},
'154678051': {'TwitterId': '3334520415',
             'TwitterName': 'playwd',
             'flist': {2172,
                      3051,
                      11983,
                      21693,
                      42683,
                      63313,
                      78453,
                      377733,
                      455483,
                      626163,
                      647043,
                      654583,
                      660313,
                      671083,
                      673973,
                      686543,
                      717313,
                      765845,
                      767285,
                      799675,
                      800991,
                      811303,
                      816653,
                      817812,
                      820828,
                      823129,
                      858071,
                      972651,
                      1118451,
                      1344951,
  

                      614343310,
                      621133544,
                      627539634,
                      635833515,
                      703656469,
                      713686112,
                      713741779,
                      726298380,
                      779859716,
                      783155124,
                      803291118,
                      817226966,
                      833764230,
                      848350633,
                      867253704,
                      921733452,
                      938685344,
                      942510835,
                      950515010,
                      966474366,
                      993624290,
                      1004076997,
                      1112559470,
                      1114568586,
                      1119211447,
                      1119270924,
                      1140009846,
                      1140242785,
                      1150385300,
                      1178533400,
 

                      744874544,
                      746974784,
                      749302104,
                      756702378,
                      761661913,
                      763634629,
                      790078050,
                      812721949,
                      814943232,
                      815298858,
                      825859951,
                      834991242,
                      838491104,
                      844241767,
                      846087786,
                      848110968,
                      849576097,
                      850762608,
                      855378235,
                      867503322,
                      867892392,
                      869863291,
                      871433712,
                      871783603,
                      875359160,
                      878847482,
                      883127384,
                      884565452,
                      886305884,
                      886753286,
          

                      976241220,
                      976648674,
                      981640908,
                      984739836,
                      984938930,
                      985077985,
                      987359311,
                      993185167,
                      993947048,
                      994748959,
                      1001792228,
                      1005468570,
                      1006169996,
                      1010735383,
                      1013265296,
                      1013314382,
                      1013967110,
                      1022887040,
                      1024658424,
                      1025002676,
                      1030298773,
                      1035096804,
                      1039332800,
                      1044749694,
                      1045080120,
                      1055722268,
                      1055889720,
                      1062707652,
                      1063752036,
                      10

                      1107055453,
                      1134081937,
                      1139594106,
                      1151465664,
                      1169281350,
                      1172755814,
                      1178498480,
                      1193559596,
                      1209573252,
                      1235284465,
                      1236001195,
                      1242298909,
                      1244040283,
                      1249486914,
                      1251637034,
                      1323400158,
                      1335979968,
                      1352296351,
                      1356498228,
                      1356678925,
                      1369617511,
                      1371765080,
                      1374071594,
                      1374160837,
                      1377715830,
                      1379662802,
                      1389047281,
                      1464317864,
                      1477835436,
              

                      94703610,
                      94926712,
                      104282000,
                      106831009,
                      107485292,
                      113952703,
                      119773237,
                      128479408,
                      137050226,
                      142839300,
                      144164139,
                      155196498,
                      160282512,
                      163559269,
                      170591249,
                      178494505,
                      183694577,
                      189985521,
                      190318174,
                      194916469,
                      197496299,
                      200492485,
                      202361886,
                      208769407,
                      211555837,
                      217526945,
                      218489964,
                      222385293,
                      225104924,
                      226519339,
            

                      418168646,
                      418220335,
                      419622371,
                      420351046,
                      420588509,
                      421363196,
                      424338209,
                      425878857,
                      427000620,
                      430623637,
                      431755738,
                      431976732,
                      432775110,
                      433193479,
                      434342966,
                      434800757,
                      436746229,
                      440080102,
                      440097805,
                      441648101,
                      443933998,
                      444419749,
                      444940567,
                      449253844,
                      451581109,
                      456157751,
                      456774139,
                      458687294,
                      459628453,
                      459979099,
          

                      58027623,
                      58324171,
                      58773077,
                      58788412,
                      59166185,
                      59780450,
                      59853082,
                      60122653,
                      60251026,
                      60598119,
                      60691872,
                      60849989,
                      60938786,
                      61768995,
                      62005346,
                      62637639,
                      62790769,
                      62791065,
                      63488455,
                      63873759,
                      65392256,
                      65658266,
                      66142691,
                      66639217,
                      66822252,
                      67565590,
                      67590719,
                      67634450,
                      68721442,
                      69064642,
                      69112412,
        

                      86990691,
                      90202949,
                      92061678,
                      92085041,
                      93178176,
                      94122848,
                      95877830,
                      97160096,
                      98706634,
                      98732981,
                      99089785,
                      100610543,
                      100706620,
                      101532373,
                      102456712,
                      102727300,
                      104893747,
                      106894107,
                      110171914,
                      110419148,
                      111933599,
                      113361100,
                      113582262,
                      118648026,
                      119711344,
                      122728088,
                      123899370,
                      124450567,
                      125646301,
                      126677638,
                     

                      2176812007,
                      2190136165,
                      2195607011,
                      2200397977,
                      2202072589,
                      2203174002,
                      2208927696,
                      2223592103,
                      2235026192,
                      2238908630,
                      2239497794,
                      2264743182,
                      2267433709,
                      2288138575,
                      2288622360,
                      2291141119,
                      2292953227,
                      2294209580,
                      2294657586,
                      2301073310,
                      2305416692,
                      2312979985,
                      2315423574,
                      2330169294,
                      2335541070,
                      2338601426,
                      2366247355,
                      2382361092,
                      2383525135,
              

             'flist': {428333,
                      742143,
                      759251,
                      807095,
                      813286,
                      823860,
                      1344951,
                      1652541,
                      1835411,
                      2467791,
                      2884771,
                      3108351,
                      5392522,
                      5402612,
                      6017542,
                      6433472,
                      6480682,
                      6484412,
                      7915762,
                      9245812,
                      9567972,
                      10810102,
                      10952962,
                      14246657,
                      14279047,
                      14293310,
                      14326840,
                      14735121,
                      14777215,
                      14982804,
                      15012486,
                      15082463,
  

                      310325061,
                      310325149,
                      311053185,
                      311870321,
                      312228522,
                      312349314,
                      312468366,
                      312704516,
                      313464875,
                      313826848,
                      313999262,
                      314764239,
                      315122180,
                      315184117,
                      315679290,
                      315768346,
                      315818488,
                      315855825,
                      315941208,
                      316181803,
                      316647998,
                      317221902,
                      317688921,
                      317775482,
                      318116923,
                      318204249,
                      318393653,
                      318948018,
                      319246523,
                      319476599,
          

                      3082340999,
                      3082932069,
                      3085150522,
                      3086804368,
                      3089128252,
                      3089144235,
                      3089720912,
                      3090211071,
                      3090615004,
                      3091067243,
                      3091428232,
                      3092024219,
                      3092092011,
                      3092553255,
                      3092697704,
                      3093677788,
                      3094208787,
                      3094540755,
                      3095871554,
                      3096545250,
                      3096687724,
                      3097026580,
                      3099404849,
                      3099498155,
                      3101949927,
                      3107027765,
                      3107964997,
                      3108013303,
                      3108041259,
              

                      17068001,
                      17134202,
                      17137512,
                      17155587,
                      17164552,
                      17258537,
                      17369252,
                      17384895,
                      17439345,
                      17465997,
                      17487184,
                      17489576,
                      17754586,
                      17896057,
                      18007230,
                      18023887,
                      18062400,
                      18077377,
                      18130686,
                      18164336,
                      18198946,
                      18289926,
                      18313779,
                      18368001,
                      18384521,
                      18412778,
                      18425002,
                      18467383,
                      18542382,
                      18630651,
                      18632200,
        

                      17450217,
                      17462723,
                      17529293,
                      17538409,
                      17541565,
                      17541602,
                      17548982,
                      17563531,
                      17595439,
                      17634248,
                      17652994,
                      17707080,
                      17730885,
                      17751344,
                      17754096,
                      17774865,
                      17795259,
                      17896057,
                      17897077,
                      17898170,
                      17933000,
                      17983020,
                      17988954,
                      17998609,
                      18047103,
                      18130686,
                      18173885,
                      18331726,
                      18334111,
                      18373451,
                      18492092,
        

                      18023887,
                      18048471,
                      18084100,
                      18144123,
                      18197477,
                      18200400,
                      18324392,
                      18430246,
                      18521397,
                      18555165,
                      18698017,
                      18721583,
                      18743550,
                      18807671,
                      18870989,
                      18875406,
                      18943853,
                      18980755,
                      19028805,
                      19108337,
                      19293123,
                      19382151,
                      19390810,
                      19411113,
                      19508072,
                      19563358,
                      19565216,
                      19579908,
                      19619047,
                      19829724,
                      19864285,
        

                      849969704,
                      851601619,
                      852457368,
                      855936252,
                      856938925,
                      857945804,
                      858046417,
                      861100513,
                      861828283,
                      862968037,
                      864449138,
                      869863291,
                      873748998,
                      875445685,
                      877293787,
                      878418086,
                      882077678,
                      882519432,
                      882649363,
                      887014015,
                      888443510,
                      888615294,
                      889083290,
                      890795336,
                      891524280,
                      895982282,
                      897225786,
                      897403544,
                      899682649,
                      900260748,
          

                      7013472,
                      7224702,
                      7400692,
                      7526892,
                      7582832,
                      7590212,
                      7998362,
                      8161232,
                      8425172,
                      8496102,
                      8633362,
                      8683242,
                      9048402,
                      9182082,
                      9185672,
                      9606472,
                      9628732,
                      9910962,
                      10033772,
                      10192242,
                      10231932,
                      10302222,
                      10813432,
                      11094232,
                      11107192,
                      11222502,
                      11336272,
                      11342952,
                      11512342,
                      11518842,
                      11672242,
                      1169

                      22021097,
                      22747978,
                      23450373,
                      23509899,
                      24208510,
                      24731339,
                      25275202,
                      25490158,
                      26012202,
                      26712390,
                      26866407,
                      28542415,
                      30638941,
                      33940579,
                      33985016,
                      34338032,
                      36366545,
                      38151597,
                      38381308,
                      38487101,
                      38966126,
                      40080850,
                      41565430,
                      44196397,
                      46118129,
                      46426258,
                      47433089,
                      47616071,
                      47910594,
                      47927725,
                      52412538,
        

                      338305554,
                      339623188,
                      340212738,
                      341564433,
                      341931411,
                      342003762,
                      342696728,
                      343248947,
                      346463695,
                      348446782,
                      348963650,
                      348966205,
                      351768840,
                      352593677,
                      353595458,
                      353751309,
                      355493219,
                      356202270,
                      357676148,
                      361302333,
                      361600531,
                      361808897,
                      362359553,
                      364201247,
                      364553217,
                      366755332,
                      368393835,
                      368501353,
                      369782522,
                      369989171,
          

                      15408666,
                      15446126,
                      15466399,
                      15502832,
                      15519952,
                      15528040,
                      15533931,
                      15566876,
                      15608867,
                      15625092,
                      15669868,
                      15680820,
                      15687399,
                      15765042,
                      15768338,
                      15837571,
                      15891322,
                      15904669,
                      15906950,
                      15971887,
                      15981406,
                      15994087,
                      15995101,
                      16026368,
                      16091912,
                      16113719,
                      16150760,
                      16169785,
                      16187683,
                      16272222,
                      16303106,
        

                      465965880,
                      468820605,
                      468847593,
                      470239939,
                      471217122,
                      471883751,
                      472334834,
                      472402803,
                      472867113,
                      472964817,
                      475025932,
                      475171989,
                      475301852,
                      475757901,
                      475776050,
                      477025705,
                      480326481,
                      482311338,
                      483192121,
                      484246883,
                      484657942,
                      484934846,
                      485637676,
                      487563142,
                      487666562,
                      488020752,
                      489445202,
                      490318984,
                      491108003,
                      492092655,
          

                      63259787,
                      65410718,
                      66116319,
                      67700256,
                      70983241,
                      72147399,
                      72277928,
                      74525417,
                      74689780,
                      74694501,
                      75254030,
                      75377078,
                      75539780,
                      76010207,
                      77808685,
                      81394054,
                      82172798,
                      82445809,
                      84403641,
                      85189725,
                      85465037,
                      86931439,
                      89459673,
                      90859504,
                      91330606,
                      91356164,
                      91774813,
                      91781128,
                      92302861,
                      94053060,
                      94175270,
        

                      721975452546527232,
                      730480774248595456,
                      732607408590204930,
                      733328775174336512,
                      734724234598027264,
                      745913376451203073,
                      756473003890896896,
                      764277346102759424,
                      776454333269471232,
                      789169448414351360,
                      793826541771698177,
                      798126196567457793,
                      799959122733125632,
                      803562577292853248,
                      818381158715113472,
                      820930566065451010,
                      821448274800295936,
                      822117746884411393,
                      827092120334893056,
                      831928665969741841,
                      840130350219116548,
                      851751392041717760,
                      854307337607237632,
                      856932585624

                      20049074,
                      20060485,
                      20067451,
                      20079459,
                      20104760,
                      20141923,
                      20142551,
                      20159292,
                      20165053,
                      20192128,
                      20228975,
                      20232196,
                      20234739,
                      20242378,
                      20243240,
                      20243764,
                      20249797,
                      20266400,
                      20305710,
                      20322459,
                      20346193,
                      20367011,
                      20369271,
                      20370081,
                      20383751,
                      20396267,
                      20425250,
                      20430888,
                      20448185,
                      20451498,
                      20452679,
        

                      586883330,
                      587441306,
                      589174641,
                      589318791,
                      592863344,
                      592921217,
                      594369284,
                      594527319,
                      596386392,
                      596924310,
                      597300955,
                      597331970,
                      597724942,
                      600007038,
                      600193617,
                      600371168,
                      601225689,
                      602233147,
                      603120237,
                      603978639,
                      604520966,
                      604674059,
                      604765133,
                      605673046,
                      607531615,
                      607594187,
                      609256640,
                      612190532,
                      614656307,
                      616205447,
          

                      1279909814,
                      1280325649,
                      1281673860,
                      1299475664,
                      1305618248,
                      1307848711,
                      1308687582,
                      1310289288,
                      1334975888,
                      1336322804,
                      1336386258,
                      1358905694,
                      1377715830,
                      1384150225,
                      1386566576,
                      1391501850,
                      1399388850,
                      1405756411,
                      1411960340,
                      1427313446,
                      1430641837,
                      1432520268,
                      1436690287,
                      1444215450,
                      1448529001,
                      1452385442,
                      1452578348,
                      1456529838,
                      1458925496,
              

                      172501837,
                      172815437,
                      173195708,
                      174807917,
                      175008526,
                      175031768,
                      175361476,
                      176413239,
                      176438976,
                      176461575,
                      177006079,
                      177182926,
                      178311103,
                      178494505,
                      178932440,
                      179093448,
                      179142856,
                      179485644,
                      179492796,
                      179515005,
                      180378097,
                      180434089,
                      180722476,
                      181111601,
                      182055312,
                      182077663,
                      182419758,
                      182471029,
                      182493733,
                      182897388,
          

                      3981079516,
                      3988853159,
                      3993373035,
                      4001068876,
                      4006636102,
                      4012922026,
                      4016850077,
                      4035104542,
                      4052984188,
                      4100099651,
                      4119437772,
                      4122342856,
                      4130034437,
                      4141475975,
                      4157344036,
                      4161785538,
                      4171806737,
                      4196859005,
                      4220238021,
                      4225124067,
                      4244271647,
                      4244662780,
                      4268224162,
                      4275423989,
                      4282204754,
                      4291249154,
                      4297418533,
                      4318831515,
                      4330726463,
              

                      271572926,
                      271986064,
                      272432360,
                      272910868,
                      273588297,
                      274709839,
                      275046297,
                      276540738,
                      277169100,
                      285307884,
                      287690893,
                      291424036,
                      291729226,
                      292872635,
                      293836763,
                      294225947,
                      296226684,
                      296281164,
                      297623696,
                      297835680,
                      297856777,
                      298890190,
                      300114634,
                      300361868,
                      301692207,
                      302032526,
                      303196169,
                      303875638,
                      304309611,
                      305189090,
          

                      351820091,
                      352269927,
                      352444530,
                      352949431,
                      353784188,
                      353853587,
                      353864247,
                      353938420,
                      355428046,
                      355598161,
                      355716335,
                      355749609,
                      356092345,
                      356202270,
                      356767759,
                      359543429,
                      359858121,
                      362654705,
                      363898089,
                      365089324,
                      365626248,
                      365627981,
                      365638185,
                      365904677,
                      366228124,
                      368240745,
                      368836270,
                      369131068,
                      369204258,
                      369627599,
          

                      9135,
                      10078,
                      10350,
                      10737,
                      11489,
                      11858,
                      11969,
                      12217,
                      12528,
                      12696,
                      12815,
                      12831,
                      13041,
                      13124,
                      13141,
                      13165,
                      13192,
                      13198,
                      13294,
                      13348,
                      13349,
                      13666,
                      13679,
                      13737,
                      14763,
                      16403,
                      17463,
                      23553,
                      24503,
                      25763,
                      28203,
                      29773,
                      36823,
                      41553,
               

                      89097023,
                      89244342,
                      89426674,
                      89691920,
                      89717683,
                      89728787,
                      89950327,
                      89950800,
                      90475662,
                      90521746,
                      90592530,
                      90631375,
                      90646723,
                      90899364,
                      90928961,
                      90984732,
                      91213553,
                      91351303,
                      91362907,
                      91773259,
                      92014786,
                      92116069,
                      92372528,
                      92580704,
                      92725765,
                      92801629,
                      92808813,
                      92825016,
                      93017945,
                      93613650,
                      93616107,
        

                      5644962,
                      5678882,
                      5717752,
                      5771252,
                      5829752,
                      6141832,
                      6363262,
                      6466252,
                      6621172,
                      6721492,
                      6745972,
                      6882402,
                      6900852,
                      7004052,
                      7366322,
                      7535362,
                      7569572,
                      7640782,
                      8154462,
                      8170492,
                      8328352,
                      8486802,
                      8683242,
                      8719482,
                      8727332,
                      8769542,
                      9261402,
                      9588752,
                      9618432,
                      9667902,
                      9752912,
                      9760482,
        

                      904984005514289154,
                      908079663817338880,
                      908653479220121600,
                      909101718985682945,
                      915234290186833921,
                      915907114111037441,
                      919841659084357632,
                      922334094150848513,
                      925431894061322246,
                      931562255854030848,
                      933626160658944000,
                      934541170851622917,
                      939178794220425218,
                      941004023469170689,
                      941187579315093504,
                      942746855552831489,
                      944674055373725696,
                      955093217170657281,
                      955528257285361664,
                      958303282153361408},
             'nf': 391},
'157496687': {'TwitterId': '171008822',
             'TwitterName': 'voost',
             'flist': {7088822,
                      985

                      71039201,
                      71538842,
                      71566365,
                      71864426,
                      72278667,
                      72547286,
                      72612042,
                      73374645,
                      73766535,
                      74416174,
                      74594552,
                      74724117,
                      74796329,
                      76023679,
                      76351820,
                      76950239,
                      78633884,
                      78850605,
                      79282899,
                      79712600,
                      80084977,
                      80096621,
                      81227854,
                      82103120,
                      82303907,
                      82428485,
                      84055372,
                      84268281,
                      84293359,
                      84302889,
                      84351228,
        

                      418613921,
                      420909681,
                      421303242,
                      421371780,
                      424268558,
                      424329706,
                      425662242,
                      426309880,
                      427298214,
                      427534254,
                      429090621,
                      429708182,
                      430987792,
                      431033479,
                      431750856,
                      433059187,
                      434794548,
                      434910411,
                      435679547,
                      435802630,
                      436137728,
                      436858222,
                      440169023,
                      448736236,
                      452226449,
                      452583955,
                      454368075,
                      456794504,
                      457750364,
                      457973298,
          

                      77706641,
                      77962892,
                      78143293,
                      78565246,
                      80278539,
                      80530081,
                      82034239,
                      82148791,
                      82360489,
                      82566863,
                      82703826,
                      83099289,
                      83212310,
                      83455076,
                      84115312,
                      84836633,
                      85126929,
                      85177314,
                      85179772,
                      85471243,
                      85532829,
                      85593164,
                      87756216,
                      89442245,
                      90641656,
                      90693412,
                      91327404,
                      92058239,
                      92342797,
                      92449467,
                      92730812,
        

In [27]:
len(OMNetData)

157

In [28]:
len(attr_TT)

173

In [29]:
TsimK = 0.3
TdifK = 0.1
TsimS = 0.1
TdifS = 0.3
OMExpertiseNet = dict()
count = 0
sims = 0
for a in attr_TT.keys():
    for b in attr_TT.keys():
        count += 1
        key = (a,b)
        if a == b: continue
        if (b,a) in OMExpertiseNet.keys(): continue
        sa = attr_TT[a]['Tags']
        sb = attr_TT[b]['Tags']
        nsa = len(sa)
        nsb = len(sb)
        dAB = len(sa - sb)
        dBA = len(sb - sa)
        AB = sa.intersection(sb)
        nAB = len(AB)
        if nAB > 0:
            simAB = nAB / (nsa * 1.0) if nsa != 0 else 0
            simBA = nAB / (nsb * 1.0) if nsb != 0 else 0
            difBA = dBA / (nsa * 1.0) if nsa != 0 else 0
            difAB = dAB / (nsb * 1.0) if nsb != 0 else 0
            if simAB  < TsimK: continue
            if simBA  < TsimK: continue
            if difAB  < TdifK: continue
            if difBA  < TdifK: continue
            OMExpertiseNet[key] = {'KsimAB': simAB,
                                 'KsimBA': simBA,
                                 'KdifAB': difAB,
                                 'KdifBA': difBA
                                }
            sims += 1
print(sims, count, sims/count)

745 29929 0.024892244979785493


In [30]:
pp.pprint(OMExpertiseNet)

{('135377966', '154442156'): {'KdifAB': 0.4,
                            'KdifBA': 0.75,
                            'KsimAB': 0.5,
                            'KsimBA': 0.4},
('139195439', '155394915'): {'KdifAB': 0.6,
                            'KdifBA': 0.6,
                            'KsimAB': 0.4,
                            'KsimBA': 0.4},
('139195439', '155742649'): {'KdifAB': 0.6,
                            'KdifBA': 0.6,
                            'KsimAB': 0.4,
                            'KsimBA': 0.4},
('139195439', '156649741'): {'KdifAB': 0.6,
                            'KdifBA': 0.6,
                            'KsimAB': 0.4,
                            'KsimBA': 0.4},
('140411415', '140411418'): {'KdifAB': 0.6666666666666666,
                            'KdifBA': 0.25,
                            'KsimAB': 0.5,
                            'KsimBA': 0.6666666666666666},
('140411415', '153762636'): {'KdifAB': 0.6666666666666666,
                            'KdifBA': 

                            'KsimBA': 0.3333333333333333},
('153507756', '153762450'): {'KdifAB': 0.6666666666666666,
                            'KdifBA': 0.6666666666666666,
                            'KsimAB': 0.3333333333333333,
                            'KsimBA': 0.3333333333333333},
('153507756', '153818672'): {'KdifAB': 0.6666666666666666,
                            'KdifBA': 0.6666666666666666,
                            'KsimAB': 0.3333333333333333,
                            'KsimBA': 0.3333333333333333},
('153507756', '154442156'): {'KdifAB': 0.2,
                            'KdifBA': 1.0,
                            'KsimAB': 0.6666666666666666,
                            'KsimBA': 0.4},
('153507756', '154578841'): {'KdifAB': 0.6666666666666666,
                            'KdifBA': 0.6666666666666666,
                            'KsimAB': 0.3333333333333333,
                            'KsimBA': 0.3333333333333333},
('153507756', '154670192'): {'KdifAB': 0.2,
      

                            'KdifBA': 0.6,
                            'KsimAB': 0.4,
                            'KsimBA': 0.4},
('155222038', '154518289'): {'KdifAB': 0.6,
                            'KdifBA': 0.6,
                            'KsimAB': 0.4,
                            'KsimBA': 0.4},
('155222038', '154670192'): {'KdifAB': 0.6,
                            'KdifBA': 0.6,
                            'KsimAB': 0.4,
                            'KsimBA': 0.4},
('155222038', '154747461'): {'KdifAB': 0.5,
                            'KdifBA': 0.8,
                            'KsimAB': 0.4,
                            'KsimBA': 0.3333333333333333},
('155222038', '154865718'): {'KdifAB': 0.75,
                            'KdifBA': 0.4,
                            'KsimAB': 0.4,
                            'KsimBA': 0.5},
('155222038', '155039170'): {'KdifAB': 0.6,
                            'KdifBA': 0.6,
                            'KsimAB': 0.4,
                            

('155729610', '155480169'): {'KdifAB': 0.6,
                            'KdifBA': 0.6,
                            'KsimAB': 0.4,
                            'KsimBA': 0.4},
('155729610', '155525266'): {'KdifAB': 0.6,
                            'KdifBA': 0.6,
                            'KsimAB': 0.4,
                            'KsimBA': 0.4},
('155729610', '155743075'): {'KdifAB': 0.6,
                            'KdifBA': 0.6,
                            'KsimAB': 0.4,
                            'KsimBA': 0.4},
('155729610', '157496690'): {'KdifAB': 0.6,
                            'KdifBA': 0.6,
                            'KsimAB': 0.4,
                            'KsimBA': 0.4},
('155731312', '155722638'): {'KdifAB': 0.6,
                            'KdifBA': 0.6,
                            'KsimAB': 0.4,
                            'KsimBA': 0.4},
('155736202', '140411418'): {'KdifAB': 1.0,
                            'KdifBA': 0.2,
                            'KsimAB': 0.4,


In [31]:
TsimS = 0.1
TdifS = 0.3
count = 0
sims = 0
OMSocialNet = dict()
for from_id in OMNetData.keys():
    mutuals = list()
    outL = list()
    inL = list()
    potentials = list()
    for to_id in OMNetData.keys():
        if from_id == to_id: continue
        ifrom_id = int(OMNetData[from_id]['TwitterId'])
        ito_id = int(OMNetData[to_id]['TwitterId'])
        afriends = OMNetData[from_id]['flist']
        bfriends = OMNetData[to_id]['flist']
        common = afriends.intersection(bfriends) 
        ab = ito_id in afriends 
        ba = ifrom_id in bfriends   
        nA = len(afriends)
        nB = len(bfriends)
        nC = len(common) * 1.0
        sim = nC / nA if nA > 0 else 0
        dif = (nB - nC) / nA if nA > 0 else 100
        
        #Reciprocities
        if ab and ba: mutuals.append(to_id)
        elif ab: outL.append(to_id)
        elif ba: inL.append(to_id)
        else: pass
    
        if sim < TsimS: continue
        if dif < TdifS: continue
        potentials.append({'id': to_id, 'sim': sim, 'dif': dif})
        
    OMSocialNet[from_id] = {
        'mutuals': mutuals,
        'in': inL,
        'out': outL,
        'potentials': potentials
    }
            
        
            

In [32]:
pp.pprint(OMSocialNet)

{'135377966': {'in': [],
             'mutuals': ['156612789',
                        '153762636',
                        '152730563',
                        '139195444',
                        '153862027',
                        '155714799',
                        '153689099',
                        '145489262',
                        '140411415',
                        '153562611',
                        '140411756',
                        '152583764',
                        '154865718',
                        '156582037',
                        '157784656',
                        '153757086',
                        '153818672'],
             'out': ['135574317',
                    '155546218',
                    '154747461',
                    '154735576',
                    '157238215',
                    '154678051',
                    '154578841',
                    '157496726'],
             'potentials': [{'dif': 0.91725768321513,
                        

                           'sim': 0.10599078341013825},
                           {'dif': 7.285714285714286,
                           'id': '153862027',
                           'sim': 0.19815668202764977}]},
'153702260': {'in': ['155473092', '155524507'],
             'mutuals': ['139195444', '155735454', '155759507'],
             'out': ['155668791'],
             'potentials': [{'dif': 0.734669095324833,
                           'id': '139195444',
                           'sim': 0.14632665452337584},
                           {'dif': 1.837887067395264,
                           'id': '155735454',
                           'sim': 0.12143290831815422},
                           {'dif': 1.8979963570127505,
                           'id': '155668791',
                           'sim': 0.10686095931997572}]},
'153716747': {'in': [], 'mutuals': [], 'out': [], 'potentials': []},
'153750682': {'in': ['154151913'],
             'mutuals': ['153680906',
                        

             'potentials': [{'dif': 2.4444444444444446,
                           'id': '154732297',
                           'sim': 0.1111111111111111},
                           {'dif': 252.0,
                           'id': '154151913',
                           'sim': 0.1111111111111111},
                           {'dif': 16.88888888888889,
                           'id': '155525266',
                           'sim': 0.1111111111111111},
                           {'dif': 11.555555555555555,
                           'id': '154532810',
                           'sim': 0.2222222222222222},
                           {'dif': 18.0,
                           'id': '158326587',
                           'sim': 0.1111111111111111},
                           {'dif': 425.22222222222223,
                           'id': '155742649',
                           'sim': 0.2222222222222222},
                           {'dif': 22.555555555555557,
                           'id': '15

                           'sim': 0.10126582278481013},
                           {'dif': 48.34177215189873,
                           'id': '155742649',
                           'sim': 0.12658227848101267},
                           {'dif': 19.0,
                           'id': '154442156',
                           'sim': 0.11392405063291139},
                           {'dif': 8.012658227848101,
                           'id': '155594430',
                           'sim': 0.12658227848101267},
                           {'dif': 13.164556962025317,
                           'id': '155739139',
                           'sim': 0.10126582278481013},
                           {'dif': 4.860759493670886,
                           'id': '156644216',
                           'sim': 0.10126582278481013},
                           {'dif': 6.151898734177215,
                           'id': '158307955',
                           'sim': 0.22784810126582278},
                    

In [33]:
connections = OMSocialNet[a]
pp.pprint(connections)

{'in': [],
'mutuals': [],
'out': [],
'potentials': [{'dif': 18.17241379310345,
              'id': '155657933',
              'sim': 0.1724137931034483},
              {'dif': 47.37931034482759,
              'id': '155511072',
              'sim': 0.1724137931034483},
              {'dif': 33.6551724137931,
              'id': '157496737',
              'sim': 0.13793103448275862},
              {'dif': 56.62068965517241,
              'id': '153702260',
              'sim': 0.1724137931034483},
              {'dif': 25.82758620689655,
              'id': '153762329',
              'sim': 0.10344827586206896},
              {'dif': 111.17241379310344,
              'id': '155735454',
              'sim': 0.10344827586206896},
              {'dif': 33.241379310344826,
              'id': '157496725',
              'sim': 0.20689655172413793}]}


In [34]:
len(OMSocialNet)

157

## Compute co-occurances

In [35]:
pp.pprint(attr_data)

{'135377966': {'Gender': 'M',
             'Location': 'IT',
             'Tags': {'open', 'enabling', 'eu', 'openmaker team member'},
             'Twitter': 'dariomarmo_lama',
             'Type': 'P'},
'135574293': {'Gender': '',
             'Location': 'x',
             'Tags': set(),
             'Twitter': 'UniBogazici',
             'Type': 'O'},
'135574294': {'Gender': 'M',
             'Location': 'TR',
             'Tags': {'team', 'member', 'openmaker'},
             'Twitter': 'akbayraksemih',
             'Type': 'P'},
'135574317': {'Gender': 'M',
             'Location': 'TR',
             'Tags': {'machine learning',
                     'openmaker team member',
                     'python',
                     'research and development',
                     'sofware development'},
             'Twitter': 'arman_boyaci',
             'Type': 'P'},
'137845895': {'Gender': '',
             'Location': 'x',
             'Tags': set(),
             'Twitter': '',
       

             'Type': 'P'},
'146208755': {'Gender': 'M',
             'Location': 'IT',
             'Tags': {'cryptocurrency',
                     'css',
                     'html',
                     'javascript',
                     'js',
                     'python',
                     'sass'},
             'Twitter': 'andreafspeziale',
             'Type': 'P'},
'146224317': {'Gender': 'M',
             'Location': 'SP',
             'Tags': {'bigdata', 'opendata', 'python'},
             'Twitter': '',
             'Type': 'P'},
'146306585': {'Gender': '',
             'Location': 'x',
             'Tags': set(),
             'Twitter': '',
             'Type': 'P'},
'152031440': {'Gender': 'M',
             'Location': 'IT',
             'Tags': {'bigdata',
                     'education',
                     'industry40',
                     'innovation',
                     'software'},
             'Twitter': 'd4n3in',
             'Type': 'P'},
'152035874': {'Gend

                     'industry40',
                     'innovation',
                     'mechatronics',
                     'socialinnovation'},
             'Twitter': '',
             'Type': 'P'},
'154442156': {'Gender': 'F',
             'Location': 'UK',
             'Tags': {'enabling', 'design', 'software', 'innovation', 'open'},
             'Twitter': 'Cofabrico',
             'Type': 'P'},
'154449529': {'Gender': 'M',
             'Location': 'IT',
             'Tags': {'aeronautics',
                     'drones',
                     'engineering',
                     'innovation',
                     'research'},
             'Twitter': 'SkyBoxEng',
             'Type': 'P'},
'154507926': {'Gender': 'M',
             'Location': 'SP',
             'Tags': {'developer',
                     'innovation',
                     'manufacturing',
                     'research',
                     'science'},
             'Twitter': '',
             'Type': 'P'},
'154518

             'Location': 'IT',
             'Tags': {'3dscanning', '3dprint', 'education', 'arduino'},
             'Twitter': '',
             'Type': 'P'},
'155222038': {'Gender': 'M',
             'Location': 'IT',
             'Tags': {'biology',
                     'design',
                     'energy',
                     'innovation',
                     'sustainability'},
             'Twitter': 'c_growl',
             'Type': 'P'},
'155222567': {'Gender': 'M',
             'Location': 'SK',
             'Tags': {'coworking',
                     'environment',
                     'innovation',
                     'prototypes',
                     'research and development'},
             'Twitter': 'RoboDekan',
             'Type': 'P'},
'155229330': {'Gender': 'F',
             'Location': 'SP',
             'Tags': {'co design',
                     'creative',
                     'curiosity',
                     'facilitator',
                     'innovation'},
 

             'Location': 'SK',
             'Tags': {'makingthings', 'design', 'law', 'innovation', 'open'},
             'Twitter': 'Michaela',
             'Type': 'P'},
'155480246': {'Gender': 'M',
             'Location': 'SK',
             'Tags': {'design',
                     'engineering',
                     'innovation',
                     'machine',
                     'solutions'},
             'Twitter': '',
             'Type': 'P'},
'155481818': {'Gender': 'F',
             'Location': 'IT',
             'Tags': {'automation',
                     'bigdata',
                     'digitalfabrication',
                     'industry40',
                     'iot'},
             'Twitter': 'MCinquerrui',
             'Type': 'P'},
'155485527': {'Gender': 'F',
             'Location': 'SK',
             'Tags': {'cooperation',
                     'creativity',
                     'education',
                     'vocational',
                     'youth'},
          

'155596516': {'Gender': 'F',
             'Location': 'UK',
             'Tags': {'materials', 'artist', 'technology', 'curator', 'craft'},
             'Twitter': '',
             'Type': 'P'},
'155596896': {'Gender': 'M',
             'Location': 'SK',
             'Tags': {'cooperation',
                     'development',
                     'entrepreneur',
                     'practical',
                     'technology'},
             'Twitter': '',
             'Type': 'P'},
'155597492': {'Gender': 'M',
             'Location': 'SK',
             'Tags': {'design',
                     'ecology',
                     'gardening',
                     'organic',
                     'sustainability'},
             'Twitter': '',
             'Type': 'P'},
'155599601': {'Gender': 'F',
             'Location': 'SK',
             'Tags': {'industry', 'ecology', 'bussines'},
             'Twitter': '',
             'Type': 'P'},
'155600821': {'Gender': 'F',
             'Location'

             'Type': 'P'},
'155710762': {'Gender': 'M',
             'Location': 'SK',
             'Tags': {'creativity',
                     'design',
                     'innovation',
                     'product',
                     'technology'},
             'Twitter': '',
             'Type': 'P'},
'155714083': {'Gender': 'M',
             'Location': 'SK',
             'Tags': {'wood', 'table'},
             'Twitter': '',
             'Type': 'P'},
'155714637': {'Gender': 'M',
             'Location': 'SP',
             'Tags': {'entrepreneur',
                     'industry40',
                     'iot',
                     'physics',
                     'salesman'},
             'Twitter': '',
             'Type': 'P'},
'155714799': {'Gender': 'F',
             'Location': 'UK',
             'Tags': {'social', 'innovation', 'future', 'open', 'systemic'},
             'Twitter': 'flipparini',
             'Type': 'P'},
'155715549': {'Gender': 'M',
             'Locati

             'Twitter': '',
             'Type': 'P'},
'157430046': {'Gender': 'M',
             'Location': 'SP',
             'Tags': {'collaborator',
                     'creative',
                     'design',
                     'education',
                     'innovation'},
             'Twitter': '',
             'Type': 'P'},
'157489982': {'Gender': '',
             'Location': 'x',
             'Tags': set(),
             'Twitter': '',
             'Type': 'O'},
'157496677': {'Gender': 'F',
             'Location': 'IT',
             'Tags': {'bellezza',
                     'design',
                     'enterpreneurship',
                     'open',
                     'share'},
             'Twitter': '',
             'Type': 'P'},
'157496679': {'Gender': 'M',
             'Location': 'NL',
             'Tags': {'industry40'},
             'Twitter': '',
             'Type': 'P'},
'157496680': {'Gender': 'M',
             'Location': 'SK',
             'Tags': {'d

             'Type': 'O'},
'157976809': {'Gender': '',
             'Location': 'x',
             'Tags': set(),
             'Twitter': '',
             'Type': 'P'},
'157980316': {'Gender': '',
             'Location': 'x',
             'Tags': set(),
             'Twitter': '',
             'Type': 'O'},
'158179681': {'Gender': 'M',
             'Location': 'IT',
             'Tags': {'designer',
                     'engineer',
                     'innovator',
                     'mechatronics',
                     'product'},
             'Twitter': 'LorenzoDuroux',
             'Type': 'P'},
'158180341': {'Gender': '',
             'Location': 'x',
             'Tags': set(),
             'Twitter': '',
             'Type': 'O'},
'158304300': {'Gender': 'F',
             'Location': 'IT',
             'Tags': {'design',
                     'education',
                     'industry40',
                     'manufacturing',
                     'research'},
             'Twit

In [36]:
def form_cooccurances(attr_data):
    attrs = [v['Tags'] for k, v in attr_data.items() if v['Tags']]
    alltags = set()
    for k in attrs:
        alltags = alltags.union(set(k))
    occurance = dict()
    for t in alltags:
        occurance[t] = {'f':0,'co':{}}
    for kset in attrs:
        for k in kset:
            occurance[k]['f'] += 1
            for t in kset:
                if t == k: continue
                if t in occurance[k]['co'].keys():
                    occurance[k]['co'][t] += 1
                else:
                    occurance[k]['co'][t] = 1
    return(occurance)

In [37]:
cooccurances = form_cooccurances(attr_data)
pp.pprint(cooccurances)

{'3d': {'co': {'3dmodel': 1,
            '3dprint': 4,
            'additive': 1,
            'ar': 1,
            'artist': 1,
            'cad': 1,
            'construction': 1,
            'craft': 1,
            'creative': 1,
            'creativity': 1,
            'design': 10,
            'development': 1,
            'digitalfabrication': 3,
            'disability': 1,
            'education': 2,
            'empowering': 1,
            'enabling': 1,
            'engineer': 1,
            'engineering': 2,
            'enterpreneurship': 1,
            'entrepreneur': 2,
            'fablab': 2,
            'health': 1,
            'industry40': 1,
            'innovation': 15,
            'iot': 2,
            'jewellery': 1,
            'lpg': 1,
            'maker': 3,
            'manufacturing': 4,
            'material': 3,
            'metal': 1,
            'open': 2,
            'printer': 1,
            'product': 1,
            'production': 2,
            'proje

              'modelli': 1},
        'f': 1},
'artist': {'co': {'3d': 1,
                'biological': 1,
                'collaborator': 2,
                'craft': 2,
                'creative': 1,
                'creativity': 1,
                'curator': 2,
                'design': 3,
                'digital': 1,
                'educator': 1,
                'facilitator': 1,
                'healthcare': 1,
                'innovation': 4,
                'jewellery': 1,
                'maker': 1,
                'materials': 1,
                'product': 1,
                'socialist': 1,
                'sustainability': 1,
                'technology': 1},
          'f': 8},
'audio': {'co': {'opendesign': 1, 'research': 1, 'solutions': 1}, 'f': 1},
'automation': {'co': {'bigdata': 1,
                    'cad': 1,
                    'controls': 1,
                    'determined': 1,
                    'digitalfabrication': 1,
                    'embedded': 1,
          

                       'engineering': 1,
                       'future': 1,
                       'industrial robot': 1,
                       'industry40': 1,
                       'innovation': 1,
                       'inovation': 1,
                       'maker': 1,
                       'makingthings': 1,
                       'music': 1,
                       'opendesign': 1,
                       'production': 1,
                       'visual': 1},
                 'f': 4},
'collaborativeeconomy': {'co': {'3dprint': 1,
                              'creative': 1,
                              'curiosity': 1,
                              'development': 1,
                              'education': 2,
                              'empowering': 1,
                              'innovation': 2,
                              'inventor': 1,
                              'openmaker team member': 1,
                              'proramming': 1,
                            

                'artist': 3,
                'beauty': 1,
                'bellezza': 1,
                'biology': 1,
                'business': 1,
                'cad': 1,
                'ceramics': 1,
                'cnc': 3,
                'co design': 1,
                'collaborator': 2,
                'computer': 1,
                'consulting': 2,
                'courious': 1,
                'coworking': 1,
                'craft': 7,
                'crafts': 3,
                'creative': 15,
                'creativity': 11,
                'determined': 1,
                'development': 2,
                'digital': 3,
                'digitalfabrication': 12,
                'disability': 1,
                'ecology': 2,
                'economist': 1,
                'education': 8,
                'electronics': 2,
                'empowering': 2,
                'enabling': 1,
                'energy': 3,
                'engineering': 4,
                'enterp

                            'storage': 1,
                            'teacher': 1,
                            'urbanfarming': 1},
                      'f': 21},
'disability': {'co': {'3d': 1,
                    '3dprint': 1,
                    'arduino': 1,
                    'consultant': 1,
                    'cooperation': 1,
                    'creative': 1,
                    'creativity': 1,
                    'design': 1,
                    'determined': 1,
                    'education': 1,
                    'laboratory': 1,
                    'people': 1,
                    'raspberrypi': 1,
                    'sensors': 1,
                    'social': 2,
                    'socialinnovation': 1,
                    'sport': 1},
              'f': 6},
'discoverers': {'co': {'designers': 1,
                     'developers': 1,
                     'honest': 1,
                     'opensource': 1},
               'f': 1},
'disruption': {'co': {'business stra

'html': {'co': {'cryptocurrency': 1,
              'css': 1,
              'javascript': 1,
              'js': 1,
              'python': 1,
              'sass': 1},
        'f': 1},
'humanitarian': {'co': {'data': 1,
                      'design': 1,
                      'industrial': 1,
                      'innovation': 1,
                      'maker': 1,
                      'supplychain': 1,
                      'syrups': 1},
                'f': 2},
'husband': {'co': {'architect': 1,
                 'creative': 1,
                 'father': 1,
                 'sustainability': 1},
           'f': 1},
'hydro': {'co': {'energy': 1,
               'manufacturing': 1,
               'research and development': 1,
               'tradition': 1},
         'f': 1},
'immersive': {'co': {'biological': 1,
                   'cities': 1,
                   'innovation': 1,
                   'technologies': 1},
             'f': 1},
'inclusion': {'co': {'cultural': 1,
            

                  'maker': 1,
                  'proramming': 1,
                  'research': 2,
                  'research and development': 2,
                  'respect': 1,
                  'science': 1,
                  'storage': 1,
                  'systems': 1,
                  'technology': 1,
                  'wave': 1,
                  'wind': 1},
            'f': 9},
'iot': {'co': {'3d': 2,
             '3dprint': 2,
             'ar': 1,
             'automation': 1,
             'bigdata': 1,
             'cad': 1,
             'cncrouter': 1,
             'developer': 1,
             'development': 1,
             'digitalfabrication': 1,
             'embedded': 1,
             'enabling': 1,
             'entrepreneur': 2,
             'geometry': 1,
             'industry40': 2,
             'innovation': 2,
             'light': 1,
             'lpg': 1,
             'maker': 2,
             'makingthings': 1,
             'manufacturer': 1,
             'phy

'measurement': {'co': {'research and development': 1, 'vision': 1}, 'f': 1},
'mechatronics': {'co': {'cooperation': 1,
                      'craft': 1,
                      'creativity': 1,
                      'design': 1,
                      'designer': 1,
                      'engineer': 1,
                      'engineering': 1,
                      'industry40': 1,
                      'innovation': 1,
                      'innovator': 1,
                      'opensource': 1,
                      'product': 1,
                      'research': 1,
                      'socialinnovation': 1},
                'f': 4},
'melamine': {'co': {'manufacturer': 1, 'mdf': 1, 'veneer': 1}, 'f': 1},
'member': {'co': {'openmaker': 1, 'team': 1}, 'f': 1},
'mentor': {'co': {'agile': 2, 'coach': 1, 'empowering': 2, 'scrum': 2}, 'f': 2},
'metal': {'co': {'3d': 1,
               'additive': 1,
               'cnc': 1,
               'digitalfabrication': 1,
               'educator': 1,
 

                 'project': 1,
                 'robotics': 1,
                 'timeserved': 1},
           'f': 2},
'process': {'co': {'innovations': 1, 'optimalization': 1}, 'f': 1},
'product': {'co': {'3d': 1,
                 '3dp': 1,
                 '3dprint': 1,
                 'artist': 1,
                 'creative': 1,
                 'creativity': 1,
                 'design': 2,
                 'designer': 1,
                 'digital': 1,
                 'electronics': 1,
                 'engineer': 1,
                 'entrepreneurship': 1,
                 'innovation': 1,
                 'innovator': 1,
                 'maker': 1,
                 'mechatronics': 1,
                 'prototypes': 1,
                 'research and development': 1,
                 'technology': 1},
           'f': 5},
'production': {'co': {'3d': 2,
                    'aerospace': 1,
                    'agrotourism': 1,
                    'art': 1,
                    'automot

                'sustainability': 1,
                'systemic': 1,
                'tecnologico': 1},
          'f': 14},
'socialinnovation': {'co': {'3dprint': 1,
                          'aquaponics': 1,
                          'bettereurope': 1,
                          'bigdata': 1,
                          'co design': 1,
                          'collaborativeeconomy': 3,
                          'communitybuilding': 1,
                          'computerscience': 1,
                          'cooperation': 2,
                          'craft': 1,
                          'creative': 3,
                          'creativity': 3,
                          'design': 6,
                          'development': 2,
                          'digitalfabrication': 3,
                          'disability': 1,
                          'doityourself': 1,
                          'ecology': 1,
                          'education': 3,
                          'empowering': 2,
 

                  'networking': 1,
                  'sustainability': 1},
            'f': 2},
'transport': {'co': {'cycling': 1, 'environment': 1, 'health': 1, 'mobility': 1},
             'f': 1},
'trust': {'co': {'business strategy': 1,
               'ceo': 1,
               'disruption': 1,
               'empathy': 1,
               'fair play': 1,
               'furniture manufacturer': 1,
               'industrial design': 1,
               'network': 1,
               'partnership': 1,
               'socialinnovation': 1,
               'sustainability': 1},
         'f': 3},
'uav': {'co': {'drones': 1, 'marketing': 1, 'pr': 1, 'technology': 1}, 'f': 1},
'ui': {'co': {'dataviz': 1, 'innovation': 1}, 'f': 1},
'urbanfarming': {'co': {'aquaponics': 1,
                      'creativity': 1,
                      'design': 1,
                      'digitalfabrication': 1,
                      'entrepreneur': 1,
                      'innovation': 2,
                      'open

In [38]:
print(len(cooccurances.keys()))

409


In [39]:
pp.pprint(sorted(cooccurances.keys()))

['3d',
'3dmodel',
'3dp',
'3dprint',
'3dscanning',
'accessibility',
'accessibilità',
'additive',
'aec',
'aeronautics',
'aerospace',
'agile',
'agrotourism',
'ai',
'airbrush',
'anthropocene',
'applied',
'aquaponics',
'ar',
'architechture',
'architect',
'arduino',
'art',
'arte',
'artist',
'audio',
'automation',
'automotive',
'awareness',
'beauty',
'bellezza',
'bettereurope',
'biesse',
'bigdata',
'bim',
'biohacker',
'biological',
'biology',
'board',
'bottle',
'building',
'business',
'business strategy',
'bussines',
'cad',
'ceo',
'ceramics',
'cfd',
'changemaker',
'chemisty',
'childrens',
'ciao',
'cities',
'citizens',
'cloud',
'cnc',
'cncrouter',
'co design',
'coach',
'colaboration',
'collaboration',
'collaborativeeconomy',
'collaborator',
'community',
'communitybuilding',
'composite',
'computation',
'computer',
'computerscience',
'computing',
'connectingideas',
'connector',
'construction',
'consultant',
'consulting',
'controls',
'controlsystems',
'cooperation',
'coriandoli',
'corrugated',
'c

In [40]:
def get_complementarity_score(a,b,occurances):
    if a not in occurances.keys(): return(-1)
    if b not in occurances.keys(): return(-1)
    fb = occurances[b]['f']
    if b not in occurances[a]['co'].keys(): return(0)
    cab = occurances[a]['co'][b]
    if not fb: return(0)
    return(1.0 * cab / fb)

In [41]:
def recommend_topic(pA,pB,occurances):
    commonset = set(pA).intersection(set(pB))
    unionset =  set(pA).union(set(pB))
    disjointset = unionset - commonset
    
    complementarity = dict()
    
    for d in disjointset:
        complementarity[d] = 0
        for c in commonset:
            complementarity[d] += get_complementarity_score(d,c,occurances)
    compplementarity = sorted(complementarity.items(), key=lambda x: x[1], reverse=True)
    return({'common': commonset, 'complement': compplementarity})
    

In [42]:
def geometric_mean(a,b):
    return(math.sqrt(a * b))

In [43]:
a = '140411415'
b = '152031440'
pA = attr_data[a]['Tags']
pB = attr_data[b]['Tags']
recommend_topic(pA,pB,cooccurances)

In [44]:
recommend_topic(pA,pB,cooccurances)

{'common': {'innovation'},
 'complement': [('education', 0.1103448275862069),
  ('industry40', 0.06896551724137931),
  ('software', 0.05517241379310345),
  ('socialinnovation', 0.05517241379310345),
  ('bigdata', 0.027586206896551724),
  ('openmaker team member', 0.013793103448275862),
  ('collaborativeeconomy', 0.013793103448275862)]}

In [45]:
pp.pprint(OMSocialNet.keys())

dict_keys(['154732297', '153680906', '154151913', '155525266', '155743739', '155519905', '155399752', '154860115', '155656873', '156612789', '155704668', '154532810', '158326587', '141446656', '157496826', '155742649', '155614016', '135574317', '155587917', '155188374', '155220790', '154683832', '153762636', '155730815', '155222038', '152730563', '154536472', '155710159', '154442156', '144923437', '155468589', '152031440', '135377966', '155296486', '158526065', '157496825', '155546388', '155019070', '153750682', '157338963', '155594430', '155486371', '155499906', '139195444', '155739139', '155709442', '155657933', '155588890', '155660568', '153941328', '155103070', '153862027', '155656826', '139195439', '141907540', '155726182', '155714799', '156644216', '154868197', '155170222', '155722638', '153838985', '153689099', '140411418', '154671114', '155546218', '153753368', '154747461', '155511072', '155089214', '155708792', '157496737', '155085145', '155518876', '155715549', '158307955', '

In [46]:
len(OMSocialNet.keys())

157

In [47]:
len(attr_TT)

173

In [48]:
pp.pprint(attr_TT)

{'135377966': {'Gender': 'M',
             'Location': 'IT',
             'Tags': {'open', 'enabling', 'eu', 'openmaker team member'},
             'Twitter': 'dariomarmo_lama',
             'Type': 'P'},
'135574294': {'Gender': 'M',
             'Location': 'TR',
             'Tags': {'team', 'member', 'openmaker'},
             'Twitter': 'akbayraksemih',
             'Type': 'P'},
'135574317': {'Gender': 'M',
             'Location': 'TR',
             'Tags': {'machine learning',
                     'openmaker team member',
                     'python',
                     'research and development',
                     'sofware development'},
             'Twitter': 'arman_boyaci',
             'Type': 'P'},
'139195439': {'Gender': 'M',
             'Location': 'IT',
             'Tags': {'3dprint',
                     'ecology',
                     'make it easier',
                     'opensource',
                     'socialinnovation'},
             'Twitter': 'Felfils

             'Tags': {'innovation'},
             'Twitter': 'chiaramanicardi',
             'Type': 'P'},
'154567552': {'Gender': 'F',
             'Location': 'SP',
             'Tags': {'cooperation',
                     'facilitator',
                     'industry40',
                     'innovation',
                     'ux'},
             'Twitter': 'tatianabartolom',
             'Type': 'P'},
'154578841': {'Gender': 'M',
             'Location': 'IT',
             'Tags': {'development', 'innovation', 'enterpreneurship'},
             'Twitter': 'plugandwear',
             'Type': 'P'},
'154653833': {'Gender': 'F',
             'Location': 'SP',
             'Tags': {'arduino',
                     'cycling',
                     'entrepreneur',
                     'inventor',
                     'respect'},
             'Twitter': 'LookAtMePl_',
             'Type': 'P'},
'154670192': {'Gender': 'F',
             'Location': 'SK',
             'Tags': {'craft',
         

                     'research and development'},
             'Twitter': 'RoboDekan',
             'Type': 'P'},
'155241805': {'Gender': 'F',
             'Location': 'UK',
             'Tags': {'design',
                     'health',
                     'innovation',
                     'interaction',
                     'sustainability'},
             'Twitter': 'OliToolOgenblik',
             'Type': 'P'},
'155250009': {'Gender': 'M',
             'Location': 'IT',
             'Tags': {'inventor', 'design', 'technology', 'craft', 'systems'},
             'Twitter': 'frabomba',
             'Type': 'P'},
'155259995': {'Gender': 'M',
             'Location': 'SP',
             'Tags': {'developer', '3dprint', 'iot', 'innovation', 'share'},
             'Twitter': 'MiguelACasanova',
             'Type': 'P'},
'155294724': {'Gender': 'M',
             'Location': 'IT',
             'Tags': {'etichs', 'opensource'},
             'Twitter': 'kino3d',
             'Type': 'P'},
'1552

In [49]:
# A member:
a = '155039170'

slist = {t:v for t,v in OMExpertiseNet.items() if t[0] == a or t[1] == a}
slist = [t for t,v in OMExpertiseNet.items() if t[0] == a or t[1] == a]

nslist = set()
for s in slist:
    nslist = nslist.union(set(s))
nslist = nslist - set(a)

socscores = dict()
connections = OMSocialNet[a]
location_a = attr_TT[a]['Location']
for b in nslist:
    location_b = attr_TT[a]['Location']
    score = 0
    if location_a == location_b: score += 0.2
    if b in connections['mutuals']: score += 0.4
    if b in connections['in']: score += 0.2
    if b in connections['out']: score += 0.15
    potentials = {p['id']:p['sim'] for p in connections['potentials']}
    if b in potentials.keys():
        score += potentials[b] * 0.5
    if score:
        socscores[b] = score
pp.pprint(socscores)

scores = dict()
for b in socscores.keys():
    expscore = 0
    if (a,b) in OMExpertiseNet.keys():
        expscore = OMExpertiseNet[(a,b)]['KsimAB']
    elif (b,a) in OMExpertiseNet.keys():
        expscore = OMExpertiseNet[(b,a)]['KsimBA']
    netscore = socscores[b]
    score = geometric_mean(expscore,netscore)
    scores[b] = score
    
scores = sorted(scores.items(), key=lambda x: x[1], reverse=True)

b = scores[0][0]
pA = attr_data[a]['Tags']
pB = attr_data[b]['Tags']
topics = recommend_topic(pA,pB,cooccurances)
pp.pprint(topics)

{'140411756': 0.2,
'141446656': 0.2,
'141907540': 0.2,
'142954818': 0.2,
'153507756': 0.2,
'154442156': 0.2,
'154670192': 0.2,
'154678051': 0.2,
'154751469': 0.2,
'155039170': 0.2,
'155083636': 0.2,
'155085145': 0.2,
'155103070': 0.2,
'155170222': 0.2,
'155220790': 0.2,
'155222038': 0.2,
'155241805': 0.2,
'155339362': 0.2,
'155480169': 0.2,
'155486371': 0.2,
'155521973': 0.35013054830287205,
'155525266': 0.2,
'155546218': 0.2,
'155657933': 0.2,
'155660568': 0.2,
'155704668': 0.2,
'155729610': 0.2,
'155740257': 0.2,
'155743075': 0.2,
'155771445': 0.2,
'156002767': 0.2,
'157496690': 0.2,
'157496711': 0.2,
'157496825': 0.2,
'157784656': 0.2}
{'common': {'design', 'maker'},
'complement': [('innovation', 0.7537647537647538),
              ('digitalfabrication', 0.28001628001628),
              ('fablab', 0.24704924704924702),
              ('creative', 0.20187220187220187),
              ('laboratory', 0.07000407000407)]}


In [50]:
# A member:
a = '135377966'

slist = {t:v for t,v in OMExpertiseNet.items() if t[0] == a or t[1] == a}
slist = [t for t,v in OMExpertiseNet.items() if t[0] == a or t[1] == a]

nslist = set()
for s in slist:
    nslist = nslist.union(set(s))
nslist = nslist - set(a)

In [51]:
socscores = dict()
connections = OMSocialNet[a]
location_a = attr_TT[a]['Location']
for b in nslist:
    location_b = attr_TT[a]['Location']
    score = 0
    if location_a == location_b: score += 0.2
    if b in connections['mutuals']: score += 0.4
    if b in connections['in']: score += 0.2
    if b in connections['out']: score += 0.15
    potentials = {p['id']:p['sim'] for p in connections['potentials']}
    if b in potentials.keys():
        score += potentials[b] * 0.5
    if score:
        socscores[b] = score
pp.pprint(socscores)

{'135377966': 0.2, '154442156': 0.2}


In [52]:
scores = dict()
for b in socscores.keys():
    expscore = 0
    if (a,b) in OMExpertiseNet.keys():
        expscore = OMExpertiseNet[(a,b)]['KsimAB']
    elif (b,a) in OMExpertiseNet.keys():
        expscore = OMExpertiseNet[(b,a)]['KsimBA']
    netscore = socscores[b]
    score = geometric_mean(expscore,netscore)
    scores[b] = score

In [53]:
scores = sorted(scores.items(), key=lambda x: x[1], reverse=True)


In [54]:
b = scores[0][0]

In [55]:
b = scores[0][0]
pA = attr_data[a]['Tags']
pB = attr_data[b]['Tags']
topics = recommend_topic(pA,pB,cooccurances)
pp.pprint(topics)

{'common': {'enabling', 'open'},
'complement': [('innovation', 0.8833333333333333),
              ('design', 0.41666666666666663),
              ('software', 0.31666666666666665),
              ('eu', 0.21666666666666667),
              ('openmaker team member', 0.21666666666666667)]}


In [56]:
len(attr_data)

531

In [57]:
len(attr_TT)

173

In [68]:
import pickle

In [79]:
valid_set = set( OMNetData.keys()).intersection(attr_TT.keys())
crm_profile = {}
twitter_profile = {}
for k in valid_set:
    crm_profile[k] = attr_TT[k]
    twitter_profile[k] = OMNetData[k]

In [90]:
with open('crm_profile.pickle', 'wb') as f:
    pickle.dump(crm_profile, f)

In [91]:
with open('twitter_profile.pickle', 'wb') as f:
    pickle.dump(twitter_profile, f)

In [92]:
tags = [set(v['Tags']) for k, v in attr_data.items() if v['Tags']]
with open('skill_occurance.pickle', 'wb') as f:
    pickle.dump(tags, f)