## Personalized Recommendation Model

### Interfaces: DataIn
- CRM Data Connection
- Twitter Crawler
- User interaction data

### Interfaces: DataOut

#### API Services
- What to work/learn/observe
- Whom to follow
- Partnership: With whom on what

### Data Objects
- SkillComplementarityModel
    - training/forming
    - given two nodes, returns their complementarity score
    - given a node, returns the list of complementing nodes
- ExpertiseSeekingNet
- PotentialSocialNet
- UserBehavioralModel


### Backend Incremental Training Processes
- Update the complementarity model from the profiles/projects.
- Update the expertise net in terms of similarity and difference scores.
- Update social similarity from locality and Twitter.

In [1]:
import pickle
import pprint
pp = pprint.PrettyPrinter(indent=0)

In [2]:
def load_demographic_profile(fname = './data/crm_profile.pickle'):
    """Unpickle and return the CRM profile data stored in `fname`.

    The object is returned exactly as it was pickled; no filtering or
    reshaping is applied here.
    """
    with open(fname, 'rb') as handle:
        return pickle.load(handle)

def load_expertise_profile(fname = './data/crm_profile.pickle'):
    """Build an id -> tag-set mapping from the pickled CRM profiles.

    Every record is expected to carry a 'Tags' entry. Records whose 'Tags'
    value is empty (falsy) are left out of the result; the remaining tag
    collections are converted to `set`.
    """
    with open(fname, 'rb') as handle:
        records = pickle.load(handle)

    skills = {}
    for member_id, record in records.items():
        tags = record['Tags']
        if tags:
            skills[member_id] = set(tags)
    return skills

def load_skill_cooccurance(fname = './data/skill_occurance.pickle'):
    """Unpickle and return the skill co-occurrence source data in `fname`.

    The unpickled object is handed back untouched.
    """
    with open(fname, 'rb') as handle:
        return pickle.load(handle)

def load_social_media_profile(fname = './data/twitter_profile.pickle'):
    """Unpickle and return the Twitter profile data stored in `fname`.

    The unpickled object is handed back untouched.
    """
    with open(fname, 'rb') as handle:
        return pickle.load(handle)

def load_behavioral_profile(fname = None):
    """Placeholder loader: not implemented yet.

    Ignores `fname` and returns None.
    """
    return None

def load_spirometer_profile(fname = None):
    """Placeholder loader: not implemented yet.

    Ignores `fname` and returns None.
    """
    return None

def load_values_profile(fname = None):
    """Placeholder loader: not implemented yet.

    Ignores `fname` and returns None.
    """
    return None

def get_a_dict_item(adict):
    """Return one entry of `adict` as a fresh single-item dict.

    The entry is taken with `popitem()` and immediately written back, so the
    dictionary holds the same content afterwards. An empty dictionary makes
    `popitem()` raise KeyError.
    """
    entry = adict.popitem()
    # Put the popped pair back so the caller's dict is left intact.
    adict.update([entry])
    return dict([entry])

In [3]:
def form_potential_expertise_net(expertise_profiles, min_sim = 0.3, min_dif = 0.1):
    """Link pairs of profiles whose skill sets overlap but are not identical.

    Args:
        expertise_profiles: mapping of profile id -> set of skill tags.
        min_sim: minimum value required for both similarity scores.
        min_dif: minimum value required for both difference scores.

    Returns:
        dict keyed by ``(a, b)`` tuples, where ``a`` precedes ``b`` in the
        iteration order of `expertise_profiles`; each unordered pair is
        stored at most once. Every value is a dict with:

        - 'KsimAB': |A & B| / |A|
        - 'KsimBA': |A & B| / |B|
        - 'KdifAB': |A - B| / |B|
        - 'KdifBA': |B - A| / |A|

        Pairs with no common skill are never linked.

    Side effects:
        Prints the number of links and the network density, i.e. the number
        of links divided by the number of ordered pairs including self-pairs
        (``n * n``). The density is reported as 0.0 for an empty input
        instead of raising ZeroDivisionError.
    """
    from itertools import combinations

    members = list(expertise_profiles)
    # Denominator of the reported density: every ordered pair, self-pairs
    # included.
    pair_count = len(members) ** 2

    expertise_net = {}
    # All four scores are checked against the thresholds, so acceptance does
    # not depend on the orientation of the pair; visiting each unordered pair
    # once (earlier key first) is enough.
    for a, b in combinations(members, 2):
        skills_a = expertise_profiles[a]
        skills_b = expertise_profiles[b]
        n_common = len(skills_a.intersection(skills_b))
        if n_common == 0:
            continue

        # A non-empty intersection implies both sets are non-empty, so the
        # divisions below cannot hit zero.
        n_a = len(skills_a)
        n_b = len(skills_b)
        sim_ab = n_common / n_a
        sim_ba = n_common / n_b
        # Each difference is normalised by the size of the *other* set.
        dif_ab = len(skills_a - skills_b) / n_b
        dif_ba = len(skills_b - skills_a) / n_a

        if min(sim_ab, sim_ba) < min_sim:
            continue
        if min(dif_ab, dif_ba) < min_dif:
            continue

        expertise_net[(a, b)] = {
            'KsimAB': sim_ab,
            'KsimBA': sim_ba,
            'KdifAB': dif_ab,
            'KdifBA': dif_ba,
        }

    links = len(expertise_net)
    density = links / pair_count if pair_count else 0.0
    print('Number of links: {}, Network density: {}'.format(links, density))
    return expertise_net

    

In [4]:
def form_potential_social_net(social_profiles, min_sim = 0.1, min_dif = 0.3):
    """Derive follow relations and potential ties from Twitter friend lists.

    Args:
        social_profiles: mapping of profile id -> record. Each record must
            provide 'TwitterId' (convertible with ``int``) and 'flist' (a
            set of the Twitter ids in that profile's friend list).
        min_sim: minimum similarity for a target to count as a potential tie.
        min_dif: minimum difference for a target to count as a potential tie.

    Returns:
        dict mapping every profile id to a dict with:

        - 'mutuals': ids where each side's Twitter id is in the other's flist
        - 'out': ids whose Twitter id is in this profile's flist only
        - 'in': ids whose flist contains this profile's Twitter id only
        - 'potentials': list of ``{'id', 'sim', 'dif'}`` dicts for targets
          that pass both thresholds, where for source A and target B
          ``sim = |A & B| / |A|`` and ``dif = (|B| - |A & B|) / |A|``.
          When A's friend list is empty, sim is 0 and dif is 100.

    Side effects:
        Prints the number of potential links and the network density (links
        divided by the number of ordered pairs of distinct profiles). The
        density is reported as 0.0 when there are fewer than two profiles
        instead of raising ZeroDivisionError.
    """
    # Resolve each profile once instead of once per (source, target) pair.
    resolved = {
        profile_id: (int(record['TwitterId']), record['flist'])
        for profile_id, record in social_profiles.items()
    }
    n_profiles = len(resolved)
    pair_count = n_profiles * (n_profiles - 1)

    link_count = 0
    social_net = {}
    for from_id, (from_twitter_id, from_friends) in resolved.items():
        n_from = len(from_friends)
        mutuals = []
        out_links = []
        in_links = []
        potentials = []

        for to_id, (to_twitter_id, to_friends) in resolved.items():
            if to_id == from_id:
                continue

            # Reciprocity is recorded regardless of the thresholds below.
            follows = to_twitter_id in from_friends
            followed_by = from_twitter_id in to_friends
            if follows and followed_by:
                mutuals.append(to_id)
            elif follows:
                out_links.append(to_id)
            elif followed_by:
                in_links.append(to_id)

            if n_from > 0:
                n_common = float(len(from_friends.intersection(to_friends)))
                sim = n_common / n_from
                dif = (len(to_friends) - n_common) / n_from
            else:
                # No friends on the source side: no measurable similarity;
                # 100 is the fixed fallback difference for this case.
                sim = 0
                dif = 100

            if sim < min_sim or dif < min_dif:
                continue
            potentials.append({'id': to_id, 'sim': sim, 'dif': dif})
            link_count += 1

        social_net[from_id] = {
            'mutuals': mutuals,
            'in': in_links,
            'out': out_links,
            'potentials': potentials
        }

    density = link_count / pair_count if pair_count else 0.0
    print('Number of links: {}, Network density: {}'.format(link_count, density))
    return social_net

In [5]:
def form_cooccurances(attrs):
    """Count tag frequencies and pairwise co-occurrences over tag collections.

    `attrs` is traversed twice (once to collect the vocabulary, once to
    count), so it must be re-iterable.

    Returns a dict mapping every tag to ``{'f': n, 'co': {other: m}}`` where
    `n` is the number of times the tag appears across the collections and
    `m` is the number of times `other` appears alongside it.
    """
    vocabulary = set()
    for tagset in attrs:
        vocabulary = vocabulary | set(tagset)

    occurance = {tag: {'f': 0, 'co': {}} for tag in vocabulary}

    for tagset in attrs:
        for tag in tagset:
            entry = occurance[tag]
            entry['f'] += 1
            partners = entry['co']
            for other in tagset:
                if other != tag:
                    partners[other] = partners.get(other, 0) + 1
    return occurance

def get_complementarity_score(a,b,occurances):
    """To what extent `a` complements `b`.

    The score is the number of times `a` and `b` occur together divided by
    the frequency of `b`, so if `a` is present every time `b` is requested
    the score is 1.

    Returns -1 when either skill is missing from `occurances`, and 0 when
    the two never co-occur or `b` has a zero frequency.
    """
    if a not in occurances or b not in occurances:
        return -1

    together = occurances[a]['co']
    if b not in together:
        return 0

    frequency_b = occurances[b]['f']
    if not frequency_b:
        return 0
    return 1.0 * together[b] / frequency_b

def recommend_topic(pA,pB,occurances):
    """Rank the non-shared tags of two profiles against their shared tags.

    Each tag held by exactly one of the two profiles is scored with the sum
    of `get_complementarity_score(tag, shared_tag, occurances)` over all
    shared tags. With no shared tags every score is 0.

    NOTE: `get_complementarity_score` returns -1 for a skill missing from
    `occurances`; that value is added to the sum as-is, so unknown skills
    lower a tag's score.

    Returns a dict with:
        'common': set of tags present in both profiles.
        'complement': list of (tag, score) tuples, highest score first.
    """
    shared = set(pA).intersection(set(pB))
    exclusive = set(pA).union(set(pB)) - shared

    scores = {
        tag: sum(get_complementarity_score(tag, anchor, occurances) for anchor in shared)
        for tag in exclusive
    }
    ranked = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
    return {'common': shared, 'complement': ranked}

### Load CRM profile

In [6]:
# Full CRM records, as stored in the pickle file.
CRM_PROFILES = load_demographic_profile(fname = './data/crm_profile.pickle')
# Not the last expression of the cell, so this length is not displayed.
len(CRM_PROFILES)
# Show a single record as a sample of the data layout.
print(get_a_dict_item(CRM_PROFILES))

{'155499906': {'Gender': 'F', 'Type': 'P', 'Location': 'SP', 'Twitter': 'EndeFEng', 'Tags': {'innovation', 'engineering', 'energy', 'industry40', 'entrepreneur'}}}


### Load expertise profile

In [7]:
# Read from the same pickle as the CRM profiles, reduced to id -> set of tags;
# records with empty 'Tags' are dropped by the loader.
EXPERTISE_PROFILES = load_expertise_profile(fname = './data/crm_profile.pickle')
# Not the last expression of the cell, so this length is not displayed.
len(EXPERTISE_PROFILES)
# Show a single entry as a sample of the data layout.
print(get_a_dict_item(EXPERTISE_PROFILES))

{'155499906': {'innovation', 'engineering', 'energy', 'industry40', 'entrepreneur'}}


### Load Twitter connections

In [8]:
# Twitter records, as stored in the pickle file.
TWITTER_PROFILES = load_social_media_profile(fname = './data/twitter_profile.pickle')
# Not the last expression of the cell, so this length is not displayed.
len(TWITTER_PROFILES)
# Show a single record as a sample of the data layout.
print(get_a_dict_item(TWITTER_PROFILES))

{'155499906': {'TwitterName': 'EndeFEng', 'nf': 262, 'flist': {804379435487162368, 830022487954092032, 740123094480781312, 757942117817782273, 800740677449490433, 716738616354070528, 755365692787032065, 793898688238198784, 766538603602120705, 715555772311150592, 763325727525732352, 274958853, 1462596110, 1055591952, 1438617104, 90877970, 259823635, 1856644122, 275608091, 4733045788, 453975069, 1363786782, 582066208, 1246637089, 261177890, 182320162, 150963748, 526224420, 2570518567, 250523687, 4860355115, 372724268, 793673772, 279153198, 257460272, 2878792753, 292966964, 448738356, 1097571896, 125534265, 19923515, 3104117308, 414819902, 1891878464, 1108940864, 4491979335, 223428680, 2566593608, 296152650, 1409910859, 1897047114, 366500942, 193648207, 145681999, 533175894, 247237207, 458496089, 583086682, 95951964, 500945500, 1074208862, 96968287, 1152162912, 284092512, 733091426, 1336766556, 15764581, 2531465830, 806249580, 156224108, 245796974, 14786159, 1030213232, 460954733, 1344809

### Load skill co-occurrence data

In [9]:
# Uses the loader's default path ('./data/skill_occurance.pickle').
SKILLS = load_skill_cooccurance()
# Preview the first three entries.
SKILLS[0:3]

[{'design', 'innovation', 'social'},
 {'ciao', 'js', 'python'},
 {'design', 'humanitarian', 'industrial', 'innovation', 'supplychain'}]

In [10]:
# Per-tag frequency ('f') and pairwise co-occurrence counts ('co') over SKILLS.
COOCURANCE_MATRIX = form_cooccurances(SKILLS)

### Form skill complementarity model
For the time being, it is a skill-to-skill complementarity based on their co-occurrences in maker projects.

In [11]:
help(get_complementarity_score)

Help on function get_complementarity_score in module __main__:

get_complementarity_score(a, b, occurances)
    To what extend a complements b. 
    
    For instance if a happens to occur/needed all the time b requested,
    then this would lead to a score of 1.



In [12]:
# The score is directional: score(a, b) divides the a-b co-occurrence count
# by the frequency of b, so swapping the arguments generally changes it.
skill_a = 'js'
skill_b = 'python'
print(get_complementarity_score(skill_a,skill_b,COOCURANCE_MATRIX))
print(get_complementarity_score(skill_b,skill_a,COOCURANCE_MATRIX))

0.2857142857142857
1.0


In [13]:
skill_a = 'design'
skill_b = 'social'
print(get_complementarity_score(skill_a,skill_b,COOCURANCE_MATRIX))
print(get_complementarity_score(skill_b,skill_a,COOCURANCE_MATRIX))

0.14285714285714285
0.02197802197802198


### Choose makers

In [14]:
maker_a = '140411415'
maker_b = '155499906'

In [15]:
pp.pprint(CRM_PROFILES[maker_a])

{'Gender': 'F',
'Location': 'IT',
'Tags': {'collaborativeeconomy',
        'innovation',
        'openmaker team member',
        'socialinnovation'},
'Twitter': 'LaMartelloni',
'Type': 'P'}


In [16]:
pp.pprint(CRM_PROFILES[maker_b])

{'Gender': 'F',
'Location': 'SP',
'Tags': {'innovation', 'engineering', 'energy', 'industry40', 'entrepreneur'},
'Twitter': 'EndeFEng',
'Type': 'P'}


In [17]:
# Tag sets of the two chosen makers.
expertise_a = EXPERTISE_PROFILES[maker_a]
expertise_b = EXPERTISE_PROFILES[maker_b]
suggestions = recommend_topic(expertise_a,expertise_b,COOCURANCE_MATRIX)
# 'common': tags both makers share.
common_interests = suggestions['common']
# 'complement': (tag, score) pairs for the non-shared tags, highest score first.
possible_joint_themes = suggestions['complement']

In [18]:
print(common_interests)

{'innovation'}


In [19]:
pp.pprint(possible_joint_themes)

[('engineering', 0.11724137931034483),
('industry40', 0.06896551724137931),
('socialinnovation', 0.05517241379310345),
('entrepreneur', 0.041379310344827586),
('energy', 0.027586206896551724),
('openmaker team member', 0.013793103448275862),
('collaborativeeconomy', 0.013793103448275862)]


### Compute expertise similarities and differences

In [20]:
# Override the function defaults (min_sim=0.3, min_dif=0.1) with 0.2 for both.
PotentialExpertiseNet = form_potential_expertise_net(EXPERTISE_PROFILES, min_sim = 0.2, min_dif = 0.2)

Number of links: 3387, Network density: 0.1374092255263905


In [21]:
# Each linked pair is stored under one key orientation only (the id that comes
# first in EXPERTISE_PROFILES iteration order is first in the tuple), so the
# reversed tuple is not a key.
PotentialExpertiseNet[(maker_a, maker_b)]

{'KdifAB': 0.6, 'KdifBA': 1.0, 'KsimAB': 0.25, 'KsimBA': 0.2}

### Compute social similarities and differences 

In [22]:
# Override the function defaults (min_sim=0.1, min_dif=0.3) with 0.2 for both.
PotentialSocialNet = form_potential_social_net(TWITTER_PROFILES, min_sim = 0.2, min_dif = 0.2)

Number of links: 171, Network density: 0.00698187163155316


In [23]:
print(maker_a, maker_b)

140411415 155499906


In [24]:
PotentialSocialNet[maker_a]

{'in': [],
 'mutuals': ['145489262',
  '153757086',
  '140411756',
  '152031440',
  '155714799',
  '153862027',
  '135377966',
  '156612789'],
 'out': ['154747461',
  '152583764',
  '156582037',
  '152730563',
  '153818672',
  '153689099',
  '157784656'],
 'potentials': [{'dif': 2.4761904761904763,
   'id': '152583764',
   'sim': 0.23015873015873015},
  {'dif': 5.476190476190476, 'id': '145489262', 'sim': 0.23809523809523808},
  {'dif': 21.841269841269842, 'id': '153818672', 'sim': 0.30158730158730157},
  {'dif': 8.134920634920634, 'id': '153757086', 'sim': 0.21428571428571427},
  {'dif': 2.9365079365079363, 'id': '140411756', 'sim': 0.5079365079365079},
  {'dif': 4.9523809523809526, 'id': '157784656', 'sim': 0.2222222222222222},
  {'dif': 2.238095238095238, 'id': '155714799', 'sim': 0.2857142857142857},
  {'dif': 12.642857142857142, 'id': '153862027', 'sim': 0.24603174603174602},
  {'dif': 2.8095238095238093, 'id': '135377966', 'sim': 0.5476190476190477}]}

In [25]:
print(maker_a, maker_b)

140411415 155499906


In [26]:
PotentialSocialNet[maker_b]

{'in': [], 'mutuals': [], 'out': [], 'potentials': []}

In [27]:
from scipy.stats import gmean 