In [None]:
import pandas as pd
import networkx as nx
import community

In [23]:
def to_unimodal(*args):
    """Make a unimodal network of actors based on the books they worked on.

    Every csv is read with its first column as the index; the two remaining
    columns are relabelled ``key`` and ``person``. Two persons are linked
    when they appear under the same ``key``, and the edge ``weight`` counts
    how many times the pair co-occurs.

    Files written to the current working directory:
      * ``actors.graphml``           -- the actor graph with node attributes
      * ``actors_Community.graphml`` -- graph whose nodes are the communities
      * ``actors.csv``               -- community, k-core and betweenness
                                        per actor

    @args: paths of the csv files to combine (printers, publishers, sellers)
    return: None
    """
    # Concat printers, publishers, and sellers in a dataframe
    frames = []
    for path in args:
        frame = pd.read_csv(path, index_col=0, engine='python')
        frame.columns = ['key', 'person']
        frames.append(frame)
    records = pd.concat(frames)

    # Make edgelist: self-join on the book key pairs up everybody who
    # worked on the same book.
    pairs = records.merge(records, on='key')
    pairs = pairs.drop('key', axis=1).rename(columns={
        'person_x': 'Source', 'person_y': 'Target'
    })
    # 'Source<Target' keeps each unordered pair once and removes self-pairs.
    # .copy() makes the result an independent frame, so adding the weight
    # column below does not write into a slice (SettingWithCopyWarning).
    pairs = pairs.query('Source<Target').copy()

    # Create weight -- edges attribute
    pairs['weight'] = 1
    edges = pairs.groupby(['Source', 'Target']).sum().reset_index()

    # NOTE(review): from_pandas_dataframe and the (G, name, values) argument
    # order of set_node_attributes are the networkx 1.x API; networkx 2.x
    # renamed/reordered them -- confirm the installed version before upgrading.
    G = nx.from_pandas_dataframe(edges, 'Source', 'Target', 'weight')
    commun = community.best_partition(G)
    nx.set_node_attributes(G, 'community', commun)
    nx.set_node_attributes(G, 'k-core', nx.core_number(G))

    # betweenness_centrality draws k pivot nodes with random.sample, which
    # raises ValueError when k exceeds the node count. Only sample when the
    # graph is large enough; otherwise compute the exact value (k=None).
    sample_cap = 1000
    pivots = sample_cap if G.number_of_nodes() >= sample_cap else None
    # NOTE(review): networkx uses 'weight' as a distance for shortest paths,
    # so frequent collaborators count as farther apart -- confirm intended.
    nx.set_node_attributes(G, 'betweenness',
                           nx.betweenness_centrality(G, k=pivots,
                                                     normalized=True,
                                                     weight='weight'))

    # export Edges into a graphml file
    nx.write_graphml(G, 'actors.graphml', encoding='utf-8')
    # export a graph where nodes are the communities
    G_commun = community.induced_graph(commun, G)
    nx.write_graphml(G_commun, 'actors_Community.graphml', encoding='utf-8')

    # export nodes to a csv; iterating the (name, attributes) pairs directly
    # also works for a graph without nodes, unlike unpacking zip(*...).
    node_items = list(G.nodes(data=True))
    names = [name for name, _ in node_items]
    nodes_df = pd.DataFrame({
        'community': [attr['community'] for _, attr in node_items],
        'k-core': [attr['k-core'] for _, attr in node_items],
        'betweenness': [attr['betweenness'] for _, attr in node_items],
    }, index=names)
    nodes_df.to_csv('actors.csv', encoding='utf-8')
    
    
    

In [37]:
def ppl_book(*args):
    """Build and export the people-book network.

    Every csv is read with its first column as the index; the two remaining
    columns are relabelled ``key`` and ``person``. Each distinct
    (``key``, ``person``) pair becomes one edge whose ``weight`` is the
    number of rows in which that pair occurs.

    Files written to the current working directory:
      * ``ppl_book.graphml``           -- graph with 'Type', 'k-core' and
                                          'community' node attributes
      * ``ppl_book_Community.graphml`` -- graph whose nodes are the communities
      * ``ppl_book.csv``               -- community and k-core per node

    @args: paths of the csv files to combine
    return: None
    """
    frames = []
    for path in args:
        frame = pd.read_csv(path, index_col=0, engine='python')
        frame.columns = ['key', 'person']
        frames.append(frame)
    records = pd.concat(frames)
    records['weight'] = 1

    # Make edgelist: one row per (book, person) with the summed weight
    edges = records.groupby(['key', 'person']).sum().reset_index()

    # Node -> 'People' / 'Book' lookup. Books are merged in last, so a label
    # that occurs both as a person and as a key ends up typed 'Book'.
    node_types = pd.Series('People', index=edges['person'].unique()).to_dict()
    node_types.update(
        pd.Series('Book', index=edges['key'].unique()).to_dict())

    edges = edges.rename(columns={'person': 'Source', 'key': 'Target'})

    # make the people-book network
    # NOTE(review): from_pandas_dataframe and the (G, name, values) argument
    # order of set_node_attributes are the networkx 1.x API; networkx 2.x
    # renamed/reordered them -- confirm the installed version before upgrading.
    G = nx.from_pandas_dataframe(edges, 'Source', 'Target', 'weight')
    nx.set_node_attributes(G, 'Type', node_types)
    nx.set_node_attributes(G, "k-core", nx.core_number(G))
    commun = community.best_partition(G)
    nx.set_node_attributes(G, "community", commun)
    nx.write_graphml(G, "ppl_book.graphml", encoding="utf-8")

    # export a graph where nodes are the communities
    G_commun = community.induced_graph(commun, G)
    nx.write_graphml(G_commun, 'ppl_book_Community.graphml', encoding='utf-8')

    # export nodes to a csv; iterating the (name, attributes) pairs directly
    # also works for a graph without nodes, unlike unpacking zip(*...).
    node_items = list(G.nodes(data=True))
    names = [name for name, _ in node_items]
    nodes_df = pd.DataFrame({
        'community': [attr['community'] for _, attr in node_items],
        'k-core': [attr['k-core'] for _, attr in node_items],
    }, index=names)
    nodes_df.to_csv('ppl_book.csv', encoding='utf-8')
    

In [38]:
def regNm(path,nonstd=None):
    """Standardize people names with a lookup table stored as a csv.

    @path: csv with standard names as columns, and their variations as rows
    @nonstd: a series of unstandardized people names. Required; the ``None``
        default is kept only so the signature stays unchanged.
    return: a series of standardized people names. Names that are not listed
        as a variation in the csv are returned unchanged.
    raises: TypeError when ``nonstd`` is not supplied
    """
    # Fail early with a clear message instead of an AttributeError on None
    # after the csv has already been read.
    if nonstd is None:
        raise TypeError(
            "regNm() requires 'nonstd': a pandas Series of names to standardize")

    table = pd.read_csv(path)
    # melt gives one row per (standard name, variation); dropna removes the
    # empty cells of columns that have fewer variations than others.
    long_form = pd.melt(table).dropna()
    # variation -> standard name
    std_dict = long_form.set_index('value')['variable'].to_dict()

    # Replace the names that exist in the dictionary, keep the others as is
    return nonstd.map(lambda person: std_dict.get(person, person))