# Social Network Analysis

In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
from scipy import stats
import igraph as ig
import networkx as nx

## Descriptive Gender Analysis

In [2]:
#Load data into dataframe
nodes = pd.read_csv(r'/Users/PJMoon/Data_Science_Projects/SNA/Assortativity/Nodes with attributes.csv')
link = pd.read_csv(r'/Users/PJMoon/Data_Science_Projects/SNA/Assortativity/Links - Intragroup.csv')

#Select columns of interst from node data 
nodes = nodes[['ID','Gender']]

#Out-degree statistics by Gender and Label
link_source = link.rename(columns = {'Source': "ID"})
out_degree = pd.merge(link_source,nodes,how='left',on = 'ID')
by_label_out_degree = out_degree.groupby(['Gender','Label']).ID.count().reset_index()

#In-degree statistics by Gender and Label
link_source2 = link.rename(columns = {"Target":"ID"})
in_degree = pd.merge(link_source2,nodes,how='left',on = 'ID')
by_label_in_degree = in_degree.groupby(['Gender','Label']).ID.count().reset_index()

#Gender to gender in and out degree statistics 
in_degree.rename(columns = {'Gender': 'Gender_in','ID': 'Target2','Source':'Source2','Label':'Label2'},inplace=True)
out_degree.rename(columns = {'Gender': 'Gender_out','ID': 'Source'},inplace=True)
combined = pd.concat([out_degree,in_degree],axis=1)
combined = combined[['Source','Target','Label','Gender_out','Gender_in']]
gender_to_gender = combined.groupby(['Gender_out','Gender_in']).Source.count().reset_index()

In [3]:
#print nodes
nodes

Unnamed: 0,ID,Gender
0,428634,Female
1,428764,Female
2,428881,Female
3,428738,Female
4,428803,Female
...,...,...
607,428244,Female
608,428192,Male
609,428231,Male
610,442219,Male


In [4]:
#print a number of out-degree by gender and label
by_label_out_degree

Unnamed: 0,Gender,Label,ID
0,Female,Access,96
1,Female,Career Development,165
2,Female,Decision Making,230
3,Female,Energy,321
4,Female,Innovation,220
5,Female,Personal Support,234
6,Female,Problem Solving,298
7,Female,Strategy,196
8,Female,Trust,274
9,Male,Access,133


In [5]:
#print a number of in-degree by gender and label
by_label_in_degree

Unnamed: 0,Gender,Label,ID
0,Female,Access,62
1,Female,Career Development,135
2,Female,Decision Making,199
3,Female,Energy,308
4,Female,Innovation,198
5,Female,Personal Support,213
6,Female,Problem Solving,247
7,Female,Strategy,117
8,Female,Trust,237
9,Male,Access,167


In [6]:
#print a number of degrees between genders
gender_to_gender

Unnamed: 0,Gender_out,Gender_in,Source
0,Female,Female,1010
1,Female,Male,1024
2,Male,Female,706
3,Male,Male,2353


In [7]:
#Gender to gender statistics by label
label = combined['Label'].unique()
for i in range(0,len(label)):
    sector = combined[combined['Label'] == label[i]]
    a = sector.groupby(['Gender_out','Gender_in']).Source.count().reset_index()
    b= print('{0}'.format(label[i]),a)

Energy   Gender_out Gender_in  Source
0     Female    Female     190
1     Female      Male     131
2       Male    Female     118
3       Male      Male     327
Trust   Gender_out Gender_in  Source
0     Female    Female     137
1     Female      Male     137
2       Male    Female     100
3       Male      Male     330
Career Development   Gender_out Gender_in  Source
0     Female    Female      81
1     Female      Male      84
2       Male    Female      54
3       Male      Male     182
Personal Support   Gender_out Gender_in  Source
0     Female    Female     154
1     Female      Male      80
2       Male    Female      59
3       Male      Male     201
Decision Making   Gender_out Gender_in  Source
0     Female    Female     114
1     Female      Male     116
2       Male    Female      85
3       Male      Male     294
Access   Gender_out Gender_in  Source
0     Female    Female      34
1     Female      Male      62
2       Male    Female      28
3       Male      Male     10

## Gender Assortativity 

In [8]:
#Link data preprocessing
a = [i for i in range(0,612)]
nodes['ID2']= a
nodes2 = nodes.rename(columns = {'ID':'Target','ID2':'ID_Target'})
link2 = link[['Source','Target']]
link2 = link2.rename(columns = {'Source' : 'ID'})
link3 = pd.merge(link2,nodes,how='left')
link4 = pd.merge(link3,nodes2,how='left',on='Target')
link_final = link4[['ID2','ID_Target']]
link_final = link_final.rename(columns = {'ID2':'Source','ID_Target':'Target'})
link_final['link'] = list(zip(link_final['Source'],link_final['Target']))
link_final = link_final['link']

#Node data preprocessing
nodes_network = nodes[['ID','Gender']]
nodes_network['Gender2'] = nodes_network['Gender'].map({'Female':1, 'Male':2})

#Creating a network and compute the assortativity
nodes_network['ID'] = [i for i in range(0,612)]
g = ig.Graph()
g.add_vertices([i for i in range(0,612)])
g.add_edges(link_final)
g.vs['Gender'] = nodes_network['Gender2']
print('The assortativity of the network is {0}'.format(g.assortativity_nominal('Gender',directed=True)))

The assortativity of the network is 0.26986616946343467


In [9]:
g.assortativity_degree(directed=True)

0.04431813418505621

In [10]:
layout = g.layout('kamada_kawai')

In [11]:
plot(g,layout = layout)

NameError: name 'plot' is not defined

## Gender Assortativity By Label

In [41]:
#Link data preprocessing
link_label = link[['Source','Target','Label']]
link_label = link_label.rename(columns = {'Source' : 'ID'})
link_label2=pd.merge(link_label,nodes,how='left')
link_label3 = pd.merge(link_label2,nodes2,how='left', on='Target')
link_label_final = link_label3[['ID2','ID_Target','Label']]
link_label_final = link_label_final.rename(columns = {'ID2': 'Source','ID_Target':'Target'})
link_label_final['link'] = list(zip(link_label_final['Source'],link_label_final['Target']))
link_label_final = link_label_final[['link','Label']]

#create link data with label
labels = link_label_final['Label'].unique()
link_data_by_label = []
for label in labels:
    new_label = link_label_final[link_label_final['Label'] == label]
    link_data_by_label.append(new_label)
    
    
#create a network and compute the assor    
node_final = [i for i in range(0,612)]
assortativity=[]
for datum in link_data_by_label:
    g = ig.Graph()
    g.add_vertices(node_final)
    g.add_edges(datum['link'])
    g.vs['Gender'] = nodes_network['Gender2']
    temp = g.assortativity_nominal('Gender',directed = True)
    assortativity.append(temp)
    
assortativity_label = {key: value for key, value in zip(labels,assortativity)}

In [48]:
#print assortativity by label
assortativity_label

{'Energy': 0.32838603700436797,
 'Trust': 0.27198947568214993,
 'Career Development': 0.2650996015936256,
 'Personal Support': 0.4321064231868238,
 'Decision Making': 0.276715679757505,
 'Access': 0.1303797468354431,
 'Innovation': 0.23365849436840877,
 'Problem Solving': 0.24722893422027978,
 'Strategy': 0.0722488805555895}