In [94]:
import pandas as pd
import networkx as nx
import numpy as np

In [2]:
commits = pd.read_feather('../data/commits.feather')
commits.head()

Unnamed: 0,commit_id,project_id,project_name,parent_id
0,321132855,26730207,meta-ivi,321132856.0
1,333294039,27617768,ExcelLaunchPad,333294036.0
2,302312189,24513310,yii2-webception,302312191.0
3,124854783,3452681,mammoth.js,124843456.0
4,423307958,32130467,phase-0,423307476.0


In [3]:
project_names_sample = commits[['project_name']].sample(5, random_state = 2019)
commits_sample = commits.loc[commits['project_name'].isin(project_names_sample.project_name.values)]
commits_sample.head()

Unnamed: 0,commit_id,project_id,project_name,parent_id
312,57035161,3526154,oneclick,57035155.0
313,162455517,3526154,oneclick,162450416.0
3666,61587113,3526154,oneclick,61539488.0
3667,269200796,3526154,oneclick,269200795.0
5391,39513853,3526154,oneclick,39513850.0


In [4]:
graphs = {}

In [39]:
for project in project_names_sample.project_name.values:
    graph_df = commits_sample.loc[commits_sample['project_name'] == project]
    graph_df = graph_df[['commit_id', 'parent_id']]
    graph_df.columns = pd.Index(['target', 'source'])
    
    graph = nx.from_pandas_edgelist(graph_df, create_using=nx.DiGraph)
    graph.name = project
    graphs[project] = graph

In [66]:
repo = graphs['javarosa']

In [63]:
x = list(nx.degree(test_repo))

### Features

- Number of Nodes
- Number of Edges
- Density
- Average Clustering Coefficient
- Transitivity
- Weakly Connected
- Number of Weakly Connected
- Number attracting components

In [87]:
num_nodes = repo.number_of_nodes()
num_edges = repo.number_of_edges()

density = nx.density(repo)
avg_clustering = nx.algorithms.cluster.average_clustering(repo)
transitivity = nx.algorithms.cluster.transitivity(repo)

weakly_connected = nx.algorithms.components.is_weakly_connected(repo)
num_weakly = nx.algorithms.components.number_weakly_connected_components(repo)
num_attracting = nx.algorithms.components.number_attracting_components(repo)

In [102]:
degree_centrality = nx.algorithms.centrality.degree_centrality(repo).values()
in_degree_centrality = nx.algorithms.centrality.in_degree_centrality(repo).values()
out_degree_centrality = nx.algorithms.centrality.out_degree_centrality(repo).values()

In [111]:
eigen_centrality = nx.algorithms.centrality.eigenvector_centrality(repo, max_iter=int(1e6)).values()
katz_centrality = nx.algorithms.centrality.katz_centrality(repo, max_iter=int(1e6)).values()
#between_centrality = nx.algorithms.centrality.current_flow_betweenness_centrality(repo).values()
#closeness_centrality = nx.algorithms.centrality.current_flow_closeness_centrality(repo).values()
num_triangles = nx.algorithms.cluster.clustering(repo).values()

In [114]:
degree = list(nx.degree(repo))

In [118]:
in_degree = list(repo.in_degree)
out_degree = list(repo.out_degree)

[(502161249.0, 1),
 (502161259, 1),
 (502158725.0, 1),
 (502158735, 1),
 (502161188.0, 1),
 (502161201, 1),
 (502158746.0, 1),
 (502158748, 1),
 (502158764.0, 1),
 (502158765, 1),
 (502157241.0, 1),
 (502157242, 1),
 (502161176.0, 1),
 (502161194, 1),
 (502161313.0, 1),
 (502161321, 1),
 (502161303.0, 1),
 (502161312, 1),
 (502158744.0, 1),
 (502157215.0, 1),
 (502157221, 1),
 (502161189.0, 1),
 (502161198, 1),
 (502161264.0, 1),
 (502161273, 1),
 (502161298.0, 1),
 (502161308, 1),
 (502158766, 1),
 (502157153.0, 1),
 (502157130, 1),
 (502161304.0, 1),
 (502157235.0, 1),
 (502157236, 1),
 (502161233.0, 1),
 (502161239, 1),
 (502161150.0, 1),
 (502161138, 1),
 (502161269.0, 1),
 (502161278, 1),
 (502161140.0, 1),
 (502161129, 1),
 (502161227.0, 1),
 (502161241, 1),
 (502161154.0, 1),
 (502161167, 1),
 (502161320.0, 1),
 (502161328, 1),
 (502158739.0, 1),
 (502158726, 2),
 (502161163, 1),
 (502161252.0, 1),
 (502161263, 1),
 (502161270.0, 1),
 (502161286, 1),
 (502158700.0, 1),
 (5021587

- Degree Centrality **dict**
    - networkx.algorithms.centrality.degree_centrality
- In-Degree Centrality **dict**
    - networkx.algorithms.centrality.in_degree_centrality
- Out-Degree Centrality **dict**
    - networkx.algorithms.centrality.out_degree_centrality
- Eigenvector Centrality **dict**
    - networkx.algorithms.centrality.eigenvector_centrality
- Katz Centrality **dict**
    - networkx.algorithms.centrality.katz_centrality
- Current-flow Closeness Centrality **dict**
    - networkx.algorithms.centrality.current_flow_closeness_centrality
- Betweenness Centrality **dict**
    - networkx.algorithms.centrality.betweenness_centrality
- Clustering **dict**
    - networkx.algorithms.cluster.clustering
- Average degree (overall)
- Standard Deviation degree (overall)
- Average in degree
- Standard Deviation in degree
- Average out degree
- Standard Deviation out degree