In [1]:
import pandas as pd
import networkx as nx
import community

import operator

In [2]:
# Edge list of tag pairs; the frame exposes `source`, `target` and `weight` columns.
df = pd.read_csv(filepath_or_buffer='../tags_with_wiki_relationship.csv')
df

Unnamed: 0,source,target,weight
0,c#,decimal,3081
1,c#,double,2895
2,c#,floating-point,1946
3,c#,type-conversion,4426
4,decimal,double,790
...,...,...,...
936601,database,parcelable,3
936602,web-config,windows-phone-7,4
936603,drag-and-drop,lag,12
936604,animation,markup,14


In [3]:
# Per-tag wiki table with `TagName`, `Body` and `root` columns.
# NOTE(review): the explicit '\n' line terminator presumably guards against
# stray carriage returns inside `Body` text -- confirm against the raw file.
df_wiki = pd.read_csv(
    filepath_or_buffer='../tags_with_wiki_and_category.csv',
    lineterminator='\n',
)
df_wiki

Unnamed: 0,TagName,Body,root
0,java,java is a high-level object oriented programmi...,language
1,regex,Regular expressions provide a declarative lang...,
2,xml,xml (extensible markup language) is a structur...,format
3,mysql,mysql is a free open source relational databas...,system
4,nant,nant is a build tool for .net aiming to suppor...,tool
...,...,...,...
41778,minimal-apis,Minimal APIs were introduced in ASP.NET Core 6...,
41779,ix.net,the interactive extensions (ix) is a .net libr...,library
41780,amazon-memory-db,memorydb for redis is an in-memory database se...,service
41781,decomposition,Decomposition might refer to Time Series Decom...,


In [4]:
# Map every tag name to a dict of its other columns so the mapping can be
# attached to graph nodes as attributes.
node_attr = (
    df_wiki
    .set_index(keys='TagName')
    .to_dict(orient='index')
)

In [5]:
# Build an undirected graph from the edge list in `df`, keeping `weight` as an
# edge attribute, then attach the per-tag wiki columns as node attributes.
Graphtype = nx.Graph()
G = nx.from_pandas_edgelist(
    df,
    source='source',
    target='target',
    edge_attr='weight',
    create_using=Graphtype,
)
nx.set_node_attributes(G, values=node_attr)

In [6]:
# Sanity check: show the attributes attached to the 'c#' node.
G.nodes['c#']

{'Body': 'c# (pronounced "see sharp") is a high level statically typed multi-paradigm programming language developed by microsoft.',
 'root': 'language'}

In [7]:
# Sanity check: show the neighbours of 'c#' together with their edge data.
G['c#']



In [8]:
# Partition the full graph into communities with a fixed seed, then score the
# resulting partition by its modularity.
part_1 = community.best_partition(graph=G, random_state=27)
mod_1 = community.modularity(partition=part_1, graph=G)

In [9]:
# Community id that the partition assigned to 'c#'.
part_1['c#']

0

In [10]:
# Number of communities: the largest community id plus one
# (assumes ids are consecutive integers starting at 0).
number_of_comm_1 = max(part_1.values()) + 1
number_of_comm_1

20

In [11]:
# Group tags by community id in a single pass over the partition instead of
# rescanning every (tag, community) pair once per community. Tags keep the
# partition's iteration order inside each community, as before.
list_of_comm_1 = [[] for _ in range(number_of_comm_1)]
for tag, comm_id in part_1.items():
    list_of_comm_1[comm_id].append(tag)

# Members of the community that contains 'c#'.
list_of_comm_1[part_1['c#']]

['c#',
 '.net',
 '.net-3.5',
 'linq',
 'web-services',
 'datatable',
 'timer',
 'vb.net',
 'architecture',
 'com-interop',
 'asp.net',
 'sitemap',
 'printing',
 'globalization',
 'generics',
 'com',
 'vb6',
 'pocketpc',
 'data-binding',
 'winforms',
 'pdf',
 'unit-testing',
 'medium-trust',
 'exchange-server',
 'visual-studio',
 'lambda',
 'active-directory',
 'iis',
 'nunit',
 'legacy',
 'oledb',
 'visual-foxpro',
 'reflection',
 'installation',
 'cruisecontrol.net',
 'msbuild',
 'documentation',
 'sandcastle',
 'xml-comments',
 'localization',
 'windows-server-2003',
 'chm',
 'winhelp',
 'viewstate',
 'nant',
 'nhibernate',
 'subsonic',
 'windows-server-2008',
 'dependency-properties',
 'silverlight',
 'wpf',
 'projects-and-solutions',
 'asp.net-mvc',
 'datacontext',
 'linq-to-sql',
 'throttling',
 'tdd',
 'design-patterns',
 'model-view-controller',
 'mvp',
 '.net-2.0',
 'clr',
 'web-controls',
 'master-pages',
 'interop',
 'dataset',
 'url-routing',
 'asp.net-ajax',
 'keyboard-shor

In [12]:
# Restrict the graph to the tags that share a community with 'c#'.
G_1 = G.subgraph(
    nodes=list_of_comm_1[part_1['c#']],
)

In [13]:
# Show the edges of the 'c#' community subgraph (the output is large).
G_1.edges

EdgeView([('checkboxlist', 'asp.net-mvc'), ('checkboxlist', '.net'), ('checkboxlist', 'c#'), ('checkboxlist', 'asp.net'), ('checkboxlist', 'user-controls'), ('checkboxlist', 'updatepanel'), ('checkboxlist', '.net-3.5'), ('checkboxlist', 'formview'), ('checkboxlist', 'objectdatasource'), ('checkboxlist', 'viewstate'), ('checkboxlist', 'binding'), ('checkboxlist', 'data-binding'), ('checkboxlist', 'linq'), ('checkboxlist', 'properties'), ('checkboxlist', 'asp.net-3.5'), ('checkboxlist', 'postback'), ('checkboxlist', 'vb.net'), ('checkboxlist', 'controls'), ('checkboxlist', 'radiobuttonlist'), ('checkboxlist', 'webforms'), ('checkboxlist', 'linq-to-sql'), ('checkboxlist', 'dotnetnuke'), ('checkboxlist', 'customvalidator'), ('checkboxlist', 'nullreferenceexception'), ('checkboxlist', 'panel'), ('checkboxlist', 'mvccontrib'), ('checkboxlist', 'linkbutton'), ('checkboxlist', 'asp.net-mvc-2'), ('checkboxlist', 'model-binding'), ('checkboxlist', 'selecteditem'), ('checkboxlist', 'linq-to-entit

In [14]:
# Second-level partition inside the 'c#' community. The seed is fixed (same
# value as the first-level partition) because the algorithm is randomized;
# without it, part_2 and mod_2 differ from one run to the next.
part_2 = community.best_partition(G_1, random_state=27)
mod_2 = community.modularity(part_2, G_1)

In [15]:
# Rank the tags of the 'c#' community by degree, highest first.
sorted(G_1.degree, key=operator.itemgetter(1), reverse=True)

[('c#', 2395),
 ('.net', 2097),
 ('asp.net', 1478),
 ('vb.net', 1081),
 ('wpf', 951),
 ('visual-studio-2010', 892),
 ('asp.net-mvc', 881),
 ('visual-studio', 880),
 ('silverlight', 797),
 ('wcf', 783),
 ('winforms', 773),
 ('visual-studio-2008', 735),
 ('.net-4.0', 673),
 ('.net-3.5', 639),
 ('c#-4.0', 638),
 ('web-services', 617),
 ('asp.net-mvc-3', 588),
 ('linq', 558),
 ('unit-testing', 540),
 ('asp.net-mvc-2', 523),
 ('iis', 473),
 ('entity-framework', 449),
 ('silverlight-4.0', 438),
 ('data-binding', 435),
 ('sharepoint', 428),
 ('nhibernate', 413),
 ('linq-to-sql', 409),
 ('xaml', 388),
 ('reflection', 371),
 ('design-patterns', 366),
 ('architecture', 366),
 ('windows-phone-7', 361),
 ('binding', 360),
 ('model-view-controller', 359),
 ('mvvm', 356),
 ('generics', 355),
 ('.net-2.0', 354),
 ('iis-7', 348),
 ('c#-3.0', 344),
 ('serialization', 338),
 ('installation', 334),
 ('com', 332),
 ('mono', 331),
 ('user-controls', 330),
 ('controls', 328),
 ('webforms', 311),
 ('properti

In [16]:
# Read the ", "-separated list of accepted root categories. The context
# manager closes the file handle, which a bare open() call leaves open.
with open("../filter/final_categories.txt", "r") as f:
    categories = f.read().split(", ")
categories

['component',
 'database',
 'extension',
 'environment',
 'language',
 'library',
 'platform',
 'subsystem',
 'framework',
 'ide',
 'tool',
 'toolkit',
 'utility',
 'system']

In [17]:
# Print one "tag,degree,root,body" line for every tag of the 'c#' community
# whose root category is in `categories`, from highest to lowest degree.
for tag, degree in sorted(G_1.degree, key=lambda x: x[1], reverse=True):
    # Read both attributes from the graph node. .get() skips tags that have no
    # wiki attributes instead of raising KeyError.
    attrs = G.nodes[tag]
    root = str(attrs.get('root'))
    if root != "nan" and root in categories:
        # An f-string tolerates a missing (NaN) body, where string
        # concatenation would raise TypeError.
        print(f"{tag},{degree},{root},{attrs.get('Body')}")

c#,2395,language,c# (pronounced "see sharp") is a high level statically typed multi-paradigm programming language developed by microsoft.
.net,2097,framework,the .net framework is a software framework designed mainly for the microsoft windows operating system.
asp.net,1478,framework,asp.net is a microsoft web application development framework that allows programmers to build dynamic web sites web applications and web services.
vb.net,1081,language,visual basic.net (vb.net) is a multi-paradigm managed type-safe object-oriented computer programming language.
wpf,951,subsystem,windows presentation foundation or wpf is a subsystem for rendering user interfaces in windows-based applications.
visual-studio-2010,892,environment,visual studio 2010 is an integrated development environment (ide) from microsoft.
asp.net-mvc,881,framework,the asp.net mvc framework is an open source web application framework and tooling that implements a version of the model-view-controller (mvc) pattern tailored t