<h1>Globals</h1>

In [1]:
from __future__ import print_function
from datetime import datetime
from git import Repo, Git
from nltk.stem import PorterStemmer
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import time
import os.path
import re, string, ntpath, keyword, json, codecs
import threading
import shutil, errno
import numpy as np
import pandas as pd

# Projects that checkout_projects() leaves out when it builds per-tag snapshots.
skip_projects = [ "meteor"]

# Snapshot tag names in "YYYY-MM" form, newest first. create_tags() names the
# tags it creates with the same "<year>-01" / "<year>-06" scheme.
tag_names = ["2016-06","2016-01","2015-06","2015-01","2014-06","2014-01","2013-06","2013-01",
"2012-06","2012-01","2011-06","2011-01","2010-06","2010-01","2009-06","2009-01",
"2008-06","2008-01","2007-06","2007-01","2006-06","2006-01","2005-06","2005-01",
"2004-06","2004-01","2003-06","2003-01"]

# Maps a "<project>-tags" directory name to the file_type that
# run_preprocessing_tags() hands to project_preprocessing().
# NOTE(review): the values mix dotted extensions (".java", ".php", ...) with a
# language name ("javascript"). return_file_type() uses the value as a key into
# EXTENTIONS, so confirm that both spellings exist in language_extension.json.
project_type_map = {
	"androidannotations-tags": ".java",
	"bigbluebutton-tags": ".java",
	"cassandra-tags": ".java",
	"elasticsearch-tags": ".java",
	"hibernate-orm-tags": ".java",
	"liferay-portal-tags": ".java",
	"netty-tags": ".java",
	"platform_frameworks_base-tags": ".java",
	"spring-framework-tags": ".java",
	"wildfly-tags": ".java",
	"laravel-tags": ".php",
	"symfony-tags": ".php",
	"cakephp-tags": ".php",
	"CodeIgniter-tags": ".php",
	"rails-tags": ".rb",
	"sinatra-tags": ".rb",
	"padrino-framework-tags": ".rb",
	"hanami-tags": ".rb",
	"pakyow-tags": ".rb",
	"flask-tags": ".py",
	"django-tags": ".py",
	"web2py-tags": ".py",
	"frappe-tags": ".py",
	"ninja-tags": ".java",
	"meteor-tags": "javascript",
	"express-tags": "javascript",
	"sails-tags": "javascript",
	"mean-tags": "javascript",
	"derby-tags": "javascript",
	"nodal-tags": "javascript"
}

def load_config(config_file):
    """
    Parse the JSON document stored at ``config_file`` and return the result.
    """
    with open(config_file) as handle:
        return json.load(handle)

def copy_folder(src, dst):
    """
    Replace ``dst`` with a copy of ``src``.

    An existing ``dst`` tree is removed first. When the tree copy fails with
    ENOTDIR, ``src`` is copied as a single file instead; any other OSError
    is re-raised.
    """
    try:
        if os.path.exists(dst):
            shutil.rmtree(dst)
        shutil.copytree(src, dst)
    except OSError as copy_error:
        if copy_error.errno != errno.ENOTDIR:
            raise
        shutil.copy(src, dst)
            
def get_immediate_subdirectories(a_dir):
    """Return the names (not full paths) of the directories directly inside ``a_dir``."""
    subdirectories = []
    for entry in os.listdir(a_dir):
        if os.path.isdir(os.path.join(a_dir, entry)):
            subdirectories.append(entry)
    return subdirectories

# base_dir = "/home/hshahin/workspaces/Spring2016_SE_Project"
# The project checkout lives in a different place on every machine. Set the
# SE_PROJECT_BASE_DIR environment variable to point at it; when the variable
# is not set the original hard-coded location is used.
base_dir = os.environ.get("SE_PROJECT_BASE_DIR",
                          "/home/doaa/Documents/Spring2016_SE_Project")

# Repositories and their "-tags" snapshot folders are read from here.
data_dir = os.path.join(base_dir , "data_out")
config_file = "projects_config.json"
# Parsed projects configuration; its keys are used as project folder names.
config_data = load_config(os.path.join(base_dir , config_file))

In [2]:
# path = '/home/hshahin/workspaces/Spring2016_SE_Project/data'
# get_immediate_subdirectories(path)

<h1>Creating tags functions</h1>

In [3]:
def get_date_time(epoch):
    '''
    Format an epoch timestamp as 'YYYY-MM-DD HH:MM:SS' in local time.
    '''
    local_struct = time.localtime(epoch)
    return time.strftime('%Y-%m-%d %H:%M:%S', local_struct)

def tag_exists(path, tag_name):
    """Report whether the repository at ``path`` has a tag named ``tag_name``."""
    known_tags = Repo(path).tags
    return tag_name in known_tags

def get_epoch(year, month='01'):
    """
    Return the local-time epoch (whole seconds) of midnight on the first day
    of the given year and month.
    """
    timestamp_text = '.'.join([str(year), str(month), '01 00:00:00'])
    parsed = time.strptime(timestamp_text, '%Y.%m.%d %H:%M:%S')
    return int(time.mktime(parsed))


def create_tags(path):
    '''
    Create half-yearly marker tags ("<year>-01" and "<year>-06") in the
    repository at ``path``.

    Steps:
      - list the commits returned by ``repo.iter_commits()``;
      - take the year of ``commits[0]`` as the starting ``current_year``
        (the code treats ``commits[0]`` as the latest commit, i.e. it assumes
        newest-first ordering -- TODO confirm);
      - walk the commits; when a commit is older than 1 January (or 1 June)
        of ``current_year`` and that boundary is already in the past, tag the
        commit visited just before it (``commits[idx-1]``), unless a tag with
        that name already exists;
      - after the January check succeeds, ``current_year`` becomes the year
        of the commit being visited.

    Each created tag is also printed as "<sha> <date> <tag name>".
    AttributeError raised inside the loop body is silently ignored.
    '''
    repo = Repo(path)

    # All commits of the repository, materialised so they can be indexed.
    commits = list(repo.iter_commits())

    # Starting year: the year of commits[0].
    # NOTE(review): commits[0] raises IndexError when the list is empty.
    current_year = datetime.fromtimestamp(commits[0].committed_date).year


    for idx, commit in enumerate(commits):
        # Tag labels are built from current_year as it is at the START of this
        # iteration; the January branch below may change current_year afterwards.
        current_year_01 = str(current_year)+'-01'
        current_year_06 = str(current_year)+'-06'

        try:
            # January boundary: this commit is older than 1 Jan of current_year,
            # the boundary is not in the future, and a previous commit exists.
            if get_epoch(current_year, '01') > commit.committed_date and \
                    int(time.time()) > get_epoch(current_year, '01')  and \
                    idx !=0:
                if str(current_year_01) not in repo.tags and idx != 0:
                    print(commits[idx-1].hexsha+' '+get_date_time(commits[idx-1].committed_date)+' '+current_year_01)
                    past = repo.create_tag(current_year_01, ref=commits[idx-1],
                                      message="This is a tag to mark the first commit in year %s" % current_year_01)
                # Continue with the year of the commit that crossed the boundary.
                current_year = datetime.fromtimestamp(commit.committed_date).year

            # June boundary, evaluated with the (possibly just updated) current_year.
            # NOTE(review): the tag name current_year_06 was computed before that
            # update, so boundary year and label year can differ -- confirm intent.
            if get_epoch(current_year, '06') > commit.committed_date and \
                    int(time.time()) > get_epoch(current_year, '06') and \
                idx != 0:
                if str(current_year_06) not in repo.tags:
                    print(commits[idx-1].hexsha+' '+get_date_time(commits[idx-1].committed_date)+' '+current_year_06)
                    past = repo.create_tag(current_year_06, ref=commits[idx-1],
                                      message="This is a tag to mark the first commit in year %s" % current_year_06)
        except AttributeError:
            # Best effort: commits that trigger AttributeError are skipped.
            pass

def checkout_tag(path, tag_name):
    '''
    Check out ``tag_name`` in the repository at ``path``.
    Nothing happens when the repository has no such tag.
    '''
    if tag_name not in Repo(path).tags:
        return
    Git(path).checkout(tag_name)

def delete_tags(path):
    '''
    Delete every tag of the repository located at ``path``.
    '''
    repo = Repo(path)
    existing_tags = repo.tags
    for tag in existing_tags:
        repo.delete_tag(tag)

<h1>Create tags every 6 months for each repo</h1>

In [15]:
# Rebuild the half-yearly tags of every configured project from scratch.
for project_name in config_data:
    print("Processing project: " + project_name )
    t0 = time.time()
    repo_path = os.path.join(data_dir, project_name)
    delete_tags(repo_path)
    create_tags(repo_path)
    elapsed = time.time() - t0
    print("Project: " + project_name + " taged in %0.3fs." % elapsed)

<h1>Preprocessing functions</h1>

In [4]:
# Python keywords
python_keywords = keyword.kwlist

# Java keywords from https://docs.oracle.com/javase/tutorial/java/nutsandbolts/_keywords.html
java_keywords = ["abstract","continue","for","new","switch","assert","default","goto","package","synchronized",
                 "boolean","do","if","private","this","break","double","implements","protected","throw",
                 "byte","else","import","public","throws","case","enum","instanceof","return","transient","catch",
                 "extends","int","short","try","char","final","interface","static","void","class","finally","long",
                 "strictfp","volatile","const","float","native","super","while"]

# Ruby keywords from http://docs.ruby-lang.org/en/2.2.0/keywords_rdoc.html
ruby_keywords = ["__ENCODING__","__LINE__","__FILE__","BEGIN","END","alias","and","begin","break",
                 "case","class","def","defined?","do","else","elsif","end","ensure","false","for","if",
                 "in","module","next","nil","not","or","redo","rescue","retry","return","self","super",
                 "then","true","undef","unless","until","when","while","yield"]

# PHP keywords from http://php.net/manual/en/reserved.keywords.php
php_keywords = ["__halt_compiler","abstract","and","array","as","break","callable","case","catch","class","clone",
                "const","continue","declare","default","die","do","echo","else","elseif","empty","enddeclare",
                "endfor","endforeach","endif","endswitch","endwhile","eval","exit","extends","final","finally",
                "for","foreach","function","global","goto","if","implements","include","include_once","instanceof",
                "insteadof","interface","isset","list","namespace","new","or","print","private","protected",
                "public","require","require_once","return","static","switch","throw","trait","try","unset","use",
                "var","while","xor","yield"]

cpp_keywords = ["auto","const", "double", "float", "int", "short", "struct", "unsigned", "break", "continue",
                "else", "for", "long", "signed", "switch", "void", "case", "default", "enum", "goto", "register",
                "sizeof", "typedef", "volatile", "char", "do", "extern", "if", "return", "static", "union", "while", 
                "asm", "dynamic_cast", "namespace", "reinterpret_cast", "try", "bool", "explicit", "new", 
                "static_cast", "typeid", "catch", "false", "operator", "template", "typename", "class", "friend", 
                "private", "this", "using", "const_cast", "inline", "public", "throw", "virtual", "delete", "mutable", 
                "protected", "true", "wchar_t", "and", "bitand", "compl", "not_eq", "or_eq", "xor_eq", "and_eq",
                "bitor", "not", "or", "xor"]

c_sharp_keywords = ["abstract", "as", "base", "bool", "break", "byte", "case", "catch", "char", "checked", "class", 
                    "const", "continue", "decimal", "default", "delegate", "do", "double", "else", "enum", "event",
                    "explicit", "extern", "false", "finally", "fixed", "float", "for", "foreach", "goto", "if", 
                    "implicit", "in", "int", "interface", "internal", "is", "lock", "long", "namespace", "new", 
                    "null", "object", "operator", "out", "override", "params", "private", "protected", "public",
                    "readonly", "ref", "return", "sbyte", "sealed", "short", "sizeof", "stackalloc", "static",
                    "string", "struct", "switch", "this", "throw", "true", "try", "typeof", "uint", "ulong", 
                    "unchecked", "unsafe", "ushort", "using", "virtual", "void", "volatile", "while"]

javascript_keywords = ["abstract", "arguments", "boolean", "break", "byte", "case", "catch", "char", "class",
                       "const", "continue", "debugger", "default", "delete", "do", "double", "else", "enum",
                       "eval", "export", "extends", "false", "final", "finally", "float", "for", "function", 
                       "goto", "if", "implements", "import", "in", "instanceof", "int", "interface", "let", 
                       "long", "native", "new", "null", "package", "private", "protected", "public", "return",
                       "short", "static", "super", "switch", "synchronized", "this", "throw", "throws", 
                       "transient", "true", "try", "typeof", "var", "void", "volatile", "while", "with", "yield"]

coffeescript_keywords = ["case", "default", "function", "var", "void", "with", "const", "let", "enum", "export", 
                         "import", "native", "__hasProp", "__extends", "__slice", "__bind", "__indexOf", "implements",
                         "interface", "package", "private", "protected", "public", "static", "yield", "true", "false",
                         "null", "this", "new", "delete", "typeof", "in", "arguments", "eval", "instanceof", "return",
                         "throw", "break", "continue", "debugger", "if", "else", "switch", "for", "while", "do", "try",
                         "catch", "finally", "class", "extends", "super", "undefined", "then", "unless", "until", 
                         "loop", "of", "by", "when", "and", "or", "is", "isnt", "not", "yes", "no", "on", "off"]

# R reserved words.
# NOTE(review): "for in" contains a space, so it can never equal one of the
# whitespace-split tokens that file_preprocessing compares against all_keywords.
R_keywords = ["if", "else", "repeat", "while", "function", "for in", "next", "break", "TRUE", "FALSE", "NULL", 
              "Inf", "NaN", "NA", "NA_integer_", "NA_real_", "NA_complex_", "NA_character_"]

typeScript_keywords = ["break", "as", "any", "case", "implements", "boolean", "catch", "interface", "constructor",
                       "class", "let", "declare", "const", "package", "get", "continue", "private", "module",
                       "debugger", "protected", "require", "default", "public", "number", "delete", "static",
                       "set", "do", "yield", "string", "else", "symbol", "enum", "type", "export", "from", 
                       "extends", "of", "false", "finally", "for", "function", "if", "import", "in", "instanceof", 
                       "new", "null", "return", "super", "switch", "this", "throw", "true", "try", "typeof", 
                       "var", "void", "while", "with"]
hashkell_keywords =  ["case","class","data","default","deriving","do","else","forall","if","import","in","infix",
                      "infixl","infixr","instance","let","module","newtype","of","qualified","then",
                      "type","where","foreign","ccall","as","safe","unsafe"]

# Union of every per-language keyword list, de-duplicated and sorted by
# np.unique, stored as a plain Python list.
all_keywords = np.unique(
    python_keywords
    + java_keywords
    + ruby_keywords
    + php_keywords
    + cpp_keywords
    + c_sharp_keywords
    + javascript_keywords
    + coffeescript_keywords
    + R_keywords
    + typeScript_keywords
    + hashkell_keywords
).tolist()

EXTENTIONS = load_config(os.path.join(base_dir , 'language_extension.json'))

def path_leaf(path):
    """Split ``path`` into ``(head, tail)`` using ``ntpath.split``."""
    return ntpath.split(path)

# Patterns used to break camelCase tokens apart:
#   _underscorer1 - any character followed by a capitalised word ("xWord")
#   _underscorer2 - a lowercase letter or digit followed by a capital ("aB", "1B")
_underscorer1 = re.compile(r'(.)([A-Z][a-z]+)')
_underscorer2 = re.compile('([a-z0-9])([A-Z])')


def camel_to_spaces(s):
    """
    Insert a space at each camelCase boundary of ``s`` and lowercase the result.
    """
    return _underscorer2.sub(r'\1 \2', _underscorer1.sub(r'\1 \2', s)).lower()

def snake_to_spaces(snake_cased_str):
    """
    Convert a snake_case string into space-separated words.

    One leading and one trailing empty component (caused by a leading or
    trailing underscore) are dropped. When more than one component remains,
    each is lowercased and they are joined with single spaces; a single
    remaining component is returned unchanged.

    Returns an empty string when nothing is left after trimming (for example
    for ``""`` or ``"_"``); previously those inputs raised IndexError.
    """
    components = snake_cased_str.split("_")
    if components and components[0] == "":
        components = components[1:]
    # The list may already be empty here (input was ""), so test before indexing.
    if components and components[-1] == "":
        components = components[:-1]
    if not components:
        return ""
    if len(components) > 1:
        return " ".join(part.lower() for part in components)
    return components[0]


def file_preprocessing(input_file, output_file):
    """
    Normalise one source file and write the result to ``output_file``.

    For every input line:
    - punctuation is replaced with spaces and the line is split into tokens;
    - tokens shorter than 4 characters, English stop words and entries of
      ``all_keywords`` are dropped (exact, case-sensitive match on the raw token);
    - the remaining tokens go through camel_to_spaces and snake_to_spaces;
    - every resulting word is stemmed with the Porter stemmer.

    One output line is written per input line.
    """
    # Translation table mapping each punctuation character to a space.
    replace_punctuation = str.maketrans(string.punctuation, ' ' * len(string.punctuation))
    stemmer = PorterStemmer()

    # Build the lookup sets once per file. The stop-word list used to be
    # requested from NLTK again for every single token, and keywords were
    # searched linearly in a list.
    english_stopwords = frozenset(stopwords.words('english'))
    keyword_set = frozenset(all_keywords)

    with open(input_file, 'r', encoding='utf-8', errors='replace') as inFile, \
            open(output_file, 'w') as outFile:
        for line in inFile:
            kept_words = [snake_to_spaces(camel_to_spaces(word))
                          for word in line.translate(replace_punctuation).split()
                          if len(word) >= 4
                          and word not in english_stopwords
                          and word not in keyword_set]
            line_without_puncs = ' '.join(kept_words)

            # camel_to_spaces may have turned one token into several words,
            # so split again before stemming.
            singles = []
            for plural in line_without_puncs.split():
                try:
                    singles.append(stemmer.stem(plural))
                except UnicodeDecodeError:
                    # The word is reported and left out of the output.
                    print(plural)

            print(' '.join(singles), file=outFile)


def return_file_type(project_path, file_type):
    """
    List the files under ``project_path`` that belong to ``file_type``.

    ``file_type`` is a key of ``EXTENTIONS``, optionally followed by '.proc'
    to select the pre-processed copies ("<name><ext>.proc") instead of the
    sources. The ``EXTENTIONS`` value may be one extension string or a list.

    The key is obtained by cutting off the '.proc' suffix. The previous
    ``split('.')[0]`` produced an empty key for keys that themselves contain
    a dot (e.g. ".java.proc").

    Returns a list of full paths collected with ``os.walk``.
    Raises KeyError when the key is not present in ``EXTENTIONS``.
    """
    proc_suffix = '.proc'
    wants_processed = proc_suffix in file_type
    if wants_processed:
        language = file_type[:file_type.rindex(proc_suffix)]
        appended = proc_suffix
    else:
        language = file_type
        appended = ''

    exten = EXTENTIONS[language]
    # str.endswith accepts either a single string or a tuple of strings.
    if isinstance(exten, list):
        extenstion = tuple(ext + appended for ext in exten)
    else:
        extenstion = exten + appended

    return [os.path.join(root, name)
            for root, _dirs, files in os.walk(project_path)
            for name in files
            if name.endswith(extenstion)]

def project_preprocessing(project_path, file_type, tag_name):
    """
    Pre-process every source file of a project and merge the results.

    Each file matching ``file_type`` gets a sibling "<file>.proc" written by
    file_preprocessing; afterwards all ".proc" files are concatenated into
    "final-processed.out" in ``project_path``. ``tag_name`` is not used.
    """
    # Step 1: one .proc file next to every matching source file.
    for source_file in return_file_type(project_path, file_type):
        folder, filename = path_leaf(source_file)
        file_preprocessing(source_file, os.path.join(folder, filename + '.proc'))

    # Step 2: merge the processed files; the list is built before the
    # output file is opened.
    processed_files = return_file_type(project_path, file_type + '.proc')
    merged_path = os.path.join(project_path, "final-processed.out")
    with open(merged_path, 'w') as outfile:
        for processed_file in processed_files:
            with open(processed_file) as infile:
                outfile.writelines(infile)

<h1>checkout tags in separate folders</h1>

In [6]:
# create folder project_tags
# for each tag if tag exists
# copy the project into project_tag/tag_name
# checkout project to tag_name
# delete .git folder
def checkout_projects():
    """
    Materialise one folder per existing snapshot tag for every configured project.

    For each project in ``config_data`` that is not listed in ``skip_projects``:
    1. copy the repository into "<project>-tags/<tag>" for every entry of
       ``tag_names`` that exists as a tag in the repository;
    2. check out the tag inside each copy;
    3. delete the ".git" folder of each copy.

    The three phases still run one after another for all tags, so the printed
    progress keeps its order. The tag lookup is done once per project instead
    of opening the repository again for every tag in every phase.
    """
    for project_name in config_data:
        if project_name in skip_projects:
            continue

        project_path = os.path.join(data_dir, project_name)
        project_tags_path = project_path + '-tags'
        if not os.path.exists(project_tags_path):
            os.makedirs(project_tags_path)

        # (tag name, snapshot folder) for every wanted tag present in the repo.
        repo_tags = Repo(project_path).tags
        snapshots = [(tag_name, os.path.join(project_tags_path, tag_name))
                     for tag_name in tag_names
                     if tag_name in repo_tags]

        for tag_name, current_tag_path in snapshots:
            print("Copying "+project_name+' '+tag_name)
            copy_folder(project_path, current_tag_path)

        for tag_name, current_tag_path in snapshots:
            print("Checkout "+project_name+' '+tag_name)
            checkout_tag(current_tag_path, tag_name)

        for tag_name, current_tag_path in snapshots:
            print("deleting .git "+project_name+' '+tag_name)
            # NOTE(review): the chdir is kept from the original; the rmtree
            # below uses an explicit path, so it may be unnecessary -- confirm.
            os.chdir(current_tag_path)
            shutil.rmtree(os.path.join(current_tag_path, '.git'))

checkout_projects()

<h1>Run preprocessing</h1>

In [7]:
import multiprocessing, traceback
import logging, sys, traceback

# Root logger writing INFO and above to <base_dir>/logs.log.
log_path = os.path.join(base_dir , 'logs.log')
logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Re-running this cell must not attach a second handler for the same file,
# otherwise every message is written to the log once per run of the cell.
for _existing in list(logger.handlers):
    if isinstance(_existing, logging.FileHandler) and \
            _existing.baseFilename == os.path.abspath(log_path):
        logger.removeHandler(_existing)
        _existing.close()

fh = logging.FileHandler(filename = log_path)
fh.setLevel(logging.INFO)
logger.addHandler(fh)



def run_preprocessing_tags(project_tags_dir):
    """
    Pre-process every tag snapshot found in ``data_dir/project_tags_dir``.

    ``project_tags_dir`` must be a key of ``project_type_map``; the mapped
    value is passed to project_preprocessing as the file type. The time
    spent on each snapshot is logged.
    """
    project_tags_path = os.path.join(data_dir , project_tags_dir)
    # '%s' placeholder: the previous bare '%' was not a valid format string,
    # so logging could not format this record.
    logger.info('--------------- %s', project_tags_path)

    for project_tag in get_immediate_subdirectories(project_tags_path):
        project_tag_path = os.path.join(project_tags_path , project_tag)
        t0 = time.time()
        project_preprocessing(project_tag_path, project_type_map[project_tags_dir], project_tag)
        logger.info("processing project: {} \t tag {} done in {} mins".format(project_tags_path,
                                    project_tag, (time.time() - t0)/60.))
    # The value logged here is the process id of the worker.
    logger.info('****This thread is done: {}'.format(os.getpid()))

    
def run_preprocessing(project):
    ''' Pre-process one project and log how long it took.

    project = [name, group, type]; ``type`` must be a key of EXTENTIONS,
    otherwise the project is logged as skipped and nothing else happens.
    The project folder is ``data_dir/<name lowercased>``.

    Errors raised while processing are logged with their traceback and do
    not propagate, so one failing project does not abort a pool.map run.
    '''
    if project[2] not in EXTENTIONS:
        logger.info('skipping project {} of type {}.'.format(project[0], project[2]))
        # Actually skip: without this return the unsupported project was still
        # processed and then failed inside the try block below.
        return

    logger.info(project)
    project_path = os.path.join(data_dir , project[0].lower())

    t0 = time.time()
    try:
        project_preprocessing(project_path, project[2], project[0])
    except Exception:
        # logger.exception records the traceback; the former bare except
        # discarded it and also caught KeyboardInterrupt/SystemExit.
        logger.exception("Error in project {}".format(project))
    logger.info("***Processing project:{} done in {}min.".format(project_path, (time.time() - t0)/60.0))
    

# For categories
projects_list = load_config(os.path.join(base_dir , 'showcases_config.json'))
projects_list = pd.DataFrame(projects_list).T
projects_list.reset_index(inplace=True)
# projects_list
# .values returns the same ndarray as the old DataFrame.as_matrix(), which
# no longer exists in pandas 1.0 and later.
projects_list = projects_list.values
logger.info('Start time: {}'.format(time.time()))
logger.info('all projects: {}'.format(projects_list.shape))


# Each row ([name, group, type]) is handed to run_preprocessing in a worker.
pool = multiprocessing.Pool(16)
try:
    pool.map(run_preprocessing, projects_list)
finally:
    # Release the worker processes even when a task fails.
    pool.close()
    pool.join()

# #----------------- For tags
# # project_tags_paths = get_immediate_subdirectories(data_dir)
# # pool = Pool(16)
# # pool.map(run_preprocessing_tags, project_tags_paths)


logger.info('Main process Done..........................')

<h1>Topic Modeling</h1>

In [5]:
from __future__ import print_function
import os
import time
from os import listdir
from os.path import isdir

from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer, TfidfTransformer
from sklearn.decomposition import NMF, LatentDirichletAllocation
from sklearn.datasets import fetch_20newsgroups
import pandas as pd
import numpy as np
import pickle

# import lda

# pd.set_option('display.mpl_style', 'default') 
pd.set_option('display.width', 5000) 
pd.set_option('display.max_columns', 60) 
import matplotlib.pyplot as plt


def print_top_words(model, feature_names, n_top_words):
    """
    Print, for every row of ``model.components_``, a "Topic #<i>:" header and
    the ``n_top_words`` highest-weighted feature names, followed by one blank
    line after the last topic.
    """
    for topic_idx, topic in enumerate(model.components_):
        top_indices = topic.argsort()[:-n_top_words - 1:-1]
        print("Topic #%d:" % topic_idx)
        print(" ".join(feature_names[i] for i in top_indices))
    print()

def get_top_words(model, feature_names, n_top_words):
    """
    Build two tables with one row per topic ("topic#<i>") and one column per
    rank ("word0", "word1", ...): the top feature names and their weights in
    ``model.components_``.

    Returns ``(words_table, weights_table)``.
    """
    rank_columns = ['word' + str(rank) for rank in range(n_top_words)]
    words_table = pd.DataFrame(columns=rank_columns)
    weights_table = pd.DataFrame(columns=rank_columns)

    for topic_idx, topic in enumerate(model.components_):
        row_label = 'topic#' + str(topic_idx)
        # Indices of the highest weights, largest first.
        top_indices = topic.argsort()[:-n_top_words - 1:-1]
        words_table.loc[row_label] = [feature_names[i] for i in top_indices]
        weights_table.loc[row_label] = [topic[i] for i in top_indices]

    return words_table, weights_table


def print_full(x):
    """
    Print ``x`` with pandas' column-width limit raised to 1000 characters.

    The limit is only raised while printing and is restored afterwards.
    Previously 'display.max_colwidth' stayed at 1000 for the rest of the
    session while the unrelated 'display.max_rows' option was reset.
    """
    with pd.option_context('display.max_colwidth', 1000):
        print(x)

# -------------------------------------------------------------------

def run_lda_sklearn(X):
    """
    Fit scikit-learn's LatentDirichletAllocation on ``X``.

    The number of topics comes from the module-level ``n_topics`` variable,
    which must be set before this function is called.
    NOTE(review): newer scikit-learn releases call this argument
    ``n_components`` -- confirm against the installed version.

    Returns ``(fit_transform result, fitted model)``.
    """
    model = LatentDirichletAllocation(
        n_topics=n_topics,
        max_iter=400,
        learning_method='online',
        learning_offset=50.,
        random_state=0,
        n_jobs=10,
    )
    return model.fit_transform(X), model

# -------------------------------------------------------------------

def run_lda_other(X):
    """
    Fit the LDA implementation of the standalone ``lda`` package on ``X``.

    The number of topics comes from the module-level ``n_topics`` variable.
    Returns ``(model.doc_topic_, fitted model)``.

    The model used to be assigned to a local variable named ``lda``. That
    made ``lda`` local to the function, so ``lda.LDA`` raised
    UnboundLocalError before the package could be reached.
    """
    # Imported here because the module-level "import lda" is commented out and
    # the notebook later rebinds the global name ``lda`` to a fitted model.
    import lda as lda_package

    model = lda_package.LDA(n_topics=n_topics, n_iter=500, random_state=1)
    model.fit_transform(X)

    return model.doc_topic_, model

# -------------------------------------------------------------------

def get_projects_data_tags(selected_projects):
    '''Read the processed text of every tag snapshot of the given projects.

    For each project, every sub-directory of ``data_dir/<project>-tags`` is a
    snapshot whose "final-processed.out" is read with newlines replaced by
    spaces. A missing "-tags" folder is reported and the project skipped.

    Returns ``(project_names, projects_data)``: parallel lists where each
    name is the last two '/'-separated path components joined by '_'.
    '''
    project_names, projects_data = [], []

    for project_name in selected_projects:
        project_path = os.path.join(data_dir, project_name+'-tags')
        try:
            entries = listdir(project_path)
        except FileNotFoundError:
            print('------Project Not found: '+project_name)
            continue

        for entry in entries:
            snapshot = os.path.join(project_path, entry)
            if not isdir(snapshot):
                continue
            project_names.append('_'.join(snapshot.split('/')[-2:]))
            with open(os.path.join(snapshot, "final-processed.out"), 'r') as myfile:
                projects_data.append(myfile.read().replace('\n', ' '))

    return project_names, projects_data

# ----------------------------------------------------------------

def get_projects_data(selected_projects):
    '''Return one string per project, in the order given.

    Each string is the content of
    ``data_dir/<project name lowercased>/final-processed.out`` with every
    newline replaced by a space.
    '''
    def _read_flattened(project_name):
        processed_path = os.path.join(data_dir, project_name.lower(), "final-processed.out")
        with open(processed_path, 'r') as myfile:
            return myfile.read().replace('\n', ' ')

    return [_read_flattened(project_name) for project_name in selected_projects]

# ----------------------------------------------------------------

In [6]:
# Run LDA tests
# -------------------


n_features = 10000
n_topics = 25
# n_top_words = 100

max_df = 0.7
min_df = 0.3
lang = 'categories_fixed_'

TAGS = False

# Every CSV below is written into <base_dir>/results; create it when missing
# so a long LDA fit is not lost because the folder does not exist.
results_dir = os.path.join(base_dir, 'results')
if not os.path.isdir(results_dir):
    os.makedirs(results_dir)

# Grid over topic count and document-frequency cut-offs. The loop variables
# rebind the module-level n_topics / max_df / min_df, which run_lda_sklearn
# and the vectorizer read.
for n_topics in [50]:#[20,30, 40]:
    for max_df in [0.7]:#[0.8, 0.7, 0.6]:
        for min_df in [0.3]:#[0.2, 0.3, 0.1, 0.4]:
            suffix = lang+str(n_topics)+'_'+str(max_df)+'_'+str(min_df)
            print('------suffix:', suffix)

            if TAGS:
                #config_file = "projects_config_webFrameworks.json"
                #config_file = 'projects_config_topGithub.json'
                config_file = "projects_config.json"
                selected_projects = load_config(os.path.join(base_dir , config_file)).keys()
                project_names, projects_data = get_projects_data_tags(selected_projects)
            else:
                project_details = load_config(os.path.join(base_dir , 'showcases_config.json'))
                project_details = pd.DataFrame(project_details).T
                project_details.reset_index(inplace=True)
                project_details.to_csv(os.path.join(base_dir , 'results/projects_details.csv'), index=None)
                # First column by position (the project names after reset_index).
                # .iloc replaces the .ix indexer, which was removed from pandas.
                project_names = project_details.iloc[:, 0]
                projects_data = get_projects_data(project_names)

            print('num of projects: ', len(projects_data))

            # Use tf (raw term count) features for LDA.
            print("Extracting tf features for LDA...")
            count_vectorizer = CountVectorizer(max_df=max_df, min_df=min_df, max_features=n_features, stop_words='english')
            tfidf_vectorizer = TfidfTransformer()

            t0 = time.time()
            counts = count_vectorizer.fit_transform(projects_data)
    #         tfidf = tfidf_vectorizer.fit_transform(counts)
            X = counts   # <---------
            tf_feature_names = count_vectorizer.get_feature_names()
            # tf_feature_names = tfidf_vectorizer.get_feature_names()
            print("done in %0.3fs." % (time.time() - t0))
            print('X shape:', X.shape)


            print("Fitting LDA models with tf features")

            t0 = time.time()
    #         projects_topics, lda = run_lda_other(X) # counts
            projects_topics, lda = run_lda_sklearn(X) # counts
            topic_word, topic_word_freq = get_top_words(lda, tf_feature_names, 50)
            print("done in %0.3fs." % (time.time() - t0))

            # -------------------------------------------------------------------
            # Save lda into a pickle file
    #         pickle.dump(lda, open(os.path.join(base_dir, 'results/lda_'+suffix+'.p'), 'wb'))

            pd.DataFrame(lda.components_).to_csv(os.path.join(base_dir, 'results/topic_word_raw_'+suffix+'.csv'))
            topic_word.to_csv(os.path.join(base_dir, 'results/topic_word_'+suffix+'.csv'))
            topic_word_freq.to_csv(os.path.join(base_dir, 'results/topic_word_freq_'+suffix+'.csv'))

            # One row per project (or project snapshot), one column per topic.
            projects_topics = pd.DataFrame(projects_topics, columns=['topic'+str(i) for i in range(n_topics)])
            projects_topics['project'] = project_names
            projects_topics['project'] = projects_topics['project'].apply(lambda x: x.split('_')[0].split('-')[0])
            if TAGS:
                # Snapshot names look like "<project>-tags_<tag>"; the part
                # after the first '_' is the tag date.
                projects_topics['date'] = project_names
                projects_topics['date'] = projects_topics['date'].apply(lambda x: x.split('_')[1])
            else:
                projects_topics['date'] = '2016-01'
            projects_topics.index = project_names
            projects_topics.to_csv(os.path.join(base_dir, 'results/project-topic_'+suffix+'.csv'))

print('Tesing is Done............')
# lda = pickle.load(open("lda_5_1.p", "rb"))


------suffix: categories_fixed_50_0.7_0.3
num of projects:  103
Extracting tf features for LDA...
done in 44.963s.
X shape: (103, 2079)
Fitting LDA models with tf features
done in 402.681s.
Tesing is Done............


## 1- Topic-word

In [7]:
# print("\nTopics-words in LDA model:")

topic_word

Unnamed: 0,word0,word1,word2,word3,word4,word5,word6,word7,word8,word9,word10,word11,word12,word13,word14,word15,word16,word17,word18,word19,word20,word21,word22,word23,word24,word25,word26,word27,word28,word29,word30,word31,word32,word33,word34,word35,word36,word37,word38,word39,word40,word41,word42,word43,word44,word45,word46,word47,word48,word49
topic#0,summari,game,vertex,textur,assert,draw,resourc,button,graphic,devic,matrix,softwar,assembl,dispos,framework,obj,tool,microsoft,bitmap,bit,blend,sound,platform,pixel,compress,po,reader,contributor,rectangl,vertic,term,batch,rect,menu,enumer,clone,region,net,sender,mous,channel,app,contribut,audio,icon,subject,writer,angl,plugin,grant
topic#1,train,doubl,framework,abstract,score,cluster,predict,exp,flat,metric,dataset,apach,assert,conf,critic,logger,classifi,db,regress,parallel,transpos,concurr,warranti,agre,unless,rate,govern,complianc,obtain,matrix,softwar,basi,permiss,statist,repo,tail,infer,illeg,databas,varianc,2013,reject,learn,2016,accuraci,str,topic,popul,interfac,sum
topic#2,chart,dataset,axi,2015,radiu,accessor,legend,mixin,grid,bar,anim,domain,tooltip,box,draw,coordin,hover,bubbl,horizont,angl,eas,rect,redraw,prototyp,pixel,rotat,month,mock,stroke,extent,vertic,opac,year,interpol,moment,circl,rectangl,dash,plot,polygon,axe,zoom,anchor,day,canva,ordin,footer,famili,week,wiki
topic#3,axe,figur,bbox,plot,draw,axi,canva,patch,backend,angl,button,grid,vertic,tran,dict,func,mask,clip,marker,formatt,legend,major,tool,shape,arrow,norm,python,extent,coordin,rotat,tupl,interpol,segment,baselin,minor,scalar,triangl,vert,aspect,spec,widget,box,pixel,datetim,coord,xmin,fname,rectangl,unicod,press
topic#4,prototyp,builtin,session,elem,fn,entiti,expr,plugin,javascript,selector,bucket,exp,hook,statement,fragment,promis,splice,liter,editor,onload,css,emit,obj,cur,ajax,symbol,dep,indent,circl,auth,reject,defer,dest,web,cursor,func,assert,shape,blur,grid,div,packag,old,el,sandbox,xml,po,toggl,score,uri
topic#5,dom,shape,softwar,devic,light,textur,extent,vertex,world,materi,collis,vert,po,profil,scene,plane,draw,ctrl,techniqu,ref,rect,poli,mask,scalar,matrix,game,box,platform,mous,triangl,camera,obj,asset,assert,face,axi,cast,thread,vertic,bitmap,menu,ts,shadow,veloc,fn,physic,const,constraint,pipelin,anim
topic#6,cursor,menu,fname,mous,alloc,widget,fold,ctrl,strlen,scroll,button,scheme,redraw,visual,dialog,mask,retval,dict,scrollbar,unus,shell,clip,eval,proto,argv,spell,highlight,proc,indent,magic,draw,term,pixel,region,icon,utf8,func,unix,undo,conv,python,rect,spin,silent,expr,verbos,pend,score,signal,choic
topic#7,world,summari,0x,game,packag,builder,assert,button,scene,session,po,matrix,softwar,room,textur,shape,draw,asset,ff,func,resourc,cursor,templat,prototyp,say,dict,light,vertex,plugin,def,underscor,dir,selector,nbsp,anim,platform,apach,permiss,password,water,thread,cost,email,learn,owner,kernel,cur,dead,elem,editor
topic#8,0x,assert,apach,softwar,matrix,entiti,tile,proxi,geometri,textur,camera,button,vertex,draw,ptr,cursor,js,widget,anim,scene,func,rectangl,primit,kernel,prototyp,warranti,writabl,shape,scroll,resourc,doubl,vertic,permiss,lang,cluster,dir,int32,bit,devic,session,axi,io,train,complianc,interfac,materi,world,reader,graphic,mock
topic#9,world,player,game,mod,po,widget,attack,owner,softwar,palett,tile,button,sound,face,draw,anim,power,region,resourc,play,upgrad,foundat,term,public,2016,publish,2007,panel,enumer,vec,func,damag,preview,scroll,mous,notifi,dictionari,nbsp,mini,builder,dead,room,cursor,self,cost,sub,radiu,graphic,tooltip,overlay


In [9]:
# Preview the first rows of the topic/word-frequency table (head() defaults to 5 rows).
# Judging by the rendered output, each row is a topic and the word0..word49 columns
# hold numeric weights in descending order -- presumably aligned with the topic/word
# table shown above; verify against the cell that builds `topic_word_freq`.
topic_word_freq.head()


Unnamed: 0,word0,word1,word2,word3,word4,word5,word6,word7,word8,word9,word10,word11,word12,word13,word14,word15,word16,word17,word18,word19,word20,word21,word22,word23,word24,word25,word26,word27,word28,word29,word30,word31,word32,word33,word34,word35,word36,word37,word38,word39,word40,word41,word42,word43,word44,word45,word46,word47,word48,word49
topic#0,21552.380742,12294.977098,6516.738818,6087.294924,6008.147986,5594.012586,5290.001568,5289.7984,4772.104689,4576.230139,3866.929345,3814.044308,3584.157071,3300.09937,2968.472459,2840.668431,2539.777863,2535.010642,2472.614617,2470.533654,2346.700896,2340.764185,2272.507973,2195.889996,2056.320288,2042.780268,1995.085363,1952.73956,1937.833696,1859.7959,1824.828001,1799.496583,1798.512699,1746.143779,1681.9458,1677.732171,1643.300253,1638.065524,1623.704036,1593.158061,1590.655542,1578.41337,1572.495197,1543.811222,1513.794156,1493.737882,1447.199279,1434.320483,1426.370231,1410.292864
topic#1,3018.007977,2094.684938,1743.974247,1491.574937,1480.58186,1394.327717,1306.569099,1111.873326,1013.68093,970.754847,835.963402,787.986338,709.008214,678.672764,642.677354,623.417016,623.2201,559.108645,488.228487,484.780481,453.292177,422.611858,413.426374,402.635266,395.889444,391.970846,387.552412,387.501089,386.260517,386.256988,385.095678,383.580265,383.401906,358.832466,356.961762,351.703546,343.959871,323.648418,319.840535,310.804425,303.014625,300.244412,289.773816,285.102901,256.297711,249.886194,231.765295,226.089239,222.634903,219.724973
topic#2,18341.452165,3420.835441,3263.238821,2160.031477,2064.837104,1767.81191,1336.989451,1170.438607,1165.500533,1005.301985,1003.275646,958.597555,901.162246,885.822222,850.494446,816.956461,796.703976,791.42866,789.899194,741.478561,657.818598,630.16334,615.681301,601.977527,593.986468,588.535777,568.024085,563.377521,546.539774,512.769186,491.017431,471.017471,470.394976,468.844232,457.430042,381.149283,369.869437,368.743322,365.452224,359.95559,356.718175,347.583932,345.135047,335.600663,334.861755,334.208451,328.229774,316.916961,314.004358,310.078037
topic#3,6235.265531,5486.041714,3248.586988,3074.848605,2933.240467,2738.479903,2701.983747,2194.790166,2013.597121,1561.97131,1549.284769,1501.35143,1458.236615,1395.352274,1388.582608,1281.345967,1264.394704,1215.646463,1155.210228,1032.63948,1021.246985,990.263248,973.490822,958.739392,901.985059,899.484445,878.169294,815.404161,766.874134,760.070107,758.805069,756.978822,748.874355,744.947632,699.357719,697.737196,695.966074,680.650071,649.395998,626.335484,625.274395,625.146049,618.756166,609.317805,604.389451,600.308437,554.858692,553.049607,550.60508,538.0712
topic#4,4952.281853,3150.934639,1983.028334,1916.472203,1335.569827,1261.574867,820.555905,695.45657,569.706938,554.765624,544.678245,516.242672,497.363559,488.676729,463.887915,443.751929,423.762859,404.091853,398.648673,396.702223,380.475929,357.737437,357.069959,355.183022,345.839321,344.782522,344.758936,343.221652,329.021107,328.516726,326.947159,324.221133,323.803667,322.292931,310.492953,304.230034,300.71034,299.382319,294.304734,287.097517,261.574632,258.8423,250.969874,246.262412,246.17234,239.289671,233.269567,231.181234,228.161762,225.063022


# 2- Project-topic

In [10]:
# Preview up to the first 15 rows of the project-by-topic table.
# In the rendered output the frame has the columns topic0..topic49 followed by
# `project` and `date`, and is indexed by a column named `index`.
projects_topics.head(15)

Unnamed: 0_level_0,topic0,topic1,topic2,topic3,topic4,topic5,topic6,topic7,topic8,topic9,topic10,topic11,topic12,topic13,topic14,topic15,topic16,topic17,topic18,topic19,topic20,topic21,topic22,topic23,topic24,topic25,topic26,topic27,topic28,topic29,topic30,topic31,topic32,topic33,topic34,topic35,topic36,topic37,topic38,topic39,topic40,topic41,topic42,topic43,topic44,topic45,topic46,topic47,topic48,topic49,project,date
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1
0hh1,154.679354,370.315977,0.02,0.02,108.261387,0.02,0.02,0.02,0.02,383.498186,0.02,0.02,1915.227655,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,693.154596,0.02,0.02,0.02,0.02,14.002845,0.02,0.02,0hh1,2016-01
2048,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,125.272649,0.02,0.02,357.778958,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,11.008393,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,2048,2016-01
AtomicGameEngine,51.485598,0.02,0.02,0.02,0.02,0.02,52.723658,0.02,0.02,0.02,0.02,514364.734372,0.02,0.02,10.126121,479.129925,0.02,18.016991,69.979068,0.02,39.148384,0.02,0.02,6062.526671,51.848986,0.02,0.02,0.02,0.02,0.02,52.210457,0.02,0.02,0.02,0.02,0.02,0.02,25.309769,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,AtomicGameEngine,2016-01
BEMSimpleLineGraph,50.391523,20.679554,11.641511,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,53.538642,0.02,0.02,44.646061,0.02,0.02,0.02,20.609001,0.02,0.02,0.02,0.02,0.02,0.02,2367.300401,0.02,659.766167,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,407.607141,0.02,0.02,0.02,0.02,0.02,0.02,BEMSimpleLineGraph,2016-01
BrowserQuest,0.02,0.02,0.02,0.02,9269.873459,0.02,0.02,0.02,0.02,3570.27971,0.02,0.02,715.063545,0.02,0.02,0.02,0.02,223.691239,0.02,0.02,0.02,0.02,0.02,0.02,170.739745,0.02,83.310346,0.02,0.02,0.02,0.02,0.02,68.631843,0.02,0.02,0.02,473.097548,0.02,0.02,0.02,0.02,35.585661,264.077953,0.02,0.02,12.868951,0.02,0.02,0.02,0.02,BrowserQuest,2016-01
CNTK,0.02,0.02,0.02,0.02,21.935886,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,32.708878,0.02,0.02,0.02,0.02,60012.03022,0.02,0.02,0.02,0.02,622.969432,59.866389,0.02,0.02,0.02,0.02,0.02,51.549274,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,63.463221,220.588384,0.02,0.02,0.02,300.068315,0.02,CNTK,2016-01
Caret,0.02,0.02,0.02,0.02,95.396288,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,14.117317,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,135425.546395,0.02,0.02,0.02,0.02,0.02,Caret,2016-01
Chart.js,0.02,0.02,30875.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,Chart.js,2016-01
CodeIgniter,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,41.01838,0.02,3839.609249,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,1431.639394,0.02,0.02,0.02,0.02,0.02,0.02,22765.499196,0.02,731.677077,0.02,0.02,394.042181,94.654519,0.02,0.02,0.02,0.02,0.02,0.02,CodeIgniter,2016-01
Dash,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,87399.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,Dash,2016-01


In [11]:
# Scale each project's topic weights by that project's largest weight, so the
# strongest topic in every row becomes 1.0. The work is done on a copy;
# `projects_topics` itself is left untouched.
df = projects_topics.copy()

# NOTE(review): only the first 10 columns (topic0..topic9) are rescaled, exactly
# as the original loop did; the remaining topic columns keep their raw values.
# Confirm whether all topic columns were meant to be normalised.
NORMALIZED_TOPIC_COUNT = 10
topic_cols = list(df.columns[:NORMALIZED_TOPIC_COUNT])

# Vectorised row-wise division on the underlying array. This replaces the
# per-row `df.ix[...]` loop: `.ix` was deprecated in pandas 0.20 and removed in
# 1.0, and a label-based row lookup selects several rows at once when an index
# label repeats, which would turn the per-row maximum into a per-column one.
topic_values = df[topic_cols].values.astype(float)
df[topic_cols] = topic_values / topic_values.max(axis=1, keepdims=True)
df

Unnamed: 0_level_0,topic0,topic1,topic2,topic3,topic4,topic5,topic6,topic7,topic8,topic9,topic10,topic11,topic12,topic13,topic14,topic15,topic16,topic17,topic18,topic19,topic20,topic21,topic22,topic23,topic24,topic25,topic26,topic27,topic28,topic29,topic30,topic31,topic32,topic33,topic34,topic35,topic36,topic37,topic38,topic39,topic40,topic41,topic42,topic43,topic44,topic45,topic46,topic47,topic48,topic49,project,date
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1
0hh1,4.033379e-01,9.656264e-01,5.215149e-05,5.215149e-05,2.822996e-01,5.215149e-05,5.215149e-05,5.215149e-05,5.215149e-05,1.000000e+00,0.02,0.020000,1915.227655,0.02,0.020000,0.020000,2.000000e-02,0.020000,0.020000,0.02,0.020000,0.02,0.020000,0.020000,0.020000,0.020000,0.020000,0.020000,0.02,0.020000,0.020000,0.020000,0.020000,0.02,0.020000,0.02,0.020000,0.020000,0.02,0.020000,0.020000,0.020000,693.154596,0.020000,0.020000,0.020000,0.02,14.002845,0.020000,0.02,0hh1,2016-01
2048,1.596518e-04,1.596518e-04,1.596518e-04,1.596518e-04,1.596518e-04,1.596518e-04,1.596518e-04,1.596518e-04,1.596518e-04,1.000000e+00,0.02,0.020000,357.778958,0.02,0.020000,0.020000,2.000000e-02,0.020000,0.020000,0.02,0.020000,0.02,0.020000,0.020000,0.020000,11.008393,0.020000,0.020000,0.02,0.020000,0.020000,0.020000,0.020000,0.02,0.020000,0.02,0.020000,0.020000,0.02,0.020000,0.020000,0.020000,0.020000,0.020000,0.020000,0.020000,0.02,0.020000,0.020000,0.02,2048,2016-01
AtomicGameEngine,9.765179e-01,3.793363e-04,3.793363e-04,3.793363e-04,3.793363e-04,3.793363e-04,1.000000e+00,3.793363e-04,3.793363e-04,3.793363e-04,0.02,514364.734372,0.020000,0.02,10.126121,479.129925,2.000000e-02,18.016991,69.979068,0.02,39.148384,0.02,0.020000,6062.526671,51.848986,0.020000,0.020000,0.020000,0.02,0.020000,52.210457,0.020000,0.020000,0.02,0.020000,0.02,0.020000,25.309769,0.02,0.020000,0.020000,0.020000,0.020000,0.020000,0.020000,0.020000,0.02,0.020000,0.020000,0.02,AtomicGameEngine,2016-01
BEMSimpleLineGraph,1.000000e+00,4.103776e-01,2.310212e-01,3.968922e-04,3.968922e-04,3.968922e-04,3.968922e-04,3.968922e-04,3.968922e-04,3.968922e-04,0.02,0.020000,0.020000,0.02,0.020000,53.538642,2.000000e-02,0.020000,44.646061,0.02,0.020000,0.02,20.609001,0.020000,0.020000,0.020000,0.020000,0.020000,0.02,2367.300401,0.020000,659.766167,0.020000,0.02,0.020000,0.02,0.020000,0.020000,0.02,0.020000,0.020000,0.020000,0.020000,407.607141,0.020000,0.020000,0.02,0.020000,0.020000,0.02,BEMSimpleLineGraph,2016-01
BrowserQuest,2.157527e-06,2.157527e-06,2.157527e-06,2.157527e-06,1.000000e+00,2.157527e-06,2.157527e-06,2.157527e-06,2.157527e-06,3.851487e-01,0.02,0.020000,715.063545,0.02,0.020000,0.020000,2.000000e-02,223.691239,0.020000,0.02,0.020000,0.02,0.020000,0.020000,170.739745,0.020000,83.310346,0.020000,0.02,0.020000,0.020000,0.020000,68.631843,0.02,0.020000,0.02,473.097548,0.020000,0.02,0.020000,0.020000,35.585661,264.077953,0.020000,0.020000,12.868951,0.02,0.020000,0.020000,0.02,BrowserQuest,2016-01
CNTK,9.117480e-04,9.117480e-04,9.117480e-04,9.117480e-04,1.000000e+00,9.117480e-04,9.117480e-04,9.117480e-04,9.117480e-04,9.117480e-04,0.02,0.020000,0.020000,0.02,0.020000,32.708878,2.000000e-02,0.020000,0.020000,0.02,60012.030220,0.02,0.020000,0.020000,0.020000,622.969432,59.866389,0.020000,0.02,0.020000,0.020000,0.020000,51.549274,0.02,0.020000,0.02,0.020000,0.020000,0.02,0.020000,0.020000,0.020000,0.020000,63.463221,220.588384,0.020000,0.02,0.020000,300.068315,0.02,CNTK,2016-01
Caret,2.096518e-04,2.096518e-04,2.096518e-04,2.096518e-04,1.000000e+00,2.096518e-04,2.096518e-04,2.096518e-04,2.096518e-04,2.096518e-04,0.02,0.020000,0.020000,0.02,0.020000,0.020000,2.000000e-02,0.020000,0.020000,0.02,0.020000,0.02,0.020000,0.020000,0.020000,0.020000,0.020000,0.020000,0.02,0.020000,0.020000,0.020000,0.020000,0.02,14.117317,0.02,0.020000,0.020000,0.02,0.020000,0.020000,0.020000,0.020000,0.020000,135425.546395,0.020000,0.02,0.020000,0.020000,0.02,Caret,2016-01
Chart.js,6.477729e-07,6.477729e-07,1.000000e+00,6.477729e-07,6.477729e-07,6.477729e-07,6.477729e-07,6.477729e-07,6.477729e-07,6.477729e-07,0.02,0.020000,0.020000,0.02,0.020000,0.020000,2.000000e-02,0.020000,0.020000,0.02,0.020000,0.02,0.020000,0.020000,0.020000,0.020000,0.020000,0.020000,0.02,0.020000,0.020000,0.020000,0.020000,0.02,0.020000,0.02,0.020000,0.020000,0.02,0.020000,0.020000,0.020000,0.020000,0.020000,0.020000,0.020000,0.02,0.020000,0.020000,0.02,Chart.js,2016-01
CodeIgniter,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,0.02,0.020000,0.020000,0.02,0.020000,0.020000,4.101838e+01,0.020000,3839.609249,0.02,0.020000,0.02,0.020000,0.020000,0.020000,0.020000,0.020000,0.020000,0.02,0.020000,1431.639394,0.020000,0.020000,0.02,0.020000,0.02,0.020000,22765.499196,0.02,731.677077,0.020000,0.020000,394.042181,94.654519,0.020000,0.020000,0.02,0.020000,0.020000,0.02,CodeIgniter,2016-01
Dash,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,0.02,0.020000,0.020000,0.02,0.020000,87399.020000,2.000000e-02,0.020000,0.020000,0.02,0.020000,0.02,0.020000,0.020000,0.020000,0.020000,0.020000,0.020000,0.02,0.020000,0.020000,0.020000,0.020000,0.02,0.020000,0.02,0.020000,0.020000,0.02,0.020000,0.020000,0.020000,0.020000,0.020000,0.020000,0.020000,0.02,0.020000,0.020000,0.02,Dash,2016-01


In [2]:
# Record the Python interpreter version the notebook was executed with.
# NOTE(review): this cell is labelled In [2] but appears after In [11], so the
# saved notebook was not produced by a single top-to-bottom run -- consider
# moving this next to the other imports and re-running from a fresh kernel.
import sys
print(sys.version)

3.4.4 |Anaconda 2.3.0 (64-bit)| (default, Jan 11 2016, 13:54:01) 
[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]
