<h1>Globals</h1>

In [1]:
from __future__ import print_function
from datetime import datetime
from git import Repo, Git
from nltk.stem import PorterStemmer
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import time
import os.path
import re, string, ntpath, keyword, json, codecs
import threading
import shutil, errno


# Project names to skip (referenced by the commented-out checkout_projects code).
skip_projects = ["meteor"]

# Snapshot tag labels, newest first: June then January of every year from
# 2016 back to 2003 ("2016-06", "2016-01", ..., "2003-06", "2003-01").
tag_names = ["%d-%s" % (year, month)
             for year in range(2016, 2002, -1)
             for month in ("06", "01")]

# Maps every "<project>-tags" snapshot folder to the file-name suffix of the
# source files that should be preprocessed for that project.
# The value is consumed as a suffix (name.endswith(value) and value + '.proc'),
# so it must be a real extension: the JavaScript projects use ".js" rather
# than the word "javascript", which never matches any file name.
project_type_map = {
    # Java
    "androidannotations-tags": ".java",
    "bigbluebutton-tags": ".java",
    "cassandra-tags": ".java",
    "elasticsearch-tags": ".java",
    "hibernate-orm-tags": ".java",
    "liferay-portal-tags": ".java",
    "netty-tags": ".java",
    "platform_frameworks_base-tags": ".java",
    "spring-framework-tags": ".java",
    "wildfly-tags": ".java",
    # PHP
    "laravel-tags": ".php",
    "symfony-tags": ".php",
    "cakephp-tags": ".php",
    "CodeIgniter-tags": ".php",
    # Ruby
    "rails-tags": ".rb",
    "sinatra-tags": ".rb",
    "padrino-framework-tags": ".rb",
    "hanami-tags": ".rb",
    "pakyow-tags": ".rb",
    # Python
    "flask-tags": ".py",
    "django-tags": ".py",
    "web2py-tags": ".py",
    "frappe-tags": ".py",
    # Java
    "ninja-tags": ".java",
    # JavaScript
    "meteor-tags": ".js",
    "express-tags": ".js",
    "sails-tags": ".js",
    "mean-tags": ".js",
    "derby-tags": ".js",
    "nodal-tags": ".js"
}

def load_config(config_file):
    """
    Read the projects configuration file and return its parsed JSON content.
    """
    with open(config_file) as handle:
        return json.load(handle)

def copy_folder(src, dst):
    """
    Copy the directory tree ``src`` to ``dst``, replacing ``dst`` if it exists.

    When the operation fails with ENOTDIR (e.g. ``src`` is a plain file) the
    function falls back to a single-file copy; any other OSError propagates.
    """
    try:
        if os.path.exists(dst):
            shutil.rmtree(dst)
        shutil.copytree(src, dst)
    except OSError as copy_error:
        if copy_error.errno != errno.ENOTDIR:
            raise
        shutil.copy(src, dst)
            
def get_immediate_subdirectories(a_dir):
    """
    Return the names (not full paths) of the directories directly inside a_dir.
    """
    subdirectories = []
    for entry in os.listdir(a_dir):
        if os.path.isdir(os.path.join(a_dir, entry)):
            subdirectories.append(entry)
    return subdirectories

# Root folder of the study. It can be overridden with the SE_PROJECT_BASE_DIR
# environment variable so the notebook is not tied to one machine; the
# default is the original location.
base_dir = os.environ.get("SE_PROJECT_BASE_DIR",
                          "/home/hshahin/workspaces/Spring2016_SE_Project")
# Folder holding the project checkouts and their "<project>-tags" snapshot folders.
data_dir = os.path.join(base_dir , "data")
# Projects configuration (JSON), located directly under base_dir.
config_file = "projects_config.json"
config_data = load_config(os.path.join(base_dir , config_file))

In [2]:
# path = '/home/hshahin/workspaces/Spring2016_SE_Project/data'
# get_immediate_subdirectories(path)

<h1>Creating tags functions</h1>

In [3]:
def get_date_time(epoch):
    '''
    Format an epoch timestamp as a local-time 'YYYY-MM-DD HH:MM:SS' string.
    '''
    local_struct = time.localtime(epoch)
    return time.strftime('%Y-%m-%d %H:%M:%S', local_struct)

def tag_exists(path, tag_name):
    '''
    Tell whether the repository at path has a tag called tag_name.
    '''
    return tag_name in Repo(path).tags

def get_epoch(year, month='01'):
    """
    Return the epoch (local time, whole seconds) of midnight on the first
    day of the given year-month.
    """
    first_day = '{0}.{1}.01 00:00:00'.format(year, month)
    parsed = time.strptime(first_day, '%Y.%m.%d %H:%M:%S')
    return int(time.mktime(parsed))


def create_tags(path):
    '''
    Create half-yearly marker tags ("<year>-01" and "<year>-06") in the
    repository located at path.

    Outline of the algorithm:
    # get the list of commits
    # get the latest commit date
    # current_year is the year from that date
    # loop through the list of commits to find the commit having a date equal or just after 1/1/current_year
    # once found create a tag with the current_year name on it AND
    # continue with the year of the commit that crossed the boundary.

    A tag is only created when no tag with that name exists yet, and a line
    "<sha> <commit date> <tag name>" is printed for every tag created.
    Nothing is returned.
    '''
    repo = Repo(path)

    # get the list of commits
    # (the code below treats commits[0] as the most recent commit and walks
    #  towards older ones — presumably the order iter_commits() yields; confirm)
    commits = list(repo.iter_commits())

    # get the latest commit date, current_year is the year from that date
    current_year = datetime.fromtimestamp(commits[0].committed_date).year


    for idx, commit in enumerate(commits):
        # time.sleep(2)
        # print(commits[idx].hexsha)

        # Tag labels for this iteration; both are derived from current_year
        # as it is *before* the January branch below may change it.
        current_year_01 = str(current_year)+'-01'
        current_year_06 = str(current_year)+'-06'

        try:
            # January boundary: this commit is older than Jan 1st of
            # current_year and that date is already in the past, so the
            # previously visited commit (idx-1) receives the "<year>-01" tag.
            if get_epoch(current_year, '01') > commit.committed_date and \
                    int(time.time()) > get_epoch(current_year, '01')  and \
                    idx !=0:
                if str(current_year_01) not in repo.tags and idx != 0:
                    print(commits[idx-1].hexsha+' '+get_date_time(commits[idx-1].committed_date)+' '+current_year_01)
                    # NOTE(review): `past` is assigned but never read.
                    past = repo.create_tag(current_year_01, ref=commits[idx-1],
                                      message="This is a tag to mark the first commit in year %s" % current_year_01)
                # continue with the year of the commit that crossed the boundary
                current_year = datetime.fromtimestamp(commit.committed_date).year

            # June boundary: same idea with June 1st of current_year.
            # NOTE(review): when the January branch above has just changed
            # current_year, the comparison uses the new year while
            # current_year_06 still carries the label of the old year —
            # confirm this is intended.
            if get_epoch(current_year, '06') > commit.committed_date and \
                    int(time.time()) > get_epoch(current_year, '06') and \
                idx != 0:
                if str(current_year_06) not in repo.tags:
                    print(commits[idx-1].hexsha+' '+get_date_time(commits[idx-1].committed_date)+' '+current_year_06)
                    past = repo.create_tag(current_year_06, ref=commits[idx-1],
                                      message="This is a tag to mark the first commit in year %s" % current_year_06)
        except AttributeError:
            # NOTE(review): any AttributeError raised while handling this
            # commit is silently ignored and the loop moves on.
            pass

def checkout_tag(path, tag_name):
    '''
    Check out tag_name in the repository at path; do nothing when the
    repository has no tag with that name.
    '''
    repository = Repo(path)
    git_command = Git(path)
    if tag_name not in repository.tags:
        return
    git_command.checkout(tag_name)

def delete_tags(path):
    '''
    Delete every tag of the repository located at path.
    '''
    repository = Repo(path)
    for existing_tag in repository.tags:
        repository.delete_tag(existing_tag)

<h1>Create tags every 6 months for each repo</h1>

In [4]:
# for project_name, project_type in config_data.items():
#     print("Processing project: " + project_name )
#     t0 = time.time()
#     delete_tags(os.path.join(data_dir, project_name))
#     create_tags(os.path.join(data_dir, project_name))
#     print("Project: " + project_name + " taged in %0.3fs." % (time.time() - t0))

<h1>Preprocessing functions</h1>

In [5]:
# Reserved words of the languages under study. Tokens equal to one of these
# words are dropped during preprocessing (see the `all_keywords` filter in
# file_preprocessing).

# Python keywords (taken from the running interpreter's `keyword` module)
python_keywords = keyword.kwlist

# Java keywords from https://docs.oracle.com/javase/tutorial/java/nutsandbolts/_keywords.html
java_keywords = ["abstract","continue","for","new","switch","assert","default","goto","package","synchronized",
                 "boolean","do","if","private","this","break","double","implements","protected","throw",
                 "byte","else","import","public","throws","case","enum","instanceof","return","transient","catch",
                 "extends","int","short","try","char","final","interface","static","void","class","finally","long",
                 "strictfp","volatile","const","float","native","super","while"]

# Ruby keywords from http://docs.ruby-lang.org/en/2.2.0/keywords_rdoc.html
ruby_keywords = ["__ENCODING__","__LINE__","__FILE__","BEGIN","END","alias","and","begin","break",
                 "case","class","def","defined?","do","else","elsif","end","ensure","false","for","if",
                 "in","module","next","nil","not","or","redo","rescue","retry","return","self","super",
                 "then","true","undef","unless","until","when","while","yield"]

# PHP keywords from http://php.net/manual/en/reserved.keywords.php
php_keywords = ["__halt_compiler","abstract","and","array","as","break","callable","case","catch","class","clone",
                "const","continue","declare","default","die","do","echo","else","elseif","empty","enddeclare",
                "endfor","endforeach","endif","endswitch","endwhile","eval","exit","extends","final","finally",
                "for","foreach","function","global","goto","if","implements","include","include_once","instanceof",
                "insteadof","interface","isset","list","namespace","new","or","print","private","protected",
                "public","require","require_once","return","static","switch","throw","trait","try","unset","use",
                "var","while","xor","yield"]

# Plain concatenation of the four lists: words shared by several languages
# (e.g. "for", "if") appear more than once, and no JavaScript list is included.
all_keywords = python_keywords + java_keywords + ruby_keywords + php_keywords


def path_leaf(path):
    """
    Split path into a (directory, file name) pair using ntpath.split, which
    treats both '/' and '\\' as separators.
    """
    return ntpath.split(path)

# Patterns used to split camel case tokens:
#   _underscorer1 - any character followed by a capitalised word ("xWord")
#   _underscorer2 - a lower-case letter or digit followed by a capital ("aB", "1B")
_underscorer1 = re.compile(r'(.)([A-Z][a-z]+)')
_underscorer2 = re.compile('([a-z0-9])([A-Z])')


def camel_to_spaces(s):
    """
    Turn a camel case identifier into lower-case, space separated words,
    e.g. "getHTTPResponseCode" -> "get http response code".
    """
    with_word_breaks = _underscorer1.sub(r'\1 \2', s)
    fully_split = _underscorer2.sub(r'\1 \2', with_word_breaks)
    return fully_split.lower()

def snake_to_spaces(snake_cased_str):
    """
    Convert a snake case identifier into space separated words.

    One leading and one trailing underscore are dropped. When more than one
    component remains, every component is lower-cased and the components are
    joined with single spaces; a single remaining component is returned
    unchanged (its case is preserved, as before).

    Strings without any content ("" or "_") yield "" instead of raising
    IndexError.
    """
    components = snake_cased_str.split("_")
    # drop the empty component produced by a leading / trailing underscore
    if components and components[0] == "":
        components = components[1:]
    if components and components[-1] == "":
        components = components[:-1]

    if not components:
        # nothing but (at most one) underscore was passed in
        return ""
    if len(components) == 1:
        return components[0]
    return " ".join(part.lower() for part in components)


def file_preprocessing(input_file, output_file):
    """
    Clean one source file and write the result to output_file.

    Every line of input_file goes through the following steps:
    - punctuation characters are replaced with spaces
    - tokens shorter than 4 characters, English stopwords and language
      keywords (all_keywords) are dropped; the comparison is done on the
      raw token, before any case conversion
    - the remaining tokens are converted from camel case / snake case into
      lower-case, space separated words
    - every word is stemmed with the Porter stemmer

    One output line is written per input line (possibly empty).
    The input is decoded as UTF-8 with undecodable bytes replaced.
    NOTE(review): the output file is opened with the platform default
    encoding — confirm that this is UTF-8 where the notebook runs.
    """
    # translation table that maps every punctuation character to a space
    replace_punctuation = str.maketrans(string.punctuation, ' '*len(string.punctuation))
    # stemming
    stemmer = PorterStemmer()

    # The filter word lists do not change while the file is processed, so they
    # are evaluated once here instead of once per token, and kept in sets for
    # constant-time membership tests.
    english_stopwords = frozenset(stopwords.words('english'))
    language_keywords = frozenset(all_keywords)

    with open(input_file, 'r', encoding='utf-8', errors='replace') as inFile, open(output_file,'w') as outFile:
        for line in inFile:
            # replace punctuations, filter the tokens, then
            # convert camel case and snake case into space separated words
            kept_tokens = [snake_to_spaces(camel_to_spaces(word))
                           for word in line.translate(replace_punctuation).split()
                           if len(word) >= 4
                           and word not in english_stopwords
                           and word not in language_keywords]
            line_without_puncs = ' '.join(kept_tokens)

            # stemming (a camel case token may have become several words)
            singles = []
            for plural in line_without_puncs.split():
                try:
                    singles.append(stemmer.stem(plural))
                except UnicodeDecodeError:
                    # best effort: report the offending word and skip it
                    print(plural)

            line_stemmed = ' '.join(singles)
            print(line_stemmed, file=outFile)


def return_file_type(project_path, file_type):
    """
    Walk project_path recursively and return the paths of all files whose
    name ends with file_type.
    """
    matching_files = []
    for root, _dirs, files in os.walk(project_path):
        for name in files:
            if name.endswith(file_type):
                matching_files.append(os.path.join(root, name))
    return matching_files

def project_preprocessing(project_path, file_type, tag_name):
    """
    Preprocess every source file of one project snapshot.

    - each file under project_path whose name ends with file_type is
      cleaned by file_preprocessing and stored next to it as "<name>.proc"
    - all "<file_type>.proc" files found under project_path are then
      concatenated into "final-processed.out" in project_path

    tag_name is accepted but not used by this function.
    """
    # process project source code files and save each file as .*.proc
    for source_file in return_file_type(project_path, file_type):
        folder, file_name = path_leaf(source_file)
        file_preprocessing(source_file, os.path.join(folder, file_name + '.proc'))

    # concatenate all processed project files into one file under root directory
    merged_path = os.path.join(project_path, "final-processed.out")
    processed_files = return_file_type(project_path, file_type + '.proc')
    with open(merged_path, 'w') as merged:
        for processed_file in processed_files:
            with open(processed_file) as part:
                merged.writelines(part)

<h1>checkout tags in separate folders</h1>

In [6]:
# create folder project_tags
# for each tag if tag exists
# copy the project into project_tag/tag_name
# checkout project to tag_name
# delete .git folder
# def checkout_projects():
#     for project_name, project_type in config_data.items():
#         project_path = os.path.join(data_dir, project_name)
#         project_tags_path = project_path + '-tags'
        
#         if project_name not in skip_projects:
#             if not os.path.exists(project_tags_path):
#                 os.makedirs(project_tags_path)

#             repo = Repo(project_path)
#             for tag_name in tag_names:
#                 if tag_exists(project_path, tag_name):
#                     print("Copying "+project_name+' '+tag_name)
#                     current_tag_path = os.path.join(project_tags_path, tag_name)
#                     copy_folder(project_path, current_tag_path)

#             for tag_name in tag_names:
#                 if tag_exists(project_path, tag_name):
#                     print("Checkout "+project_name+' '+tag_name)
#                     current_tag_path = os.path.join(project_tags_path, tag_name)
#                     checkout_tag(current_tag_path, tag_name)

#             for tag_name in tag_names:
#                 if tag_exists(project_path, tag_name):
#                     print("deleting .git "+project_name+' '+tag_name)
#                     current_tag_path = os.path.join(project_tags_path, tag_name)
#                     os.chdir(current_tag_path)
#                     shutil.rmtree(os.path.join(current_tag_path, '.git'))

# checkout_projects()

<h1>Run preprocessing</h1>

In [7]:
from multiprocessing import Pool

def run_preprocessing(project_tags_dir):
    """
    Preprocess every snapshot folder of one "<project>-tags" directory.

    project_tags_dir is a folder name relative to data_dir. Each immediate
    subdirectory is treated as one snapshot and handed to
    project_preprocessing together with the file suffix registered for the
    project in project_type_map. Progress and timing are printed.

    Folders without an entry in project_type_map are reported and skipped, so
    a stray directory under data_dir no longer aborts the whole run with a
    KeyError.
    """
    file_type = project_type_map.get(project_tags_dir)
    if file_type is None:
        print('Skipping ' + project_tags_dir + ': no entry in project_type_map')
        return

    project_tags_path = os.path.join(data_dir , project_tags_dir)
    print('---------------'+project_tags_path)

    for project_tag in get_immediate_subdirectories(project_tags_path):
        project_tag_path = os.path.join(project_tags_path , project_tag)
        t0 = time.time()
        project_preprocessing(project_tag_path, file_type, project_tag)
        print("processing project: " + project_tags_path + "\t tag "
              + project_tag + " done in %0.3fs." % (time.time() - t0))
    print('****This thread is done:', os.getpid())
     

# Preprocess every folder found under data_dir, one worker task per folder.
project_tags_paths = get_immediate_subdirectories(data_dir)
pool = Pool(16)
try:
    pool.map(run_preprocessing, project_tags_paths)
finally:
    # release the worker processes even when a task fails
    pool.close()
    pool.join()

# for project_tags_path in get_immediate_subdirectories(data_dir):
#     preprocessing_threads = threading.Thread(target=run_preprocessing, args=(project_tags_path, ))
#     preprocessing_threads.start()

print('Main process Done...............')

---------------/home/hshahin/workspaces/Spring2016_SE_Project/data/sinatra-tags
---------------/home/hshahin/workspaces/Spring2016_SE_Project/data/web2py-tags
---------------/home/hshahin/workspaces/Spring2016_SE_Project/data/symfony-tags
---------------/home/hshahin/workspaces/Spring2016_SE_Project/data/netty-tags
---------------/home/hshahin/workspaces/Spring2016_SE_Project/data/mean-tags
---------------/home/hshahin/workspaces/Spring2016_SE_Project/data/androidannotations-tags
---------------/home/hshahin/workspaces/Spring2016_SE_Project/data/sails-tags
---------------/home/hshahin/workspaces/Spring2016_SE_Project/data/laravel-tags
---------------/home/hshahin/workspaces/Spring2016_SE_Project/data/pakyow-tags
---------------/home/hshahin/workspaces/Spring2016_SE_Project/data/bigbluebutton-tags
---------------/home/hshahin/workspaces/Spring2016_SE_Project/data/django-tags
---------------/home/hshahin/workspaces/Spring2016_SE_Project/data/frappe-tags
---------------/home/hshahin/works

<h1>Topic Modeling</h1>

In [18]:
from __future__ import print_function
import os
from time import time
from os import listdir
from os.path import isdir

from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.decomposition import NMF, LatentDirichletAllocation
from sklearn.datasets import fetch_20newsgroups
import pandas as pd
import numpy as np
# pd.set_option('display.mpl_style', 'default') 
pd.set_option('display.width', 5000) 
pd.set_option('display.max_columns', 60) 
import matplotlib.pyplot as plt

# Upper bound on the vocabulary size, passed to CountVectorizer as max_features.
n_features = 1000000
# Number of topics the LDA model is fitted with.
n_topics = 10
# n_top_words = 100


# Rebinds config_file (first assigned in the Globals cell) to the project
# list used for topic modeling; the file is looked up under base_dir.
config_file = "projects_config_webFrameworks.json"
# config_file = 'projects_config_topGithub.json'
selected_projects = load_config(os.path.join(base_dir , config_file))


def print_top_words(model, feature_names, n_top_words):
    """
    Print, for every topic in model.components_, a "Topic #<n>:" header and
    the n_top_words highest-weighted feature names (highest first), followed
    by one blank line after the last topic.
    """
    for topic_idx, topic in enumerate(model.components_):
        print("Topic #%d:" % topic_idx)
        # argsort is ascending: take the last n_top_words indices, reversed
        top_indices = topic.argsort()[:-n_top_words - 1:-1]
        top_names = [feature_names[i] for i in top_indices]
        print(" ".join(top_names))
    print()

def get_top_words(model, feature_names, n_top_words):
    """
    Build a DataFrame with two rows per topic of model.components_:
    'topic#<n>' holds the n_top_words highest-weighted feature names and
    'freq#<n>' the matching weights, both ordered from highest to lowest.
    Columns are named 'word0' ... 'word<n_top_words - 1>'.
    """
    column_labels = ['word'+str(i) for i in range(n_top_words)]
    df = pd.DataFrame(columns=column_labels)
    for topic_idx, topic in enumerate(model.components_):
        # argsort is ascending: take the last n_top_words indices, reversed
        top_indices = topic.argsort()[:-n_top_words - 1:-1]
        df.loc['topic#'+str(topic_idx)] = [feature_names[i] for i in top_indices]
        df.loc['freq#'+str(topic_idx)] = [topic[i] for i in top_indices]

    return df

def get_top_doc_topic(projects_topics, project_names, n_top_words):
    """
    Build a DataFrame listing, for every topic, the projects with the
    highest weight for that topic.

    projects_topics -- 2-D array-like, one row per project and one column
                       per topic (e.g. the result of lda.transform)
    project_names   -- sequence of names, aligned with the rows of
                       projects_topics
    n_top_words     -- number of top entries to keep per topic; it must not
                       exceed the number of projects

    The frame has the same layout as the one returned by get_top_words:
    columns 'word0' ... 'word<n - 1>' and two rows per topic, 'topic#<k>'
    with the project names and 'freq#<k>' with the matching weights, both
    ordered from highest to lowest weight.

    The previous body ignored both data arguments and read the undefined
    globals `model` and `feature_names`.
    """
    weights = np.asarray(projects_topics)
    df = pd.DataFrame(columns=['word'+str(i) for i in range(n_top_words)])
    for topic_idx in range(weights.shape[1]):
        topic_weights = weights[:, topic_idx]
        # argsort is ascending: take the last n_top_words indices, reversed
        top_indices = topic_weights.argsort()[:-n_top_words - 1:-1]
        df.loc['topic#'+str(topic_idx)] = [project_names[i] for i in top_indices]
        df.loc['freq#'+str(topic_idx)] = [topic_weights[i] for i in top_indices]

    return df

def print_full(x):
    """
    Print x with the pandas cell width limit raised to 1000 characters so
    long cell values are not truncated.

    The option is changed only for the duration of the print and restored
    afterwards (also when printing fails). The previous code reset a
    different option ('display.max_rows'), which left the raised
    'display.max_colwidth' in effect for the rest of the session.
    """
    with pd.option_context('display.max_colwidth', 1000):
        print(x)

# Read projects into strings: every snapshot folder of every selected project
# becomes one document. projects_data[k] is the text of the snapshot named
# project_names[k]; the two lists must stay aligned.
projects_data = []
project_names = []
for i, project_name in enumerate(selected_projects.keys()):
    # For each snapshot of the project
    snapshots = []
    project_path = os.path.join(data_dir, project_name+'-tags')
    try:
        # every immediate subdirectory of "<project>-tags" is one snapshot
        snapshots = [os.path.join(project_path, p) 
                     for p in listdir(project_path) if isdir(os.path.join(project_path, p))]
    except FileNotFoundError:
         # no "<project>-tags" folder: report it and continue without snapshots
         print('------Project Not found: '+project_name)   
    for snapshot in snapshots:
        # document name is "<project>-tags_<snapshot folder>" (last two path parts)
        project_names.append('_'.join(snapshot.split('/')[-2:]))
        # print(project_names[-1])
        processed_path = os.path.join(snapshot, "final-processed.out")
        # NOTE(review): the name is appended before the file is opened, so a
        # snapshot without final-processed.out raises here and leaves
        # project_names one entry longer than projects_data.
        with open(processed_path, 'r') as myfile:
            # the whole snapshot is flattened into a single line of text
            projects_data.append(myfile.read().replace('\n', ' '))
            

print('num of projects: ', len(projects_data))
   
# Use tf (raw term count) features for LDA.
print("Extracting tf features for LDA...")
# Terms present in more than 70% or in fewer than 10% of the documents are
# dropped; at most n_features terms are kept.
tf_vectorizer = CountVectorizer(max_df=0.7, min_df=0.1, max_features=n_features, stop_words='english')

t0 = time()
tf = tf_vectorizer.fit_transform(projects_data)
print("done in %0.3fs." % (time() - t0))

print('tf shape:', tf.shape)

print("Fitting LDA models with tf features")
# random_state is fixed so that repeated fits are comparable.
# NOTE(review): newer scikit-learn releases name the `n_topics` argument
# `n_components` — confirm against the installed version.
lda = LatentDirichletAllocation(n_topics=n_topics, max_iter=100,
                                learning_method='online', learning_offset=10.,
                                random_state=0, n_jobs=10)
t0 = time()
lda.fit(tf)

print("done in %0.3fs." % (time() - t0))

# Save lda into a pickle file.
import pickle

# the context manager closes the file, so the pickle is flushed to disk
with open("lda_7_1.p", "wb") as lda_file:
    pickle.dump(lda, lda_file)

# lda = pickle.load(open("lda_5_1.p", "rb"))


------Project Not found: spring-framework
------Project Not found: padrino-framework
num of projects:  226
Extracting tf features for LDA...
done in 108.308s.
tf shape: (226, 7870)
Fitting LDA models with tf features
done in 210.133s.


## 1- Topic-word

In [21]:
# Collect the 50 highest-weighted terms of every LDA topic, save the table as
# CSV and display it.
print("\nTopics-words in LDA model:")
# NOTE(review): newer scikit-learn releases replace get_feature_names() with
# get_feature_names_out() — confirm against the installed version.
tf_feature_names = tf_vectorizer.get_feature_names()
topic_word = get_top_words(lda, tf_feature_names, 50)
topic_word.to_csv('topic_word_7_1.csv')
topic_word


Topics-words in LDA model:


Unnamed: 0,word0,word1,word2,word3,word4,word5,word6,word7,word8,word9,word10,word11,word12,word13,word14,word15,word16,word17,word18,word19,word20,word21,word22,word23,word24,word25,word26,word27,word28,word29,word30,word31,word32,word33,word34,word35,word36,word37,word38,word39,word40,word41,word42,word43,word44,word45,word46,word47,word48,word49
topic#0,cake,licens,articl,plugin,true,php,foundat,softwar,fals,associ,fixtur,task,properti,translat,mock,shell,auth,router,rule,book,2007,plural,column,pagin,notic,retain,opensourc,redistribut,ha,primari,dispatch,publish,socket,behavior,project,record,reg,belong,contact,rapid,exp,network,token,categori,foreign,driver,bool,list,inflector,bake
freq#0,87115.4,41884.7,26243.7,25702,20782.6,18853.3,11462,11261.1,10503.8,10215.2,9763.47,9527.94,9464.23,9377.08,8905.67,7928.39,7900.23,7682.08,7451.2,7297.73,6697.65,6622.24,6330.1,6252.74,6212,5696.76,5673.41,5671.37,5661.48,5540.88,5526.04,5042.62,4930.03,4846.23,4768.88,4645.18,4358.66,4241.84,4216.57,4155.99,4057.58,4044.81,4025.08,3976.51,3965.23,3837.52,3825.32,3773.64,3771,3440.39
topic#1,record,column,topic,transit,associ,person,rail,firm,zone,migrat,fixtur,david,account,nodoc,project,reflect,respond,2005,klass,month,rubi,compani,destroy,peopl,adapt,dispatch,transact,belong,primari,autoload,mailer,categori,asset,written,proc,child,admin,node,offset,repli,accessor,datetim,task,member,foreign,plugin,word,hour,token,flash
freq#1,92550.4,51966.2,46489,41862.7,38538.1,37551.2,32712.3,29414.9,22138.4,21047.5,21013.3,20792.6,19637.7,19362.2,18905.3,18381.7,16673.1,15915.8,15559.5,14844.7,14712.7,14561.9,14438.2,14262.2,14058.9,13116.8,11928.3,11472.7,11344.1,11134.2,11133.8,10957.4,10218.1,10197.8,10082.7,9388.78,9345.56,9012.84,8949.46,8923.39,8582.72,8388.03,8032.3,7767.89,7688.05,7564.36,7003.8,6870.37,6785.96,6764.71
topic#2,symfoni,interfac,licens,node,token,properti,bundl,choic,constraint,street,distribut,servic,transform,kernel,factori,listen,resolv,mock,metadata,formatt,sprintf,true,id,child,fixtur,translat,dispatch,php,ident,profil,storag,ha,children,fals,twig,violat,foundat,intl,role,dir,bag,doctrin,gmail,finder,constructor,acl,matcher,tran,list,bool
freq#2,197211,70192.3,58530.2,42771.6,39556.4,39407.1,32778.1,30116.1,28373.8,28267.8,28213.9,27869.5,26354.7,25026.2,23677.6,20380.7,18221,18060.1,17254.6,16573.3,15823.7,14250.3,14195.6,13995.8,13593.4,13336.7,13263,12122.1,11733.9,11676.5,11629.5,11490.8,11275.6,10629.9,10467.9,10190.8,9802.68,9719.87,9549.48,9001.87,8717.47,8603.37,8384.83,8264.37,8138.44,8069.96,8033.49,7859.2,7790.38,7628.02
topic#3,django,admin,datetim,articl,kwarg,dict,choic,unicod,book,middlewar,python,contrib,lookup,geo,widget,geometri,extra,person,isinst,backend,column,storag,foreign,func,decim,migrat,node,token,oper,verbos,cursor,opt,languag,media,translat,inlin,auth,tupl,serial,month,permiss,properti,true,report,child,headlin,getattr,list,transact,categori
freq#3,120831,75543.5,54570.5,45220.7,43430.7,39543.2,37325.1,34070.2,29804.7,28542.1,28535.5,25837.3,24247.5,24110.8,23456.6,22780.4,22048.2,22032.2,21924.1,21049.1,20442.4,20243.3,20018.7,19051.8,18709.1,18185.2,18137.6,17599.9,17079.7,16619.7,16450.4,16253.5,15792.7,15703.2,15447.8,15443.3,14820.5,14507.9,13721.6,13318.7,12733.1,12426.5,12375,12011.5,11735.9,11608.8,11336.6,11319.5,10908.2,10860.6
topic#4,licens,fals,softwar,ninja,true,conn,lang,permiss,properti,driver,bool,fieldnam,languag,column,tab,preg,elli,obtain,apach,lab,notic,subject,charact,upload,warranti,role,technolog,team,segment,deal,mock,profil,row,temp,basepath,guid,2012,shall,newlin,conf,person,flash,distribut,unabl,free,impli,alter,articl,month,platform
freq#4,37904,19863.4,18319.3,17318.9,16572.6,12683.8,11392.8,9604.97,8011.77,7985.91,7915.07,7616.94,7505.69,7368.66,6533.37,5339.97,5292.86,4957.05,4823.29,4817.29,4742.3,4588.13,4585.91,4373.31,4200.28,4027.05,3810.65,3806.12,3696.43,3639.45,3547.4,3537.38,3478.9,3421.36,3413.34,3407.41,3403.39,3368.16,3363.3,3318.85,3287.05,3034.74,3025.57,3005.57,2976.41,2854.66,2848.67,2818.19,2766.14,2609.53


## 2- Project-topic

In [22]:
# Topic weights of every snapshot document: one row per document of `tf`,
# one column per topic (the displayed shape below is (226, 10)).
projects_topics = lda.transform(tf)
projects_topics.shape

(226, 10)

In [23]:
# Project-topic table: one row per snapshot, one column per fitted topic.
# The number of topic columns follows the shape of projects_topics instead of
# a hard-coded 10.
df = pd.DataFrame(projects_topics,
                  columns=['topic'+str(i) for i in range(projects_topics.shape[1])])

# Snapshot names look like "<project>-tags_<date>". They are split at the LAST
# underscore and only the "-tags" suffix is removed, so project names that
# contain '_' or '-' themselves (e.g. "platform_frameworks_base",
# "hibernate-orm") are kept whole and the date is always the snapshot folder.
name_parts = [name.rsplit('_', 1) for name in project_names]
df['project'] = [parts[0][:-len('-tags')] if parts[0].endswith('-tags') else parts[0]
                 for parts in name_parts]
df['date'] = [parts[1] for parts in name_parts]
df.index = project_names
df.to_csv('project-topic_7_1.csv')
df.head()

Unnamed: 0,topic0,topic1,topic2,topic3,topic4,topic5,topic6,topic7,topic8,topic9,project,date
cakephp-tags_2014-06,110276.370135,0.100024,0.100023,0.100022,0.100025,0.100025,41995.82968,0.100018,0.100024,0.100023,cakephp,2014-06
cakephp-tags_2012-01,72752.157767,0.100022,0.100037,0.10002,0.100022,0.100022,58371.042053,0.100017,0.100021,0.10002,cakephp,2012-01
cakephp-tags_2006-06,6427.725487,48.39582,0.100024,0.100026,246.901196,0.100027,12059.377344,0.100016,0.100029,0.10003,cakephp,2006-06
cakephp-tags_2016-01,123996.816797,0.100029,1010.382988,0.100027,0.100027,0.100029,0.100022,0.100026,0.100027,0.100027,cakephp,2016-01
cakephp-tags_2013-06,102283.891549,0.100024,0.100023,0.100021,0.100024,0.100025,43675.308271,0.100019,0.100022,0.100022,cakephp,2013-06


In [134]:
# Leftover scratch cell: it only prints an empty line.
print()




In [100]:
# proj_df = df.groupby('project')
# for i, g in proj_df:
#     g[df.columns[0:7]].plot(kind='bar')
