In [1]:
import os
import nltk
import re
from bs4 import BeautifulSoup
from gensim.models.word2vec import Word2Vec
from gensim.models import KeyedVectors
from tinydb import TinyDB
from nltk.tokenize import word_tokenize, sent_tokenize

In [99]:
def clean_and_tokenize(sentence):
    sentence = sentence.lower()
    text = BeautifulSoup(sentence, "lxml").get_text()
    text = re.sub("[^a-zA-Z]"," ", text)
    return word_tokenize(text)

In [104]:
def read_single_repo(path):
    word_list = []
    if os.path.isfile(path):
        db = TinyDB(path)
        try:
            for entry in db:
                for value in entry['issues']:
                    if value['title'] is not None and value['body'] is not None:
                        word_list.append(clean_and_tokenize(value['title'] + " " + value['body']))
                    elif value['body'] is not None:
                        word_list.append(clean_and_tokenize(value['body']))
                    elif value['title'] is not None:
                        word_list.append(clean_and_tokenize(value['title']))

                for value in entry['issue_comments']:
                    if value['body'] is not None:
                        word_list.append(clean_and_tokenize(value['body']))

                for value in entry['pull_requests']:
                    if value['title'] is not None and value['body'] is not None:
                        word_list.append(clean_and_tokenize(value['title'] + " " + value['body']))
                    elif value['body'] is not None:
                        word_list.append(clean_and_tokenize(value['body']))
                    elif value['title'] is not None:
                        word_list.append(clean_and_tokenize(value['title']))

                for value in entry['review_comments']:
                    if value['body'] is not None:
                        word_list.append(clean_and_tokenize(value['body']))

                for value in entry['commits']:
                    if value['commit']['message'] is not None:
                        word_list.append(clean_and_tokenize(value['commit']['message']))

                for value in entry['commit_comments']:
                    if value['body'] is not None:
                        word_list.append(clean_and_tokenize(value['body']))
        except:
            print(path)
    return word_list

In [108]:
read_single_repo(path="../data/github-repos-all/0xPoly/Centry.json")

[['fix',
  'broken',
  'headings',
  'in',
  'markdown',
  'files',
  'github',
  'changed',
  'the',
  'way',
  'markdown',
  'headings',
  'are',
  'parsed',
  'so',
  'this',
  'change',
  'fixes',
  'it',
  'see',
  'bryant',
  'readmesfix',
  'https',
  'github',
  'com',
  'bryant',
  'readmesfix',
  'for',
  'more',
  'information',
  'tackles',
  'bryant',
  'readmesfix'],
 ['windows', 'how', 'do', 'i', 'use', 'this', 'on', 'windows'],
 ['does',
  'not',
  'work',
  'on',
  'osx',
  'mavericks',
  'tried',
  'with',
  'default',
  'python',
  'fails',
  'to',
  'find',
  'tkinter',
  'locgical',
  'since',
  'tkinter',
  'uppercase',
  'was',
  'before',
  'x',
  'tried',
  'with',
  'brew',
  'installed',
  'python',
  'just',
  'getting',
  'failed',
  'to',
  'bind',
  'to',
  'udp',
  'socket',
  'and',
  'nothing',
  'else',
  'tried',
  'with',
  'official',
  'installer',
  'installed',
  'python',
  'same',
  'and',
  'nothing',
  'happens',
  'am',
  'i',
  'missing',


In [109]:
def read_all_repos(path):
    word_list = []
    if os.path.isdir(path):
        for root, dirs, files in os.walk(path):
            print("Processing %s" % root)
            for file in files:
                word_list.extend(read_single_repo(path=os.path.join(root, file)))
        print(word_list)
    return word_list

In [4]:
def train_word_vectors(sentences):
    model = Word2Vec(sentences, size=300)
    model.save('../models/embedding/word2vec_base')
    return model

In [112]:
token_matrix = read_all_repos(path="../data/github-repo-msg")

Processing ../data/github-repo-msg
Processing ../data/github-repo-msg/fish2000
Processing ../data/github-repo-msg/waltervargas
Processing ../data/github-repo-msg/franksl
Processing ../data/github-repo-msg/schjan
Processing ../data/github-repo-msg/cmonterrosa
Processing ../data/github-repo-msg/nicanorperera


  ' Beautiful Soup.' % markup)


Processing ../data/github-repo-msg/Ydle
Processing ../data/github-repo-msg/Elive
Processing ../data/github-repo-msg/liu-chong
Processing ../data/github-repo-msg/tjfontaine
Processing ../data/github-repo-msg/lukecampbell
Processing ../data/github-repo-msg/jaapverloop
Processing ../data/github-repo-msg/SomethingExplosive
Processing ../data/github-repo-msg/bwildenhain
Processing ../data/github-repo-msg/ckw-mod
Processing ../data/github-repo-msg/ansible


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
  ' Beautiful Soup.' % markup)


Processing ../data/github-repo-msg/Kalbintion
Processing ../data/github-repo-msg/Ariloy
Processing ../data/github-repo-msg/phillord
Processing ../data/github-repo-msg/AgilTec
Processing ../data/github-repo-msg/mthli
Processing ../data/github-repo-msg/muchomasfacil
Processing ../data/github-repo-msg/Skobayashi
Processing ../data/github-repo-msg/Querela
Processing ../data/github-repo-msg/RedTurtle
Processing ../data/github-repo-msg/NativeScript
Processing ../data/github-repo-msg/lpotter
Processing ../data/github-repo-msg/Stibbons
Processing ../data/github-repo-msg/AgencyPMG
Processing ../data/github-repo-msg/zeronullity
Processing ../data/github-repo-msg/dolphin-emu


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beau

Processing ../data/github-repo-msg/martynsmith
Processing ../data/github-repo-msg/microcai


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup


Processing ../data/github-repo-msg/SRombauts
Processing ../data/github-repo-msg/airbnb


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup


Processing ../data/github-repo-msg/jimlindstrom
Processing ../data/github-repo-msg/jjbunn
Processing ../data/github-repo-msg/tgjones
Processing ../data/github-repo-msg/joyent
Processing ../data/github-repo-msg/mono


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup


Processing ../data/github-repo-msg/fish-shell


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup


Processing ../data/github-repo-msg/iancook75
Processing ../data/github-repo-msg/ramen
Processing ../data/github-repo-msg/composer


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup


Processing ../data/github-repo-msg/numat
Processing ../data/github-repo-msg/vext01
Processing ../data/github-repo-msg/h2o


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup


Processing ../data/github-repo-msg/jburman
Processing ../data/github-repo-msg/aspnet


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beau

" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beau

Processing ../data/github-repo-msg/docker
Processing ../data/github-repo-msg/RobinRadic
Processing ../data/github-repo-msg/UltraSabreman
Processing ../data/github-repo-msg/smcameron
Processing ../data/github-repo-msg/OCA
Processing ../data/github-repo-msg/verdigris
Processing ../data/github-repo-msg/chbrown
Processing ../data/github-repo-msg/puppetlabs


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup


Processing ../data/github-repo-msg/tornadoweb


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beau

Processing ../data/github-repo-msg/seblin
Processing ../data/github-repo-msg/dotnet


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
  ' that document to Bea

Processing ../data/github-repo-msg/slowmoVideo
Processing ../data/github-repo-msg/sphaero
Processing ../data/github-repo-msg/vzvu3k6k
Processing ../data/github-repo-msg/liquidise
Processing ../data/github-repo-msg/alanbem
Processing ../data/github-repo-msg/videolan
Processing ../data/github-repo-msg/onaio


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup


Processing ../data/github-repo-msg/niw
Processing ../data/github-repo-msg/grpc
Processing ../data/github-repo-msg/takawitter
Processing ../data/github-repo-msg/zfsrogue
Processing ../data/github-repo-msg/pithos


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beau

Processing ../data/github-repo-msg/versionone
Processing ../data/github-repo-msg/swoole


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
请参照这里的提示操作一下。
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup


Processing ../data/github-repo-msg/lex-lingo
Processing ../data/github-repo-msg/chef


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beau

Processing ../data/github-repo-msg/open-epicycle
Processing ../data/github-repo-msg/django


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup


Processing ../data/github-repo-msg/jruby
Processing ../data/github-repo-msg/bingoohuang
Processing ../data/github-repo-msg/symfony
Processing ../data/github-repo-msg/rhq-project
Processing ../data/github-repo-msg/dreikanter
Processing ../data/github-repo-msg/hcs
../data/github-repo-msg/hcs/hcs-cloud.json
Processing ../data/github-repo-msg/colszowka
Processing ../data/github-repo-msg/FFmpeg


  ' that document to Beautiful Soup.' % decoded_markup
  ' that document to Beautiful Soup.' % decoded_markup
  ' that document to Beautiful Soup.' % decoded_markup


Processing ../data/github-repo-msg/humangeo
Processing ../data/github-repo-msg/owncloud


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beau

Processing ../data/github-repo-msg/jenkinsci


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
http://javadoc.jenkins-c

Processing ../data/github-repo-msg/WordPress


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beau

Processing ../data/github-repo-msg/FrankHB
Processing ../data/github-repo-msg/os6sense
Processing ../data/github-repo-msg/PowerKiKi
Processing ../data/github-repo-msg/Homebrew


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beau

Processing ../data/github-repo-msg/abulrim
Processing ../data/github-repo-msg/PigeonPack
Processing ../data/github-repo-msg/LSST
Processing ../data/github-repo-msg/mconf
Processing ../data/github-repo-msg/gbagnoli
Processing ../data/github-repo-msg/ambitioninc
Processing ../data/github-repo-msg/gpac


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup


Processing ../data/github-repo-msg/higanworks-cookbooks
Processing ../data/github-repo-msg/AfterTheRainOfStars
Processing ../data/github-repo-msg/BobKingstone
Processing ../data/github-repo-msg/ausaccessfed


  ' that document to Beautiful Soup.' % decoded_markup


Processing ../data/github-repo-msg/mkraft
Processing ../data/github-repo-msg/lbitonti
Processing ../data/github-repo-msg/brigittewarner
Processing ../data/github-repo-msg/etherdev
Processing ../data/github-repo-msg/emersion
Processing ../data/github-repo-msg/ldrumm
Processing ../data/github-repo-msg/scrapy


  ' that document to Beautiful Soup.' % decoded_markup


Processing ../data/github-repo-msg/celery


  ' that document to Beautiful Soup.' % decoded_markup


Processing ../data/github-repo-msg/relldoesphp
Processing ../data/github-repo-msg/google


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
  ' that document to Beautiful Soup.' % decoded_markup
http://code.google.com/p/googletransitdatafeed/issues/detail?id=167" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL,

Processing ../data/github-repo-msg/kzoll
Processing ../data/github-repo-msg/jessy1092
Processing ../data/github-repo-msg/smartpension
Processing ../data/github-repo-msg/openregister
Processing ../data/github-repo-msg/wangduoxiong
Processing ../data/github-repo-msg/mgkimsal
Processing ../data/github-repo-msg/barterli


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup


Processing ../data/github-repo-msg/rcbops-cookbooks
Processing ../data/github-repo-msg/sangotaro
Processing ../data/github-repo-msg/singuerinc
Processing ../data/github-repo-msg/mirego
Processing ../data/github-repo-msg/semantic-dependency-parsing
Processing ../data/github-repo-msg/Serneum
Processing ../data/github-repo-msg/rafallo
Processing ../data/github-repo-msg/dalmirdasilva
Processing ../data/github-repo-msg/github


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup


Processing ../data/github-repo-msg/dsoprea
Processing ../data/github-repo-msg/xbmc


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup


Processing ../data/github-repo-msg/GNOME
Processing ../data/github-repo-msg/xenserver
Processing ../data/github-repo-msg/lazymaniac
Processing ../data/github-repo-msg/mapbox
Processing ../data/github-repo-msg/bitcoin


  ' that document to Beautiful Soup.' % decoded_markup


Processing ../data/github-repo-msg/veg
Processing ../data/github-repo-msg/blockchain
Processing ../data/github-repo-msg/koolkode
Processing ../data/github-repo-msg/xamarin
Processing ../data/github-repo-msg/twitter


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup


Processing ../data/github-repo-msg/mensfeld
Processing ../data/github-repo-msg/TI-OpenLink
Processing ../data/github-repo-msg/darioquintana
Processing ../data/github-repo-msg/pgoergler
Processing ../data/github-repo-msg/mteodori
Processing ../data/github-repo-msg/aamattos
Processing ../data/github-repo-msg/tomana
Processing ../data/github-repo-msg/PuzzleOpenDataHackdayTeam
Processing ../data/github-repo-msg/altstone
Processing ../data/github-repo-msg/Microsoft
Processing ../data/github-repo-msg/goccy
Processing ../data/github-repo-msg/jlagerweij
Processing ../data/github-repo-msg/nano-byte
Processing ../data/github-repo-msg/gabepolk
Processing ../data/github-repo-msg/couchbase


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup


Processing ../data/github-repo-msg/generators-io-projects
Processing ../data/github-repo-msg/novapost
Processing ../data/github-repo-msg/libdynd


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup


Processing ../data/github-repo-msg/structured-commons
Processing ../data/github-repo-msg/kennyma
Processing ../data/github-repo-msg/DevCabin
Processing ../data/github-repo-msg/xfce-mirror
Processing ../data/github-repo-msg/tanel
Processing ../data/github-repo-msg/usgs
Processing ../data/github-repo-msg/mysql


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup


Processing ../data/github-repo-msg/vivid-planet
Processing ../data/github-repo-msg/langner
Processing ../data/github-repo-msg/NESCent
Processing ../data/github-repo-msg/jgauffin
Processing ../data/github-repo-msg/packfire
Processing ../data/github-repo-msg/aws


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup


Processing ../data/github-repo-msg/apanzerj
Processing ../data/github-repo-msg/non117
Processing ../data/github-repo-msg/alu0100536829
Processing ../data/github-repo-msg/linchproject
Processing ../data/github-repo-msg/ramusus
Processing ../data/github-repo-msg/robertf224
Processing ../data/github-repo-msg/WSULib
Processing ../data/github-repo-msg/halmd-org
Processing ../data/github-repo-msg/mpaladin
Processing ../data/github-repo-msg/phalcon



;-)
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
  ' that document to Beautiful Soup.' % decoded_markup


Processing ../data/github-repo-msg/openSUSE


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup


Processing ../data/github-repo-msg/jagregory


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup


Processing ../data/github-repo-msg/Taapeli
Processing ../data/github-repo-msg/erdavila
Processing ../data/github-repo-msg/herumi
Processing ../data/github-repo-msg/bryanjswift
Processing ../data/github-repo-msg/Philippe2201
Processing ../data/github-repo-msg/taky
Processing ../data/github-repo-msg/mozilla


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beau

Processing ../data/github-repo-msg/kzeleny
Processing ../data/github-repo-msg/BadrIT
Processing ../data/github-repo-msg/melizalab
Processing ../data/github-repo-msg/jittat
Processing ../data/github-repo-msg/facebook


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beau

Processing ../data/github-repo-msg/blueplanet
../data/github-repo-msg/blueplanet/task_chute.json
Processing ../data/github-repo-msg/openemr


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup


Processing ../data/github-repo-msg/hhru
Processing ../data/github-repo-msg/libgit2
Processing ../data/github-repo-msg/kapilt
Processing ../data/github-repo-msg/M7S
Processing ../data/github-repo-msg/crbanman
Processing ../data/github-repo-msg/Netflix
Processing ../data/github-repo-msg/Twisol
Processing ../data/github-repo-msg/BradStevenson
Processing ../data/github-repo-msg/ddsc
Processing ../data/github-repo-msg/PiDyGB
Processing ../data/github-repo-msg/JetBrains
../data/github-repo-msg/JetBrains/FSharper.json
Processing ../data/github-repo-msg/eetac
Processing ../data/github-repo-msg/elastic
Processing ../data/github-repo-msg/gfx
Processing ../data/github-repo-msg/zopefoundation
Processing ../data/github-repo-msg/brenoc
Processing ../data/github-repo-msg/yoshizow
Processing ../data/github-repo-msg/uProxy
Processing ../data/github-repo-msg/tuanhiep
Processing ../data/github-repo-msg/calico-g
Processing ../data/github-repo-msg/tensorflow


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
  ' that document to Beautiful Soup.' % decoded_markup


Processing ../data/github-repo-msg/proycon
Processing ../data/github-repo-msg/siwilkins
Processing ../data/github-repo-msg/englishtown
Processing ../data/github-repo-msg/v3l0c1r4pt0r
Processing ../data/github-repo-msg/yhteentoimivuuspalvelut
../data/github-repo-msg/yhteentoimivuuspalvelut/ckanext-ytp-drupal.json
Processing ../data/github-repo-msg/android-ia
Processing ../data/github-repo-msg/DragonSpawn
Processing ../data/github-repo-msg/opencog


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup


Processing ../data/github-repo-msg/otubo
Processing ../data/github-repo-msg/ScottMcMichael
Processing ../data/github-repo-msg/sanguinariojoe
Processing ../data/github-repo-msg/RetroShare
Processing ../data/github-repo-msg/apache


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beau

Processing ../data/github-repo-msg/NathanSweet
Processing ../data/github-repo-msg/russellsimpkins-nyt
Processing ../data/github-repo-msg/pfsense


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup


Processing ../data/github-repo-msg/snemetz
Processing ../data/github-repo-msg/fperez
Processing ../data/github-repo-msg/liulhdarks
Processing ../data/github-repo-msg/godiard
Processing ../data/github-repo-msg/globocom
Processing ../data/github-repo-msg/alx
Processing ../data/github-repo-msg/davidkempers
Processing ../data/github-repo-msg/mongodb


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
https://github.com/bjori/phongo/commit/23bb9a63ff70b53892cc68ecd16a802c06fa5847
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.

Processing ../data/github-repo-msg/MeldCE
Processing ../data/github-repo-msg/clementine-player
Processing ../data/github-repo-msg/Shuyang
Processing ../data/github-repo-msg/steveliles
Processing ../data/github-repo-msg/CorlenS
Processing ../data/github-repo-msg/gagle
Processing ../data/github-repo-msg/collegedesis
Processing ../data/github-repo-msg/Khan
Processing ../data/github-repo-msg/NSLS-II
Processing ../data/github-repo-msg/adejoux
Processing ../data/github-repo-msg/CollectorsQuest
Processing ../data/github-repo-msg/lucasr
Processing ../data/github-repo-msg/thoughtbot


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup


Processing ../data/github-repo-msg/cerb-plugins
Processing ../data/github-repo-msg/charliemorning
Processing ../data/github-repo-msg/twbs
Processing ../data/github-repo-msg/marsender
Processing ../data/github-repo-msg/ZachOhara
Processing ../data/github-repo-msg/cemagg
Processing ../data/github-repo-msg/reddit


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup


Processing ../data/github-repo-msg/aelarabawy
Processing ../data/github-repo-msg/hadleyrich
Processing ../data/github-repo-msg/tbruyelle
Processing ../data/github-repo-msg/jltjohanlindqvist
Processing ../data/github-repo-msg/wardrobecms
Processing ../data/github-repo-msg/Jugendhackt
Processing ../data/github-repo-msg/zfsonlinux


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup


Processing ../data/github-repo-msg/nsimplex
Processing ../data/github-repo-msg/SFTtech


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup


Processing ../data/github-repo-msg/futureimperfect
Processing ../data/github-repo-msg/spring-projects


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beau

Processing ../data/github-repo-msg/gitlabhq
Processing ../data/github-repo-msg/mikesname
Processing ../data/github-repo-msg/giucam
Processing ../data/github-repo-msg/onivim


  ' that document to Beautiful Soup.' % decoded_markup
  ' that document to Beautiful Soup.' % decoded_markup
  ' that document to Beautiful Soup.' % decoded_markup
  ' that document to Beautiful Soup.' % decoded_markup


Processing ../data/github-repo-msg/oguna
Processing ../data/github-repo-msg/akrasic
Processing ../data/github-repo-msg/djblets
Processing ../data/github-repo-msg/grate-driver
Processing ../data/github-repo-msg/yahim91
Processing ../data/github-repo-msg/ntuosproj
Processing ../data/github-repo-msg/Youscribe
Processing ../data/github-repo-msg/zeisler


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
  ' that document to Bea

Processing ../data/github-repo-msg/cloudcopy
Processing ../data/github-repo-msg/nbari
Processing ../data/github-repo-msg/XPerience-NXT
Processing ../data/github-repo-msg/coreone
Processing ../data/github-repo-msg/fenicks
Processing ../data/github-repo-msg/metabrainz


  ' that document to Beautiful Soup.' % decoded_markup


Processing ../data/github-repo-msg/uakatt
Processing ../data/github-repo-msg/achiu
Processing ../data/github-repo-msg/safaci2000
Processing ../data/github-repo-msg/samirahmed
Processing ../data/github-repo-msg/sonatype
Processing ../data/github-repo-msg/TroyShaw
Processing ../data/github-repo-msg/CRAVA


" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup
" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.
  ' that document to Beautiful Soup.' % decoded_markup


Processing ../data/github-repo-msg/php-carteblanche


IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



FileNotFoundError: [Errno 2] No such file or directory: '../models/embedding/word2vec_base'

In [113]:
model = train_word_vectors(token_matrix)

In [6]:
github_model = Word2Vec.load('../models/embedding/word2vec_base')

In [9]:
github_model.wv.most_similar(positive=['she', 'king'], negative=['he'])

[('irving', 0.7720848321914673),
 ('orvid', 0.7425923347473145),
 ('tristan', 0.7397049069404602),
 ('arpaia', 0.7366427779197693),
 ('dirk', 0.7364672422409058),
 ('vijay', 0.7295191884040833),
 ('vasudevan', 0.7263808250427246),
 ('stephan', 0.7216164469718933),
 ('boren', 0.7215431332588196),
 ('nelgau', 0.7215031385421753)]

In [3]:
google_model = KeyedVectors.load_word2vec_format('../models/embedding/GoogleNews-vectors-negative300.bin', binary=True)

In [4]:
google_model.most_similar(positive=['she', 'king'], negative=['he'])

[('queen', 0.7633836269378662),
 ('princess', 0.6342117786407471),
 ('queens', 0.5744965076446533),
 ('monarch', 0.5577754974365234),
 ('goddess', 0.5278830528259277),
 ('princesses', 0.5202734470367432),
 ('Queen_Consort', 0.5134546756744385),
 ('very_pampered_McElhatton', 0.5131746530532837),
 ('empress', 0.5119600892066956),
 ('queendom', 0.5091063380241394)]