In [None]:
from unicodedata import normalize
import pandas as pd
from flask import Flask, jsonify, json
import requests
from bs4 import BeautifulSoup
import numpy as np
import functools

NEED = 'build user interface'
TECH = 'javascript package'

def search_packages(need, tech):

    def _removeNonAscii(s): 
        return "".join(i for i in s if ord(i)<128)

    url = 'http://npmsearch.com/query?q=' + need +\
                  '&size=200&fields=name,keywords,description,readme,homepage'
    header = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.75 Safari/537.36",
        "X-Requested-With": "XMLHttpRequest"
    }
    r = requests.get(url, headers=header)

    json_string = r.text

    json_string = _removeNonAscii(json_string)

    ### BUILDING DATASET ###

    npmsearch = pd.read_json(json_string.lower(), encoding="utf-8")
    npmsearch = npmsearch.iloc[:,:]

    #extraer los nombres de todos los paquetes
    def naming(row):
        return row["results"]["name"][0]

    def getNPMioInfo(pkg):
        response = requests.get("https://api.npms.io/v2/package/"+pkg , headers=header)
        return response.text.lower()

    def npmio(row):
        return getNPMioInfo(row["results"]["name"][0])

    def npming(row):
        return 'https://www.npmjs.com/package/'+(row["results"]["name"][0])

    def getLinks(rowi):
        pkg = rowi['name']
        row = npmsearch.index[npmsearch['name']==pkg]
        if len(row) > 0:
            js = json.loads(npmsearch.iloc[row[0]]['npmio'])
            if 'collected' in js:
                try:
                    dic = json.loads(npmsearch.iloc[row[0]]['npmio'])["collected"]["metadata"]["links"]
                    return dic
                except:
                    dic = {}
            return {}
        else:
            return {}

    def getLink(row, link):
        links = row['links']
        for key, value in links.items():
            if key == link:
                return value
        return ""

    npmsearch['name'] = npmsearch.apply (lambda row: naming(row),axis=1)
    npmsearch['npmio'] = npmsearch.apply (lambda row: npmio(row),axis=1)
    npmsearch['npmpage'] = npmsearch.apply (lambda row: npming(row),axis=1)
    npmsearch['links'] = npmsearch.apply (lambda row: getLinks(row),axis=1)
    npmsearch['npm'] = npmsearch.apply (lambda row: getLink(row,'npm'),axis=1)
    npmsearch['homepage'] = npmsearch.apply (lambda row: getLink(row,'homepage'),axis=1)
    npmsearch['repository'] = npmsearch.apply (lambda row: getLink(row,'repository'),axis=1)

    return npmsearch

npmsearch = search_packages(NEED, TECH)

In [168]:
def getRankings(need, tech):

    dvectors = pd.DataFrame(columns=['NPMSearch','NPM','Bing'])

    def getNPMSearchRanking(need):
        return npmsearch["name"]
    
    def getNPMRanking(need):
        response = requests.get("https://www.npmjs.com/search?q="+ need , headers=header)
        soup = BeautifulSoup(response.text, 'html.parser')
        npm = []
        for h3 in soup.select("div[class^=search__packageList]")[0].select("h3[class^=package-list-item]"):
            npm.append(h3.text)
        return pd.Series(npm)
    
    def getBingRanking(need, tech):
        q = tech + need
        l = '20'

        gheader = {
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36"
        }
        response = requests.get("https://www.bing.com/search?q="+q+"&num="+l , headers=gheader)

        soup = BeautifulSoup(response.text, 'html.parser')

        elements = soup.select("h2 a");
        r_urls = []
        for i in range(1,len(elements)):
            elem = elements[i]
            r_urls.append(elem.get('href'))

        def getSoup(url):
            gheader = {
                "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36"
            }
            response = requests.get(url,headers=gheader)
            return BeautifulSoup(response.text, 'html.parser')

        text = ""
        for uri in r_urls:
            text += " " + getSoup(r_urls[0]).get_text()
        
        return pd.Series(entityExtraction(text,npmsearch))

    
    dvectors['NPMSearch'] = getNPMSearchRanking(need)
    dvectors['NPM'] = getNPMRanking(need)
    dvectors['Bing'] = getBingRanking(need,tech)
        
    return dvectors

['interface', 'lib-http-rpc', 'aiw-ui', 'phonegapbuildapi', 'webui.lib', 'interface-addresses', 'on-blow', 'user-config', 'intelligent-ui', 'beacon-user-interface', 'angular-chrome-messaging', 'find-external-interface', 'hudkit', 'promfig', 'implement-js', 'publicio', 'node-cspread', 'simple-ui-builder', 'egg-imodel', 'faction-content-user', 'ad2usb2', 'freedom-social-xmpp']


Unnamed: 0,NPMSearch,NPM,Bing
0,simple-ui-builder,inferno,interface
1,implement-js,dio.js,lib-http-rpc
2,aiw-ui,tailwindcss,aiw-ui
3,centit.easyui,omi,phonegapbuildapi
4,account,ultradom,webui.lib


In [201]:
rankings = dvectors

def aggregate_rankings(rankings):

    rankings = [rankings['NPMSearch'],rankings['NPM'],rankings['Bing']]

    def getUniqueItems(rankings):
        uniqueItems = []
        for r in rankings:
            for ri in r:
                if ri not in uniqueItems and isinstance(ri, str) :
                    uniqueItems.append(ri)
        return uniqueItems

    def getPointsUnassigned(i):
        result = 0;
        for e in range(1,i+1):
            result +=e
        return result

    def bordaScoring(rankings):
        result = {}
        for ranking in rankings:
            scoreRest = getPointsUnassigned(len(uniqueItems) - len(ranking))
            for ri in uniqueItems:
                score = 0.0
                if ri in ranking.values:
                    score = len(uniqueItems) - (ranking[ranking == ri].index[0] + 1)
                else:
                    score = scoreRest / (len(uniqueItems)-len(ranking))

                if ri in result:
                    result[ri] = result[ri] + score 
                else:
                    result[ri] = score

        return result

    def compare(item1, item2):
        if scoring[item1] == scoring[item2]:
            return 0
        elif scoring[item1] > scoring[item2]:
            return 1  
        return -1

    uniqueItems = getUniqueItems(rankings)
    scoring = bordaScoring(rankings)

    rank_aggregated = sorted(uniqueItems, key=functools.cmp_to_key(compare), reverse=True)
    return rank_aggregated

['aiw-ui',
 'implement-js',
 'simple-ui-builder',
 'phonegapbuildapi',
 'faction-content-user',
 'webui.lib',
 'user-config',
 'on-blow',
 'ad2usb2',
 'promfig',
 'intelligent-ui',
 'lib-http-rpc',
 'egg-imodel',
 'node-cspread',
 'find-external-interface',
 'beacon-user-interface',
 'angular-chrome-messaging',
 'hudkit',
 'publicio',
 'interface-addresses',
 'freedom-social-xmpp',
 'interface',
 'spa-component-page',
 'inferno',
 'dio.js',
 'tailwindcss',
 'centit.easyui',
 'omi',
 'account',
 'ultradom',
 'key-ui',
 '@haiku/core',
 'cellularui',
 'babylonjs-gui',
 '@webdollar/user-interface-webdollar',
 'diffhtml',
 'muijs',
 'edx-ui-toolkit',
 's-ui',
 'concise-ui',
 'stb-component-page',
 'cspace-ui',
 'nito',
 'bs-pimp-my-sql',
 'react',
 '@amalto/key-value-editor',
 'npmdoc-pm2-interface',
 'bootstrap-ui',
 'ws-rs',
 'reactive-dom',
 'react-image-annotation-component',
 'tiny-lit',
 'npmtest-pm2-interface',
 '@zebbra/atoms',
 'jsdoc2component',
 'brightwheel',
 'oma-ui',
 'oh-whe

0