In [1]:
import numpy as np
import torch 
from DNN import FFN
import re
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
import urllib
from bs4 import BeautifulSoup

In [2]:
class model():
    
    def __init__(self,
                 des_path,
                 des_feat_path,
                 install_path,
                 install_feat_path,
                 invoc_path,
                 invoc_feat_path,
                 cite_path,
                 cite_feat_path
                ):
        self.desc = "This class contains all four binary classifier"
        self.des_path = des_path
        self.install_path = install_path
        self.invoc_path = invoc_path
        self.cite_path = cite_path
        
        self.des_feat_path = des_feat_path
        self.install_feat_path = install_feat_path
        self.invoc_feat_path = invoc_feat_path
        self.cite_feat_path = cite_feat_path
        
        self.build()
        
    def build(self):
        self.model_desc = torch.load(self.des_path)
        self.model_install = torch.load(self.install_path)
        self.model_invoc = torch.load(self.invoc_path)
        self.model_cite = torch.load(self.cite_path)
        
        desc = pd.read_csv(self.des_feat_path).values
        install = pd.read_csv(self.install_feat_path).values
        invoc = pd.read_csv(self.invoc_feat_path).values
        cite = pd.read_csv(self.cite_feat_path).values
        self.desc_feat = {}
        self.install_feat = {}
        self.invoc_feat = {}
        self.cite_feat = {}
        for i in range(len(desc)):
            self.desc_feat[desc[i][0]] = i
        for i in range(len(install)):
            self.install_feat[install[i][0]] = i
        for i in range(len(invoc)):
            self.invoc_feat[invoc[i][0]] = i
        for i in range(len(cite)):
            self.cite_feat[cite[i][0]] = i        

        self.desc_vec = CountVectorizer(vocabulary = self.desc_feat)
        self.install_vec = CountVectorizer(vocabulary = self.install_feat)
        self.invoc_vec = CountVectorizer(vocabulary = self.invoc_feat)
        self.cite_vec = CountVectorizer(vocabulary = self.cite_feat)
        
        
        
    def inference(self,corpus):
        new_corpus = list(map(self.lower_stopwords,corpus.split(".")))
        encoded_corpus_desc = self.desc_vec.transform(new_corpus).toarray()
        encoded_corpus_install = self.install_vec.transform(new_corpus).toarray()
        encoded_corpus_invoc = self.invoc_vec.transform(new_corpus).toarray()
        encoded_corpus_cite = self.cite_vec.transform(new_corpus).toarray()
        
        o1 = self.model_desc(torch.tensor(encoded_corpus_desc).float())
        o2 = self.model_install(torch.tensor(encoded_corpus_install).float())
        o3 = self.model_invoc(torch.tensor(encoded_corpus_invoc).float())
        o4 = self.model_cite(torch.tensor(encoded_corpus_cite).float())
        desc_pred = torch.argmax(o1,dim=1).detach().numpy()
        install_pred = torch.argmax(o2,dim=1).detach().numpy()
        invoc_pred = torch.argmax(o3,dim=1).detach().numpy()
        cite_pred = torch.argmax(o4,dim=1).detach().numpy()
        return corpus.split("."),desc_pred,install_pred,invoc_pred,cite_pred
    def lower_stopwords(self,x):
        x = re.sub(r'[^a-zA-Z\s]', '', x, re.I|re.A)
        x = x.lower()
        x = x.strip()
        text_tokens = [word for word in word_tokenize(x) if word not in stopwords.words()]
        return " ".join(text_tokens)
    def show_result(self,corpus):
        new_corpus,desc_pred,install_pred,invoc_pred,cite_pred = self.inference(corpus)
        for i in range(len(new_corpus)): 
            text = new_corpus[i]
            label = []
            if(desc_pred[i]==1):
                label.append("descripetion")
            if(install_pred[i]==1):
                label.append("installation")
            if(invoc_pred[i]==1):
                label.append("invocation")
            if(cite_pred[i]==1):
                label.append("citation")
            text += " ->" + str(tuple(label))
            print(text)
        
    

In [4]:
des_path = "../saved_models/DNN_description.pt"
des_feat_path = "../saved_models/description_feat.csv"

install_path = "../saved_models/DNN_install.pt"
install_feat_path = "../saved_models/install_feat.csv"

invoc_path = "../saved_models/DNN_invocation.pt"
invoc_feat_path = "../saved_models/invocation_feat.csv"

cite_path = "../saved_models/DNN_citation.pt"
cite_feat_path = "../saved_models/citation_feat.csv"

classifier = model(des_path,
                 des_feat_path,
                 install_path,
                 install_feat_path,
                 invoc_path,
                 invoc_feat_path,
                 cite_path,
                 cite_feat_path
            )
# create input
url = input()
response = urllib.request.urlopen(url)
page = response.read()
soup = BeautifulSoup(page, "lxml")
# kill all script and style elements
for script in soup(["script", "style"]):
   script.extract()    # rip it out
# get text
text = soup.get_text()
# break into lines and remove leading and trailing space on each
lines = (line.strip() for line in text.splitlines())
# break multi-headlines into a line each
chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
# drp blank lines
text = 'n'.join(chunk for chunk in chunks if chunk)


new_corpus,desc_pred,install_pred,invoc_pred,cite_pred = classifier.inference(text)
classifier.show_result(text)


GitHub - snap-stanford/ogb: Benchmark datasets, data loaders, and evaluators for graph machine learningnSkip to contentnSign upnWhy GitHub?nFeatures →nCode reviewnProject managementnIntegrationsnActionsnPackagesnSecuritynTeam managementnHostingnMobilenCustomer stories →nSecurity →nTeamnEnterprisenExplorenExplore GitHub →nLearn & contributenTopicsnCollectionsnTrendingnLearning LabnOpen source guidesnConnect with othersnEventsnCommunity forumnGitHub EducationnGitHub Stars programnMarketplacenPricingnPlans →nCompare plansnContact SalesnNonprofit →nEducation →nIn this repositorynAll GitHubn↵nJump ton↵nNo suggested jump to resultsnIn this repositorynAll GitHubn↵nJump ton↵nIn this repositorynAll GitHubn↵nJump ton↵nSign innSign upnsnap-stanfordn/nogbnWatchn34nStarn615nForkn84nBenchmark datasets, data loaders, and evaluators for graph machine learningnogb ->('descripetion',)
stanford ->('installation', 'invocation')
edunMIT Licensen615nstarsn84nforksnStarnWatchnCodenIssuesn0nPull requestsn0nAc