In [70]:
import numpy as np
import torch 
from DNN import FFN
import re
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer

In [84]:
class model():
    
    def __init__(self,
                 des_path,
                 des_feat_path,
                 install_path,
                 install_feat_path,
                 invoc_path,
                 invoc_feat_path,
                 cite_path,
                 cite_feat_path
                ):
        self.desc = "This class contains all four binary classifier"
        self.des_path = des_path
        self.install_path = install_path
        self.invoc_path = invoc_path
        self.cite_path = cite_path
        
        self.des_feat_path = des_feat_path
        self.install_feat_path = install_feat_path
        self.invoc_feat_path = invoc_feat_path
        self.cite_feat_path = cite_feat_path
        
        self.build()
        
    def build(self):
        self.model_desc = torch.load(self.des_path)
        self.model_install = torch.load(self.install_path)
        self.model_invoc = torch.load(self.invoc_path)
        self.model_cite = torch.load(self.cite_path)
        
        desc = pd.read_csv(self.des_feat_path).values
        install = pd.read_csv(self.install_feat_path).values
        invoc = pd.read_csv(self.invoc_feat_path).values
        cite = pd.read_csv(self.cite_feat_path).values
        self.desc_feat = {}
        self.install_feat = {}
        self.invoc_feat = {}
        self.cite_feat = {}
        for i in range(len(desc)):
            self.desc_feat[desc[i][0]] = i
        for i in range(len(install)):
            self.install_feat[install[i][0]] = i
        for i in range(len(invoc)):
            self.invoc_feat[invoc[i][0]] = i
        for i in range(len(cite)):
            self.cite_feat[cite[i][0]] = i        

        self.desc_vec = CountVectorizer(vocabulary = self.desc_feat)
        self.install_vec = CountVectorizer(vocabulary = self.install_feat)
        self.invoc_vec = CountVectorizer(vocabulary = self.invoc_feat)
        self.cite_vec = CountVectorizer(vocabulary = self.cite_feat)
        
        
        
    def inference(self,corpus):
        new_corpus = list(map(self.lower_stopwords,corpus.split(".")))
        encoded_corpus_desc = self.desc_vec.transform(new_corpus).toarray()
        encoded_corpus_install = self.install_vec.transform(new_corpus).toarray()
        encoded_corpus_invoc = self.invoc_vec.transform(new_corpus).toarray()
        encoded_corpus_cite = self.cite_vec.transform(new_corpus).toarray()
        
        o1 = self.model_desc(torch.tensor(encoded_corpus_desc).float())
        o2 = self.model_install(torch.tensor(encoded_corpus_install).float())
        o3 = self.model_invoc(torch.tensor(encoded_corpus_invoc).float())
        o4 = self.model_cite(torch.tensor(encoded_corpus_cite).float())
        desc_pred = torch.argmax(o1,dim=1).detach().numpy()
        install_pred = torch.argmax(o2,dim=1).detach().numpy()
        invoc_pred = torch.argmax(o3,dim=1).detach().numpy()
        cite_pred = torch.argmax(o4,dim=1).detach().numpy()
        return corpus.split("."),desc_pred,install_pred,invoc_pred,cite_pred
    def lower_stopwords(self,x):
        x = re.sub(r'[^a-zA-Z\s]', '', x, re.I|re.A)
        x = x.lower()
        x = x.strip()
        text_tokens = [word for word in word_tokenize(x) if word not in stopwords.words()]
        return " ".join(text_tokens)
    def show_result(self,corpus):
        new_corpus,desc_pred,install_pred,invoc_pred,cite_pred = self.inference(corpus)
        for i in range(len(new_corpus)): 
            text = new_corpus[i]
            label = []
            if(desc_pred[i]==1):
                label.append("descripetion")
            if(install_pred[i]==1):
                label.append("installation")
            if(invoc_pred[i]==1):
                label.append("invocation")
            if(cite_pred[i]==1):
                label.append("citation")
            text += " ->" + str(tuple(label))
            print(text)
        
    

In [85]:
des_path = "../saved_models/DNN_description.pt"
des_feat_path = "../saved_models/description_feat.csv"

install_path = "../saved_models/DNN_install.pt"
install_feat_path = "../saved_models/install_feat.csv"

invoc_path = "../saved_models/DNN_invocation.pt"
invoc_feat_path = "../saved_models/invocation_feat.csv"

cite_path = "../saved_models/DNN_citation.pt"
cite_feat_path = "../saved_models/citation_feat.csv"

classifier = model(des_path,
                 des_feat_path,
                 install_path,
                 install_feat_path,
                 invoc_path,
                 invoc_feat_path,
                 cite_path,
                 cite_feat_path
            )
corpus = "pandas is a Python package that provides fast,\
flexible, and expressive data structures designed to make \
working with structured (tabular, multidimensional, potenti\
ally heterogeneous) and time series data both easy and intu\
itive. It aims to be the fundamental high-level building blo\
ck for doing practical, real world data analysis in Python. A\
dditionally, it has the broader goal of becoming the most power\
ful and flexible open source data analysis / manipulation tool a\
vailable in any language. It is already well on its way toward this goal."
new_corpus,desc_pred,install_pred,invoc_pred,cite_pred = classifier.inference(corpus)
classifier.show_result(corpus)


pandas is a Python package that provides fast,flexible, and expressive data structures designed to make working with structured (tabular, multidimensional, potentially heterogeneous) and time series data both easy and intuitive ->('descripetion',)
 It aims to be the fundamental high-level building block for doing practical, real world data analysis in Python ->('descripetion',)
 Additionally, it has the broader goal of becoming the most powerful and flexible open source data analysis / manipulation tool available in any language ->('descripetion',)
 It is already well on its way toward this goal ->('descripetion', 'installation')
 ->('installation', 'invocation')
