In [1]:
import joblib
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import normalize



In [2]:
from google.colab import drive

drive.mount('/content/gdrive')


Mounted at /content/gdrive


In [3]:
import pandas as pd
data_frame = pd.read_csv('/content/gdrive/MyDrive/dataset/papers.csv' , index_col = False)
data_frame

Unnamed: 0.1,Unnamed: 0,no,id,title,abstract,citation,references
0,0,1,4ab39729-af77-46f7-a662-16984fb9c1db,Attractor neural networks with activity-depend...,We studied an autoassociative neural network w...,4017c9d2-9845-4ad2-ad5b-ba65523727c5,"4017c9d2-9845-4ad2-ad5b-ba65523727c5,b1187381-..."
1,1,2,4ab3a4cf-1d96-4ce5-ab6f-b3e19fc260de,A characterization of balanced episturmian seq...,It is well-known that Sturmian sequences are t...,1c655ee2-067d-4bc4-b8cc-bc779e9a7f10,"1c655ee2-067d-4bc4-b8cc-bc779e9a7f10,2e4e57ca-..."
2,2,3,4ab3a98c-3620-47ec-b578-884ecf4a6206,Exploring the space of a human action,One of the fundamental challenges of recognizi...,056116c1-9e7a-4f9b-a918-44eb199e67d6,"056116c1-9e7a-4f9b-a918-44eb199e67d6,05ac52a1-..."
3,3,4,4ab3b585-82b4-4207-91dd-b6bce7e27c4e,Generalized upper bounds on the minimum distan...,This paper generalizes previous optimal upper ...,01a765b8-0cb3-495c-996f-29c36756b435,"01a765b8-0cb3-495c-996f-29c36756b435,5dbc8ccb-..."
4,4,5,4ab3e768-78c9-4497-8b8e-9e934cb5f2e4,Applying BCMP multi-class queueing networks fo...,Queueing networks with multiple classes of cus...,1c26e228-57d2-4b2c-b0c9-8d5851c17fac,"1c26e228-57d2-4b2c-b0c9-8d5851c17fac,75399207-..."
...,...,...,...,...,...,...,...
4508,5010,5011,4c5e8d90-6032-414b-bbc0-aa38d060bf40,Software news and updates,Abstract#R##N##R##N#A parallel version of the ...,3008b961-676b-4443-b1db-5297ebe97e9c,3008b961-676b-4443-b1db-5297ebe97e9c
4509,5011,5012,4c5ec7a5-063a-46f5-978c-ab3cf49aa188,Jointly multi-user detection and channel estim...,This work aims at proposing the use of the evo...,26725b1c-5573-4f4d-b4fc-34fe96d27ed8,"26725b1c-5573-4f4d-b4fc-34fe96d27ed8,31dac1f4-..."
4510,5012,5013,4c5ef00f-63d4-4728-9d20-26fb35b7a647,An adaptive clustering algorithm for image seg...,A generalization of the K-means clustering alg...,0e30c8d3-74fa-458b-8b21-40fdb9a3a7fd,"0e30c8d3-74fa-458b-8b21-40fdb9a3a7fd,31e350c1-..."
4511,5013,5014,4c5efec8-8743-42fb-8455-eea4852e4225,On the relationship between formal semantics a...,On the relationship between formal semantics a...,41385c93-631e-468a-a90c-ff4a4ff693f8,"41385c93-631e-468a-a90c-ff4a4ff693f8,608db99a-..."


In [4]:
class PaperRecommendationModel(BaseEstimator, TransformerMixin):
    def __init__(self, k=5):
        self.k = k
        self.vectorizer = TfidfVectorizer()
        self.knn_model = NearestNeighbors(n_neighbors=k, metric='cosine')

    def fit(self, X, y=None):
        # Preprocessing
        text_data = X['title'] + ' ' + X['abstract']
        text_data = text_data.fillna('')
        self.feature_vectors = self.vectorizer.fit_transform(text_data)


        self.feature_vectors = normalize(self.feature_vectors)

        self.knn_model.fit(self.feature_vectors)

        return self

    def predict(self, query):

        query_vector = self.vectorizer.transform([query])
        query_vector = normalize(query_vector)


        distances, indices = self.knn_model.kneighbors(query_vector, n_neighbors=self.k+1)
        distances = distances.flatten()[1:]
        indices = indices.flatten()[1:]

        recommended_papers = data_frame.iloc[indices]
        recommended_papers['distance'] = distances

        return recommended_papers

    def transform(self, X):

        return X

model = PaperRecommendationModel()

model.fit(data_frame)


In [5]:
joblib.dump(model, 'paper_recommendation_model.joblib')


loaded_model = joblib.load('paper_recommendation_model.joblib')


query = "neural networks"
recommendations = loaded_model.predict(query)
print(recommendations)

      Unnamed: 0    no                                    id  \
3886        4300  4301  4c216309-49e4-4726-947a-c73fb986baa2   
2009        2221  2222  4b700af9-d5be-4baa-9a6d-ae7e53960bbb   
1281        1424  1425  4b2b5bd9-568c-43bb-a1eb-336c948599a5   
1949        2156  2157  4b6a1425-0923-44a6-8516-83e29efc0078   
3969        4395  4396  4c28dedb-a31d-4225-8574-5176c0a9018a   

                                                  title  \
3886  Improving the performance of neural networks w...   
2009  Hardware-friendly Higher-Order Neural Network ...   
1281  Face recognition through a chaotic neural netw...   
1949       Real-time motion planning of car-like robots   
3969  Neural network based admission controller for ...   

                                               abstract  \
3886  Neural Networks such as RBFN and BPNN have bee...   
2009  In this paper, we study the class of Higher-Or...   
1281  Face recognition through a chaotic neural netw...   
1949  A neural network a

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  recommended_papers['distance'] = distances


In [7]:
joblib.dump(model, 'paper_recommendation_model.pkl')


loaded_model = joblib.load('paper_recommendation_model.pkl')


query = "neural networks"
recommendations = loaded_model.predict(query)
print(recommendations)

      Unnamed: 0    no                                    id  \
3886        4300  4301  4c216309-49e4-4726-947a-c73fb986baa2   
2009        2221  2222  4b700af9-d5be-4baa-9a6d-ae7e53960bbb   
1281        1424  1425  4b2b5bd9-568c-43bb-a1eb-336c948599a5   
1949        2156  2157  4b6a1425-0923-44a6-8516-83e29efc0078   
3969        4395  4396  4c28dedb-a31d-4225-8574-5176c0a9018a   

                                                  title  \
3886  Improving the performance of neural networks w...   
2009  Hardware-friendly Higher-Order Neural Network ...   
1281  Face recognition through a chaotic neural netw...   
1949       Real-time motion planning of car-like robots   
3969  Neural network based admission controller for ...   

                                               abstract  \
3886  Neural Networks such as RBFN and BPNN have bee...   
2009  In this paper, we study the class of Higher-Or...   
1281  Face recognition through a chaotic neural netw...   
1949  A neural network a

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  recommended_papers['distance'] = distances
