In [16]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow import keras
from ast import literal_eval

In [4]:
# Read the CSV from a path relative to the current working directory.
arxiv_data = pd.read_csv('arxiv_data_85.csv')

In [5]:
# Peek at the first rows to see which columns the file provides.
arxiv_data.head()

Unnamed: 0,terms,titles,abstracts
0,['cs.LG'],Multi-Level Attention Pooling for Graph Neural...,Graph neural networks (GNNs) have been widely ...
1,"['cs.LG', 'cs.AI']",Decision Forests vs. Deep Networks: Conceptual...,Deep networks and decision forests (such as ra...
2,"['cs.LG', 'cs.CR', 'stat.ML']",Power up! Robust Graph Convolutional Network v...,Graph convolutional networks (GCNs) are powerf...
3,"['cs.LG', 'cs.CR']",Releasing Graph Neural Networks with Different...,With the increasing popularity of Graph Neural...
4,['cs.LG'],Recurrence-Aware Long-Term Cognitive Network f...,Machine learning solutions for pattern classif...


In [6]:
# (rows, columns) of the loaded frame.
arxiv_data.shape

(56181, 3)

In [7]:
# Per-column count of missing values.
arxiv_data.isna().sum()

terms        0
titles       0
abstracts    0
dtype: int64

In [8]:
# Count rows that exactly repeat an earlier row across all columns.
arxiv_data.duplicated().sum()

15054

In [9]:
# Keep only the title column. Selecting the column (instead of dropping the
# others in place) makes this cell safe to re-run: the in-place drop raised
# KeyError on a second run because "terms"/"abstracts" were already gone.
arxiv_data = arxiv_data[["titles"]].copy()

In [10]:
# Remove repeated rows, then renumber the survivors 0..n-1 and discard the
# old index instead of keeping it as a column.
arxiv_data = arxiv_data.drop_duplicates().reset_index(drop=True)

In [11]:
# Lift the column-width cap so cell text is displayed in full.
pd.options.display.max_colwidth = None
arxiv_data

Unnamed: 0,titles
0,Multi-Level Attention Pooling for Graph Neural Networks: Unifying Graph Representations with Multiple Localities
1,Decision Forests vs. Deep Networks: Conceptual Similarities and Empirical Differences at Small Sample Sizes
2,Power up! Robust Graph Convolutional Network via Graph Powering
3,Releasing Graph Neural Networks with Differential Privacy Guarantees
4,Recurrence-Aware Long-Term Cognitive Network for Explainable Pattern Classification
...,...
41100,An experimental study of graph-based semi-supervised classification with additional node information
41101,Bayesian Differential Privacy through Posterior Sampling
41102,Mining Spatio-temporal Data on Industrialization from Historical Registries
41103,Wav2Letter: an End-to-End ConvNet-based Speech Recognition System


In [12]:
!pip install -U -q sentence-transformers


[notice] A new release of pip is available: 24.0 -> 24.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [13]:
pip install --upgrade pip

Collecting pip
  Downloading pip-24.1-py3-none-any.whl.metadata (3.6 kB)
Downloading pip-24.1-py3-none-any.whl (1.8 MB)
   ---------------------------------------- 0.0/1.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.8 MB ? eta -:--:--
    --------------------------------------- 0.0/1.8 MB 487.6 kB/s eta 0:00:04
   -- ------------------------------------- 0.1/1.8 MB 1.0 MB/s eta 0:00:02
   ------ --------------------------------- 0.3/1.8 MB 1.9 MB/s eta 0:00:01
   ------------- -------------------------- 0.6/1.8 MB 2.9 MB/s eta 0:00:01
   ----------------------- ---------------- 1.1/1.8 MB 4.3 MB/s eta 0:00:01
   ------------------------ --------------- 1.1/1.8 MB 3.8 MB/s eta 0:00:01
   ---------------------------------- ----- 1.6/1.8 MB 4.5 MB/s eta 0:00:01
   ---------------------------------- ----- 1.6/1.8 MB 4.5 MB/s eta 0:00:01
   ---------------------------------------  1.8/1.8 MB 4.2 MB/s eta 0:00:01
   ---------------------------------------- 1.8/1.8 MB 3

In [18]:
from sentence_transformers import SentenceTransformer,util
# Sentence-embedding model, referenced by its checkpoint name.
model = SentenceTransformer('all-MiniLM-L6-v2')
sentences = arxiv_data['titles']
# encode() is documented for a string or a list of strings, so hand it a plain
# list instead of the pandas Series; `sentences` itself stays a Series for the
# cells below. Row i of `embeddings` corresponds to the i-th title.
embeddings = model.encode(sentences.tolist())



In [19]:
# Inspect the embedding array produced for the titles.
embeddings

array([[ 0.06643401, -0.04954606,  0.06388087, ...,  0.00106306,
        -0.12156384, -0.06962777],
       [ 0.09212259, -0.07606941,  0.06572863, ..., -0.0856517 ,
        -0.09266542,  0.00725291],
       [-0.08162686,  0.02428935,  0.0188874 , ...,  0.00806164,
        -0.05129533, -0.05873999],
       ...,
       [ 0.01227977, -0.08568837, -0.02782775, ..., -0.05257975,
        -0.10806686,  0.07843312],
       [-0.07258197, -0.12690923, -0.00535551, ...,  0.03597702,
        -0.03986149, -0.05971031],
       [ 0.0076887 , -0.10124184,  0.0890985 , ..., -0.08199871,
        -0.05649745,  0.0900706 ]], dtype=float32)

In [20]:
# Print the leading title/embedding pairs as a sanity check. The stop test
# runs after printing, so positions 0 through 5 (six pairs) are shown.
for c, (sentence, embedding) in enumerate(zip(sentences, embeddings)):
    print("Sentences :", sentence)
    print("Embedding length:", len(embedding))
    print("")
    if c >= 5:
        break

Sentences : Multi-Level Attention Pooling for Graph Neural Networks: Unifying Graph Representations with Multiple Localities
Embedding length: 384

Sentences : Decision Forests vs. Deep Networks: Conceptual Similarities and Empirical Differences at Small Sample Sizes
Embedding length: 384

Sentences : Power up! Robust Graph Convolutional Network via Graph Powering
Embedding length: 384

Sentences : Releasing Graph Neural Networks with Differential Privacy Guarantees
Embedding length: 384

Sentences : Recurrence-Aware Long-Term Cognitive Network for Explainable Pattern Classification
Embedding length: 384

Sentences : Lifelong Graph Learning
Embedding length: 384



In [21]:
import pickle
# Write each artifact to its own pickle file in the working directory:
# the embedding array, the titles, and the encoder model.
for filename, artifact in (
    ('embeddings.pkl', embeddings),
    ('sentences.pkl', sentences),
    ('rec_model.pkl', model),
):
    with open(filename, 'wb') as f:
        pickle.dump(artifact, f)

In [23]:
# Load the saved files back into memory.
# Each file is opened in a `with` block so its handle is closed right after
# reading; the bare open(...) calls previously left all three handles open.
# NOTE(security): pickle.load can run arbitrary code while deserialising, so
# only load pickle files from a trusted source.
with open('embeddings.pkl', 'rb') as f:
    embeddings = pickle.load(f)

with open('sentences.pkl', 'rb') as f:
    sentences = pickle.load(f)

with open('rec_model.pkl', 'rb') as f:
    rec_model = pickle.load(f)

In [27]:
import torch
def recommendation(input_paper, top_k=5):
    """Return the stored titles most similar to ``input_paper``.

    The query is embedded with the module-level ``rec_model`` and scored by
    cosine similarity against every row of the module-level ``embeddings``.
    The best-scoring rows are then looked up in the module-level
    ``sentences``.

    Args:
        input_paper: The query title, as a single string.
        top_k: Maximum number of titles to return. Defaults to 5, the value
            that was previously hard-coded.

    Returns:
        A list of at most ``top_k`` titles ordered from most to least
        similar. It is shorter than ``top_k`` when fewer embeddings are
        stored, and empty when ``top_k`` is not positive or nothing is stored.
    """
    query_embedding = rec_model.encode(input_paper)
    # cos_sim gives one row per stored embedding and one column per query;
    # with a single query, column 0 is the flat vector of scores.
    cosine_scores = util.cos_sim(embeddings, query_embedding)[:, 0]

    # torch.topk raises if k exceeds the number of scores, so clamp it.
    k = min(top_k, cosine_scores.shape[0])
    if k <= 0:
        return []
    top_similar_papers = torch.topk(cosine_scores, k=k, sorted=True)

    # topk reports row positions, not index labels. Use positional lookup when
    # `sentences` is a pandas object and plain indexing otherwise (e.g. a list),
    # so the result stays correct even if the Series index is not 0..n-1.
    titles = getattr(sentences, "iloc", sentences)
    return [titles[position] for position in top_similar_papers.indices.tolist()]

In [28]:
# Ask for a query title, then list the titles recommendation() returns for it.
input_paper = input("Enter the title of paper")
recommend_papers = recommendation(input_paper)

print(
    "Reccomended paper by the model.........",
    "=============================================",
    sep="\n",
)
for paper in recommend_papers:
    print(paper)


Enter the title of paper attension is all of you need


Reccomended paper by the model.........
Predicting Blood Pressure Response to Fluid Bolus Therapy Using Attention-Based Neural Networks for Clinical Interpretability
BodyPressure -- Inferring Body Pose and Contact Pressure from a Depth Image
Is Fast Adaptation All You Need?
Estimation of Continuous Blood Pressure from PPG via a Federated Learning Approach
N2RPP: An Adversarial Network to Rebuild Plantar Pressure for ACLD Patients


In [29]:
# Ask for a query title, then list the titles recommendation() returns for it.
input_paper = input("Enter the title of paper")
recommend_papers = recommendation(input_paper)

print(
    "Reccomended paper by the model.........",
    "=============================================",
    sep="\n",
)
for paper in recommend_papers:
    print(paper)


Enter the title of paper ethinicity detection from the facial images


Reccomended paper by the model.........
Uncovering the Bias in Facial Expressions
Identifying individual facial expressions by deconstructing a neural network
Suppressing Uncertainties for Large-Scale Facial Expression Recognition
Spontaneous Subtle Expression Detection and Recognition based on Facial Strain
Transformation on Computer-Generated Facial Image to Avoid Detection by Spoofing Detector


In [30]:
import sentence_transformers
import tensorflow
import torch
# Report the installed library versions, one per line, in the order
# torch, sentence-transformers, tensorflow.
for module in (torch, sentence_transformers, tensorflow):
    print(module.__version__)

2.3.1+cpu
3.0.1
2.16.1
