In [21]:
#import necessary modules
import csv
import getpass
import os
from urllib.parse import quote_plus

import pandas as pd
import psycopg2
import requests
from bs4 import BeautifulSoup
from sqlalchemy import create_engine

In [22]:
# Download the arXiv Computer Science RSS feed and parse it as XML with BeautifulSoup.
url = "http://export.arxiv.org/rss/cs/"
# A timeout keeps the cell from hanging indefinitely if the server stalls.
resp = requests.get(url, timeout=30)
# Stop here on an HTTP error instead of parsing an error page into zero items.
resp.raise_for_status()
soup = BeautifulSoup(resp.content, features="xml")
# find_all is the current method name; findAll is a deprecated alias.
items = soup.find_all('item')

In [23]:
# Number of <item> entries parsed from the feed; compare it against the
# website as a sanity check.
len(items)

375

In [24]:
# Inspect the first item (position 0) to see the raw XML of a single entry.
item = items[0]
item

<item rdf:about="http://arxiv.org/abs/2102.02204">
<title>Parametrized Quantum Circuits of Synonymous Sentences in Quantum Natural Language Processing. (arXiv:2102.02204v1 [quant-ph])</title>
<link>http://arxiv.org/abs/2102.02204</link>
<description rdf:parseType="Literal">&lt;p&gt;In this paper, we develop a compositional vector-based semantics of positive
transitive sentences in quantum natural language processing for a non-English
language, i.e. Persian, to compare the parametrized quantum circuits of two
synonymous sentences in two languages, English and Persian. By considering
grammar+meaning of a transitive sentence, we translate DisCoCat diagram via
ZX-calculus into quantum circuit form. Also, we use a bigraph method to rewrite
DisCoCat diagram and turn into quantum circuit in the semantic side.
&lt;/p&gt;
</description>
<dc:creator> &lt;a href="http://arxiv.org/find/quant-ph/1/au:+Abbaszadeh_M/0/1/0/all/0/1"&gt;Mina Abbaszadeh&lt;/a&gt;, &lt;a href="http://arxiv.org/find/quant-

In [7]:
# Inspect the <description> element of `item`.
item.description

<description rdf:parseType="Literal">&lt;p&gt;In this paper, we develop a compositional vector-based semantics of positive
transitive sentences in quantum natural language processing for a non-English
language, i.e. Persian, to compare the parametrized quantum circuits of two
synonymous sentences in two languages, English and Persian. By considering
grammar+meaning of a transitive sentence, we translate DisCoCat diagram via
ZX-calculus into quantum circuit form. Also, we use a bigraph method to rewrite
DisCoCat diagram and turn into quantum circuit in the semantic side.
&lt;/p&gt;
</description>

In [8]:
# Aggregate the title, description and link text of every feed item into
# news_items: a list with one dict per <item>.
# find_all is the current method name; findAll is a deprecated alias.
items = soup.find_all('item')

# A comprehension keeps its loop variable local, so building the list does not
# rebind the notebook-level `item` that other cells inspect.
news_items = [
    {
        'title': entry.title.text,
        'description': entry.description.text,
        'link': entry.link.text,
    }
    for entry in items
]

In [9]:
# Validate news_items: show how many records were built plus a small sample.
# Printing the whole list floods the output with every abstract in the feed.
print(len(news_items))
news_items[:3]

[{'title': 'Parametrized Quantum Circuits of Synonymous Sentences in Quantum Natural Language Processing. (arXiv:2102.02204v1 [quant-ph])', 'description': '<p>In this paper, we develop a compositional vector-based semantics of positive\ntransitive sentences in quantum natural language processing for a non-English\nlanguage, i.e. Persian, to compare the parametrized quantum circuits of two\nsynonymous sentences in two languages, English and Persian. By considering\ngrammar+meaning of a transitive sentence, we translate DisCoCat diagram via\nZX-calculus into quantum circuit form. Also, we use a bigraph method to rewrite\nDisCoCat diagram and turn into quantum circuit in the semantic side.\n</p>\n', 'link': 'http://arxiv.org/abs/2102.02204'}, {'title': 'Harvest -- An Open Source Toolkit for Extracting Posts and Post Metadata from Web Forums. (arXiv:2102.02240v1 [cs.IR])', 'description': '<p>Automatic extraction of forum posts and metadata is a crucial but challenging\ntask since forums do

In [10]:
# Inspect the first aggregated record (a dict with title, description and link).
news_items[0]

{'title': 'Parametrized Quantum Circuits of Synonymous Sentences in Quantum Natural Language Processing. (arXiv:2102.02204v1 [quant-ph])',
 'description': '<p>In this paper, we develop a compositional vector-based semantics of positive\ntransitive sentences in quantum natural language processing for a non-English\nlanguage, i.e. Persian, to compare the parametrized quantum circuits of two\nsynonymous sentences in two languages, English and Persian. By considering\ngrammar+meaning of a transitive sentence, we translate DisCoCat diagram via\nZX-calculus into quantum circuit form. Also, we use a bigraph method to rewrite\nDisCoCat diagram and turn into quantum circuit in the semantic side.\n</p>\n',
 'link': 'http://arxiv.org/abs/2102.02204'}

In [11]:
# Build a DataFrame from the aggregated records with an explicit column order.
# (pandas is imported in the notebook's top import cell.)
df = pd.DataFrame(news_items, columns=['title', 'link', 'description'])

In [12]:
# Preview the first rows of df.
df.head()

Unnamed: 0,title,link,description
0,Parametrized Quantum Circuits of Synonymous Se...,http://arxiv.org/abs/2102.02204,"<p>In this paper, we develop a compositional v..."
1,Harvest -- An Open Source Toolkit for Extracti...,http://arxiv.org/abs/2102.02240,<p>Automatic extraction of forum posts and met...
2,Information-theoretic Key Encapsulation and it...,http://arxiv.org/abs/2102.02243,<p>A hybrid encryption scheme is a public key ...
3,Bounds and Genericity of Sum-Rank-Metric Codes...,http://arxiv.org/abs/2102.02244,<p>We derive simplified sphere-packing and Gil...
4,The Forgotten Document-Oriented Database Manag...,http://arxiv.org/abs/2102.02246,"<p>In the current context of Big Data, a multi..."


In [13]:
# Optional: persist df as a comma-separated, UTF-8 encoded CSV file,
# leaving out the index column.
df.to_csv(
    "ARXIVdata.csv",
    sep=",",
    encoding="utf-8",
    index=False,
)

In [14]:
# Write df to the Postgres table arxiv_rss (to_sql creates the table and its columns).
# Connection settings are read from the standard libpq environment variables so
# the password is never stored in the notebook. The non-secret defaults match
# the local setup this notebook was written against. If PGPASSWORD is unset,
# the password is prompted for instead.
pg_user = os.environ.get("PGUSER", "postgres")
pg_password = os.environ.get("PGPASSWORD") or getpass.getpass("Postgres password: ")
pg_host = os.environ.get("PGHOST", "localhost")
pg_port = os.environ.get("PGPORT", "5432")
pg_database = os.environ.get("PGDATABASE", "postgres")

# quote_plus escapes characters such as '@' or '/' that would otherwise break the URL.
engine = create_engine(
    "postgresql://{}:{}@{}:{}/{}".format(
        quote_plus(pg_user), quote_plus(pg_password), pg_host, pg_port, pg_database
    )
)

# if_exists='replace' makes the cell re-runnable: the default ('fail') raises
# ValueError as soon as the table exists from a previous run.
df.to_sql('arxiv_rss', engine, if_exists='replace')

In [18]:
# Connect to the Postgres DB. Credentials are read from the standard libpq
# environment variables instead of being stored in the notebook. The non-secret
# defaults match the local setup this notebook was written against. If
# PGPASSWORD is unset, the password is prompted for instead.
conn = psycopg2.connect(
    host=os.environ.get("PGHOST", "localhost"),
    port=int(os.environ.get("PGPORT", "5432")),
    database=os.environ.get("PGDATABASE", "postgres"),
    user=os.environ.get("PGUSER", "postgres"),
    password=os.environ.get("PGPASSWORD") or getpass.getpass("Postgres password: "),
)

In [19]:
# Create a cursor object
cur = conn.cursor()

# Validate the load by counting the rows in the "arxiv_rss" table of the
# Postgres DB; compare the result against len(items).
# conn.rollback() clears any transaction left open or aborted by an earlier
# statement. It is the DB-API way of issuing the rollback, rather than sending
# it as raw SQL through the cursor.
conn.rollback()
cur.execute("""SELECT COUNT(*) FROM arxiv_rss""")
query_results = cur.fetchall()
print(query_results)

[(375,)]


In [25]:
# Validate a single row: look up the first feed item by its link and print the
# stored record. The link comes from news_items rather than a literal, so the
# check still matches after the feed has changed.
# conn.rollback() clears any transaction left open or aborted by an earlier statement.
conn.rollback()
# The value is passed as a bound parameter instead of being spliced into the SQL text.
cur.execute(
    """SELECT * FROM arxiv_rss WHERE link = %s""",
    (news_items[0]['link'],),
)
query_results = cur.fetchall()
print(query_results)


[(0, 'Parametrized Quantum Circuits of Synonymous Sentences in Quantum Natural Language Processing. (arXiv:2102.02204v1 [quant-ph])', 'http://arxiv.org/abs/2102.02204', '<p>In this paper, we develop a compositional vector-based semantics of positive\ntransitive sentences in quantum natural language processing for a non-English\nlanguage, i.e. Persian, to compare the parametrized quantum circuits of two\nsynonymous sentences in two languages, English and Persian. By considering\ngrammar+meaning of a transitive sentence, we translate DisCoCat diagram via\nZX-calculus into quantum circuit form. Also, we use a bigraph method to rewrite\nDisCoCat diagram and turn into quantum circuit in the semantic side.\n</p>\n')]


In [None]:
# Close the cursor and the connection so the server can allocate
# resources to other requests
cur.close()
conn.close()