In [27]:
import spacy
nlp = spacy.load('en_core_web_sm') 

from sutime import SUTime
sutime = SUTime(mark_time_ranges=True, include_range=True)

import csv, sqlite3

In [29]:
def csv_to_table(path, csv_name, table_name):
    """Bulk-insert every data row of a CSV file into an existing SQLite table.

    Parameters
    ----------
    path : str
        Directory that contains the CSV file (with or without a trailing
        separator).
    csv_name : str
        File name of the CSV inside ``path``. The first row is treated as the
        header and only determines how many ``?`` placeholders are generated;
        values are inserted positionally, in file column order.
    table_name : str
        Destination table. It is concatenated into the SQL text, so it must be
        a trusted identifier, never user input.

    Notes
    -----
    The insert runs on the module-level cursor ``cur``; nothing is committed
    here, the caller is responsible for ``commit()``.
    A file without a header row (e.g. an empty file) inserts nothing.
    """
    import os  # local import so the function does not depend on the import cell

    # Honour the `path` argument instead of reaching for a global directory.
    # newline='' lets the csv module do its own newline handling.
    with open(os.path.join(path, csv_name), 'r', newline='') as fin:
        reader = csv.DictReader(fin)
        rows = [tuple(record.values()) for record in reader]
        # fieldnames is None when the file has no header row at all.
        column_count = len(reader.fieldnames or ())

    if column_count == 0:
        return

    placeholders = ", ".join("?" * column_count)
    cur.executemany("INSERT INTO " + table_name + " VALUES (" + placeholders + ");", rows)

In [30]:
# Build the SQLite database from the prepared CSV files.
#
# The database location is expressed relative to the notebook, like
# dataset_path below, instead of an absolute path that only exists on one
# machine.
db_path = "../dataset/covid-19/mysql_database/covid19.db"
dataset_path = "../dataset/covid-19/required_only/"

tables = ["worldwide_aggregate", "reference", "timeseries", "us"]

con = sqlite3.connect(db_path)
try:
    # csv_to_table() inserts through this module-level cursor.
    cur = con.cursor()

    # Start from a clean slate so the cell can be re-run.
    for table in tables:
        cur.execute("DROP TABLE IF EXISTS "+table+";")

    cur.execute("create table worldwide_aggregate(Date Date NOT NULL, Confirmed BIGINT NOT NULL, Recovered BIGINT NOT NULL, Deaths BIGINT NOT NULL, Increase_rate FLOAT default NULL, PRIMARY KEY (Date));")
    csv_to_table(dataset_path, "worldwide-aggregate.csv", tables[0])

    cur.execute("create table us(Date Date NOT NULL, Admin2 VARCHAR(100) NOT NULL, Province_State VARCHAR(100) NOT NULL, Confirmed BIGINT NOT NULL, Deaths BIGINT NOT NULL, Country_Region VARCHAR(100) NOT NULL, PRIMARY KEY (Date, Admin2, Province_State));")
    csv_to_table(dataset_path, "us_simplified.csv", tables[3])

    # NOTE(review): the column name "Popolation" looks like a typo for
    # "Population"; it is left unchanged because queries elsewhere may
    # reference the existing name.
    cur.execute("create table reference(UID INT NOT NULL, iso2 VARCHAR(20), iso3 VARCHAR(20), code3 INT, FIPS INT, Admin2 VARCHAR(100) NOT NULL, Province_State VARCHAR(100) NOT NULL, Country_Region VARCHAR(100) NOT NULL, Lat FLOAT NOT NULL, Long_ FLOAT NOT NULL, Combined_Key VARCHAR(100), Popolation BIGINT NOT NULL, PRIMARY KEY (UID));")
    csv_to_table(dataset_path, "reference.csv", tables[1])

    cur.execute("create table timeseries(Date Date NOT NULL, Country_Region VARCHAR(100) NOT NULL, Province_State VARCHAR(100), Confirmed BIGINT NOT NULL, Recovered BIGINT NOT NULL, Deaths BIGINT NOT NULL, PRIMARY KEY (Date, Country_Region, Province_State));")
    csv_to_table(dataset_path, "time-series-19-covid-combined.csv", tables[2])

    con.commit()

    # Optional SQL dump of the freshly built database:
    # with open('../dataset/covid-19/mysql_database/dump.sql','w') as fp:
    #     for line in con.iterdump():
    #         fp.write('%s\n' % line)
finally:
    # Always release the database file, even when a statement above fails;
    # closing without commit discards the partial load.
    con.close()


In [16]:
# Load the candidate questions: one query per line, blank lines skipped.
queries = []
with open("../possible-questions.txt", "r") as f:
    for line in f:
        # Remove only the line terminator. Slicing off the last character
        # unconditionally would truncate the final query whenever the file
        # does not end with a newline.
        line = line.rstrip("\n")
        if line:
            queries.append(line)

In [17]:
def print_entities(sentence):
    """Print each named entity the module-level ``nlp`` pipeline finds in ``sentence``.

    One line is written per entity: its text, start and end character
    offsets, and its label. Nothing is returned.
    """
    parsed = nlp(sentence)
    print("----> Entities:")
    for entity in parsed.ents:
        fields = (entity.text, entity.start_char, entity.end_char, entity.label_)
        print("-------->", *fields)

def print_tokens(sentence):
    """Print every token of ``sentence`` as produced by the module-level ``nlp`` pipeline.

    One line is written per token: its text, part-of-speech tag and
    dependency label. Nothing is returned.
    """
    parsed = nlp(sentence)
    print("----> Tokens:")
    for tok in parsed:
        fields = (tok.text, tok.pos_, tok.dep_)
        print("-------->", *fields)

In [31]:
def get_places(entities):
    """Return the text of every entity labelled ``'GPE'``, in input order.

    When no such entity is present, the single-element list ``['world']``
    is returned instead.
    """
    found = [entity.text for entity in entities if entity.label_ == 'GPE']
    return found if found else ['world']

def get_time_duration(query):
    """Collect the ``'value'`` field of every item the module-level ``sutime`` parser returns for ``query``.

    The values are passed through untouched, so the list holds whatever
    the parser produced for each temporal expression; it is empty when
    the parser returns no items.
    """
    return [match['value'] for match in sutime.parse(query)]

def parse_parameters(query):
    """Print the places and time expressions extracted from ``query``.

    Places come from ``get_places`` applied to the entities of the
    ``nlp`` parse; time expressions come from ``get_time_duration``.
    The results are only printed; nothing is returned.
    """
    locations = get_places(nlp(query).ents)
    print("Place -> ", locations)

    durations = get_time_duration(query)
    print("Time Duration -> ", durations)

In [32]:
def process_query(query):
    """Print a diagnostic report for one query.

    In order: the extracted parameters (``parse_parameters``), the query
    text itself, its named entities (``print_entities``), the list of its
    noun-chunk texts, and a trailing blank line.
    """
    parse_parameters(query)
    print(query)
    print_entities(query)
    # Token-level output is available via print_tokens(query) when needed.
    noun_phrases = []
    for chunk in nlp(query).noun_chunks:
        noun_phrases.append(chunk.text)
    print(noun_phrases)
    print()

# Run the diagnostic report for every query in `queries`, in list order.
for q in queries:
    process_query(q)

Place ->  ['Afganistan']
Time Duration ->  []
total number of cases found in Afganistan?
----> Entities:
--------> Afganistan 31 41 GPE
['total number', 'cases', 'Afganistan']

Place ->  ['Colombia']
Time Duration ->  ['2020-07']
total number of cases found in Colombia till july?
----> Entities:
--------> Colombia 31 39 GPE
--------> july 45 49 DATE
['total number', 'cases', 'Colombia', 'july']

Place ->  ['France']
Time Duration ->  ['2021-04']
total number of new cases found in France in april?
----> Entities:
--------> France 35 41 GPE
--------> april 45 50 DATE
['total number', 'new cases', 'France', 'april']

Place ->  ['Greece']
Time Duration ->  [{'end': 'XXXX-09', 'begin': 'XXXX-04'}]
total number of new cases found in Greece between april to september?
----> Entities:
--------> Greece 35 41 GPE
--------> between april to september 42 68 DATE
['total number', 'new cases', 'Greece', 'april', 'september']

Place ->  ['Hungary']
Time Duration ->  []
total number cases recovered in