Skip to content

Commit

Permalink
towards #59
Browse files Browse the repository at this point in the history
  • Loading branch information
WolfgangFahl committed Nov 19, 2023
1 parent bdfc1b8 commit 43b2041
Show file tree
Hide file tree
Showing 13 changed files with 187 additions and 361 deletions.
93 changes: 44 additions & 49 deletions corpus/web/eventseries.py
Original file line number Diff line number Diff line change
@@ -1,45 +1,33 @@

import io
import re
import pandas as pd
from dataclasses import dataclass, asdict
from distutils.util import strtobool
from fastapi import Response
from fastapi.responses import JSONResponse,FileResponse
from tabulate import tabulate
from typing import List

from fb4.widgets import LodTable
from flask import Blueprint, request, jsonify, send_file
from spreadsheet.spreadsheet import ExcelDocument
from corpus.lookup import CorpusLookup

from corpus.datasources.openresearch import OREvent, OREventSeries
from corpus.eventseriescompletion import EventSeriesCompletion


class EventSeriesBlueprint():
class EventSeriesAPI():
"""
API service for event series data
"""

def __init__(self, app, name: str, template_folder:str=None, appWrap=None):
def __init__(self,lookup:CorpusLookup):
'''
construct me
Args:
name(str): my name
template_folder(str): the template folder
lookup(CorpusLookup): the corpus lookup used to query the datasources
'''
self.name = name
if template_folder is not None:
self.template_folder = template_folder
else:
self.template_folder = 'eventseries'
self.blueprint = Blueprint(name, __name__, template_folder=self.template_folder,url_prefix="/eventseries")
self.app = app
self.appWrap = appWrap
self.lookup=lookup

@self.blueprint.route('/<name>')
def getEventSeries(name: str):
return self.getEventSeries(name)

app.register_blueprint(self.blueprint)

def getEventSeries(self, name: str):
def getEventSeries(self,name: str,bks:str=None,reduce:bool=False):
'''
Query multiple datasources for the given event series
Expand All @@ -48,22 +36,19 @@ def getEventSeries(self, name: str):
'''
multiQuery = "select * from {event}"
idQuery = f"""select source,eventId from event where lookupAcronym LIKE "{name} %" order by year desc"""
dictOfLod = self.appWrap.lookup.getDictOfLod4MultiQuery(multiQuery, idQuery)
if request.values.get("bk"):
bkParam = request.values.get("bk")
allowedBks = bkParam.split(",") if bkParam else None
dictOfLod = self.lookup.getDictOfLod4MultiQuery(multiQuery, idQuery)
if bks:
allowedBks = bks.split(",") if bks else None
self.filterForBk(dictOfLod.get("tibkat"), allowedBks)
reduceRecords = request.values.get("reduce")
if reduceRecords is not None and (reduceRecords == "" or bool(strtobool(reduceRecords))):
if reduce:
for source in ["tibkat", "dblp"]:
sourceRecords = dictOfLod.get(source)
if sourceRecords:
reducedRecords = EventSeriesCompletion.filterDuplicatesByTitle(sourceRecords)
dictOfLod[source] = reducedRecords
return self.convertToRequestedFormat(name, dictOfLod)
return dictOfLod

@staticmethod
def filterForBk(lod:List[dict], allowedBks:List[str]):
def filterForBk(self,lod:List[dict], allowedBks:List[str]):
"""
Filters the given list of dicts (lod) to only include the records whose bk is in the given list of allowed bks
Args:
Expand Down Expand Up @@ -151,10 +136,10 @@ def generateSeriesSpreadsheet(self, name:str, dictOfLods: dict) -> ExcelDocument
spreadsheet.addTable(sheetName, lod)
return spreadsheet

def convertToRequestedFormat(self, name:str, dictOfLods: dict):
async def convertToRequestedFormat(self, name: str, dictOfLods: dict, markup_format: str = "json"):
"""
Converts the given dicts of lods to the requested format.
Supported formats: json, html
Converts the given dicts of lods to the requested markup format.
Supported formats: json, html, excel, pd_excel, various tabulate formats.
Default format: json
Args:
Expand All @@ -163,21 +148,31 @@ def convertToRequestedFormat(self, name:str, dictOfLods: dict):
Returns:
Response
"""
formatParam = request.values.get('format', "")
if formatParam.lower() == "html":
tables = []
for name, lod in dictOfLods.items():
tables.append(LodTable(name=name, lod=lod))
template = "cc/result.html"
title = "Query Result"
result = "".join([str(t) for t in tables])
html = self.appWrap.render_template(template, title=title, activeItem="", result=result)
return html
elif formatParam.lower() == "excel":
if markup_format.lower() == "excel":
# Custom Excel spreadsheet generation
spreadsheet = self.generateSeriesSpreadsheet(name, dictOfLods)
return send_file(spreadsheet.toBytesIO(), as_attachment=True, download_name=spreadsheet.filename, mimetype=spreadsheet.MIME_TYPE)
spreadsheet_io = io.BytesIO(spreadsheet.toBytesIO().getvalue()) # Ensure it's a BytesIO object
spreadsheet_io.seek(0)
return FileResponse(spreadsheet_io, media_type="application/vnd.ms-excel", filename=f"{name}.xlsx")

elif markup_format.lower() == "pd_excel":
# Pandas style Excel spreadsheet generation
df = pd.DataFrame.from_dict({k: v for lod in dictOfLods.values() for k, v in lod.items()})
excel_io = io.BytesIO()
with pd.ExcelWriter(excel_io, engine="xlsxwriter") as writer:
df.to_excel(writer, sheet_name=name)
excel_io.seek(0)
return FileResponse(excel_io, media_type="application/vnd.ms-excel", filename=f"{name}.xlsx")

elif markup_format.lower() == "json":
# Direct JSON response
return JSONResponse(content=dictOfLods)

else:
return jsonify(dictOfLods)
# Using tabulate for other formats (including HTML)
tabulated_content = tabulate([lod for lod in dictOfLods.values()], headers="keys", tablefmt=markup_format)
media_type = "text/plain" if markup_format.lower() != "html" else "text/html"
return Response(content=tabulated_content, media_type=media_type)

@dataclass
class MetadataMappings:
Expand Down
162 changes: 0 additions & 162 deletions corpus/web/jpwebbrowser.py

This file was deleted.

13 changes: 2 additions & 11 deletions corpus/web/scholar.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
import json
from typing import List
from flask import Blueprint, request, jsonify
from lodstorage.sparql import SPARQL

class ScholarBlueprint():
class ScholarAPI():
"""
API service for scholarly data
"""

def __init__(self, app, name: str, template_folder:str=None, appWrap=None):
def __init__(self, name: str):
'''
construct me
Expand All @@ -17,19 +16,11 @@ def __init__(self, app, name: str, template_folder:str=None, appWrap=None):
template_folder(str): the template folder
'''
self.name = name
if template_folder is not None:
self.template_folder = template_folder
else:
self.template_folder = 'scholar'
self.blueprint = Blueprint(name, __name__, template_folder=self.template_folder,url_prefix="/scholar")
self.app = app
self.appWrap = appWrap

@self.blueprint.route('/complete', methods=['POST'])
def completeScholar():
return self.completeScholar()

app.register_blueprint(self.blueprint)

def completeScholar(self):
"""
Expand Down
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ dependencies = [
"scikit-learn>=1.1.2",
"requests>=2.28.1",
"pyLookupParser>=0.0.2",
# https://github.com/WolfgangFahl/PyGenericSpreadSheet/
"pyGenericSpreadSheet>=0.2.4",
# https://pypi.org/project/ngwidgets/
"ngwidgets>=0.4.1"

Expand Down
5 changes: 0 additions & 5 deletions templates/cc/datasources.html

This file was deleted.

4 changes: 0 additions & 4 deletions templates/cc/generic500.html

This file was deleted.

5 changes: 0 additions & 5 deletions templates/cc/home.html

This file was deleted.

8 changes: 0 additions & 8 deletions templates/cc/queries.html

This file was deleted.

5 changes: 0 additions & 5 deletions templates/cc/result.html

This file was deleted.

Loading

0 comments on commit 43b2041

Please sign in to comment.