Skip to content

Commit

Permalink
✨ csv columns
Browse files Browse the repository at this point in the history
  • Loading branch information
Simon3640 committed Apr 17, 2024
1 parent 2a94140 commit 8ba1adc
Show file tree
Hide file tree
Showing 11 changed files with 155 additions and 113 deletions.
57 changes: 31 additions & 26 deletions app/api/routes/v1/affiliation_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from flask import Blueprint, request, Response, Request

from services.v1.affiliation_app import affiliation_app_service
from infraestructure.mongo.repositories.work import WorkRepository
from services.work import work_service
from utils.encoder import JsonEncoder
from utils.flatten_json import flatten_json_list

Expand Down Expand Up @@ -93,40 +93,45 @@ def get_affiliation_csv(
section: str | None = "info",
tab: str | None = None,
):
result = WorkRepository.get_research_products_by_affiliation(id, typ)
result = work_service.get_research_products_info_by(
affiliation_id=id, affiliation_type=typ
)
if result:
config = {
"authors": {"name": "author_names", "fields": ["full_name"]},
"citations_count": {"name": "cited_by_count", "fields": ["count"]},
# "subjects": {"name": "subjects", "fields": ["name"]},
# "source": {"name": "source", "fields": ["name"]},
"title": {
"name": "titulo",
},
"authors": {
"name": "autores",
"fields": ["full_name"],
"config": {"full_name": {"name": "full_name"}},
},
"lenguage": {"name": "lengua"},
"citations_count": {
"name": "veces citado",
"fields": ["count"],
"config": {"count": {"name": "count"}},
},
"date_published": {
"name": "date",
"name": "fecha publicación",
"expresion": "datetime.date.fromtimestamp(value).strftime('%Y-%m-%d')",
},
"bibliographic_info": {
"name": "biblio",
"fields": [
"volume",
"issue",
"start_page",
"end_page",
# "is_open_access",
# "open_access_status",
],
},
"types": {"name": "type", "fields": ["type"]},
"remove": ["abstract", "source", "references_count", "subtitle"],
"titles": {
"name": "title",
"fields": ["title"],
"expresion": "next(filter(lambda x: x['lang'] == 'es', list_data), list_data[0])['title']",
"volume": {"name": "volumen"},
"issue": {"name": "issue"},
"start_page": {"name": "página inicial"},
"end_page": {"name": "página final"},
"year_published": {"name": "año de publicación"},
"types": {"name": "tipo de producto", "fields": ["type"]},
"subjects": {
"name": "temas",
"fields": ["name"],
"config": {"name": {"name": "name"}},
},
}
flat_data_list = flatten_json_list(result, config, 1)
all_keys = set()
all_keys = []
for item in flat_data_list:
all_keys.update(item.keys())
all_keys += [key for key in item.keys() if key not in all_keys]

output = io.StringIO()
csv_writer = csv.DictWriter(output, fieldnames=all_keys)
Expand Down
71 changes: 30 additions & 41 deletions app/api/routes/v1/person_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from flask import Blueprint, request, Response, Request

from services.v1.person_app import person_app_service
from services.work import work_service
from utils.encoder import JsonEncoder
from utils.flatten_json import flatten_json_list

Expand Down Expand Up @@ -75,55 +76,43 @@ def get_person(
def get_person_csv(
id: str | None = None, section: str | None = "info", tab: str | None = None
):
if section == "research" and tab == "products":
typ = request.args.get("type")
start_year = request.args.get("start_year")
endt_year = request.args.get("end_year")
page = request.args.get("page")
max_results = request.args.get("max")
sort = request.args.get("sort")
result = person_app_service.get_research_products_csv(
idx=id,
typ=typ,
start_year=start_year,
end_year=endt_year,
page=page,
max_results=max_results,
sort=sort,
)
result = work_service.get_research_products_by_author(author_id=id)
if result:
config = {
"authors": {"name": "author_names", "fields": ["full_name"]},
"citations_count": {"name": "cited_by_count", "fields": ["count"]},
# "subjects": {"name": "subjects", "fields": ["name"]},
# "source": {"name": "source", "fields": ["name"]},
"title": {
"name": "titulo",
},
"authors": {
"name": "autores",
"fields": ["full_name"],
"config": {"full_name": {"name": "full_name"}},
},
"lenguage": {"name": "lengua"},
"citations_count": {
"name": "veces citado",
"fields": ["count"],
"config": {"count": {"name": "count"}},
},
"date_published": {
"name": "date",
"name": "fecha publicación",
"expresion": "datetime.date.fromtimestamp(value).strftime('%Y-%m-%d')",
},
"bibliographic_info": {
"name": "biblio",
"fields": [
"volume",
"issue",
"start_page",
"end_page",
# "is_open_access",
# "open_access_status",
],
},
"types": {"name": "type", "fields": ["type"]},
"remove": ["abstract", "source", "references_count", "subtitle"],
"titles": {
"name": "title",
"fields": ["title"],
"expresion": "next(filter(lambda x: x['lang'] == 'es', list_data), list_data[0])['title']",
"volume": {"name": "volumen"},
"issue": {"name": "issue"},
"start_page": {"name": "página inicial"},
"end_page": {"name": "página final"},
"year_published": {"name": "año de publicación"},
"types": {"name": "tipo de producto", "fields": ["type"]},
"subjects": {
"name": "temas",
"fields": ["name"],
"config": {"name": {"name": "name"}},
},
}
flat_data_list = flatten_json_list(result["data"], config, 1)
all_keys = set()
flat_data_list = flatten_json_list(result, config, 1)
all_keys = []
for item in flat_data_list:
all_keys.update(item.keys())
all_keys += [key for key in item.keys() if key not in all_keys]

output = io.StringIO()
csv_writer = csv.DictWriter(output, fieldnames=all_keys)
Expand Down
2 changes: 1 addition & 1 deletion app/infraestructure/mongo/models/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class Updated(EmbeddedModel):


class ExternalId(EmbeddedModel):
id: ObjectId | None | str | int = None
id: ObjectId | None | str | int | list[str] = None
source: str | None = None
provenance: str | None = None

Expand Down
2 changes: 1 addition & 1 deletion app/infraestructure/mongo/models/person.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ class Person(Model):
marital_status: str | None
ranking: list[Ranking]
birthplace: BirthPlace
birthdate: int
birthdate: int | str
degrees: list[Degree]
subjects: list[Any]

Expand Down
2 changes: 1 addition & 1 deletion app/infraestructure/mongo/models/work.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class Author(EmbeddedModel):


class Source(BaseModel):
id: ObjectId | str | None
id: ObjectId | str | None = None
name: str | Any | None = None


Expand Down
61 changes: 39 additions & 22 deletions app/infraestructure/mongo/repositories/work.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from infraestructure.mongo.models.work import Work
from infraestructure.mongo.models.person import Person
from infraestructure.mongo.utils.session import engine
from schemas.work import WorkCsv


class WorkRepository(RepositoryBase):
Expand Down Expand Up @@ -58,20 +59,22 @@ def wrap_pipeline(
{"$unwind": "$works"},
{"$match": match_works},
{"$group": {"_id": "$works._id", "works": {"$first": "$works"}}},
{
"$project": {
"works._id": 1,
"works.citations_count": 1,
"works.year_published": 1,
"works.titles": 1,
"works.source": 1,
"works.authors": 1,
"works.subjects": 1,
"works.bibliographic_info": 1,
"works.date_published": 1,
"works.types": 1,
}
},
# {
# "$project": {
# "works._id": 1,
# "works.citations_count": 1,
# "works.year_published": 1,
# "works.titles": 1,
# "works.source": 1,
# "works.authors": 1,
# "works.subjects": 1,
# "works.bibliographic_info": 1,
# "works.date_published": 1,
# "works.types": 1,
# "works.external_ids": 1,
# "works.external_urls": 1,
# }
# },
# {"$sort": {cls.sort_traduction[sort_field]: -1}},
]
return pipeline
Expand Down Expand Up @@ -181,22 +184,36 @@ def get_research_products_by_affiliation(
*,
start_year: int | None = None,
end_year: int | None = None,
skip: int = 0,
limit: int = 100,
sort: str = "citations",
) -> list[dict[str, Any]]:
affiliation_type = (
"institution" if affiliation_type == "Education" else affiliation_type
)
works_pipeline = cls.wrap_pipeline(affiliation_id, affiliation_type)
collection = Person if affiliation_type != "institution" else Work
works_pipeline += [
{"$replaceRoot": {"newRoot": "$works"}},
{"$skip": skip},
{"$limit": limit},
{"$replaceRoot": {"newRoot": "$works"}}
] if collection != Work else []
results = engine.get_collection(collection).aggregate(works_pipeline)
return [
WorkCsv.model_validate_json(
Work(**result).model_dump_json()
).model_dump(exclude={"titles"})
for result in results
]

@classmethod
def get_research_products_by_author(
    cls, *, author_id: str
) -> list[dict[str, Any]]:
    """Return the works whose ``authors.id`` matches ``author_id`` as plain dicts.

    Each matching document is loaded into the ``Work`` model, re-validated
    through ``WorkCsv`` via a JSON round trip, and dumped without ``titles``.

    Args:
        author_id: String form of the author's id; it is wrapped in
            ``ObjectId`` before being used in the ``$match`` stage.

    Returns:
        One ``WorkCsv.model_dump(exclude={"titles"})`` dict per matching work.
    """
    # NOTE(review): a malformed ``author_id`` presumably makes ``ObjectId``
    # raise; nothing here catches it, so the error reaches the caller.
    works_pipeline = [
        {"$match": {"authors.id": ObjectId(author_id)}},
    ]
    results = engine.get_collection(Work).aggregate(works_pipeline)
    return [
        WorkCsv.model_validate_json(
            Work(**result).model_dump_json()
        ).model_dump(exclude={"titles"})
        for result in results
    ]
collection = Person if affiliation_type != "institution" else Work
results = list(engine.get_collection(collection).aggregate(works_pipeline))
return results

def get_research_products(
self,
Expand Down
2 changes: 1 addition & 1 deletion app/schemas/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class Updated(BaseModel):


class ExternalId(BaseModel):
id: Any
id: Any | None = None
source: str | None
provenance: str | None = None

Expand Down
2 changes: 1 addition & 1 deletion app/schemas/person.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ class Person(PersonBase):
marital_status: str | None
ranking: list[Ranking | list[Ranking]]
birthplace: BirthPlace
birthdate: int
birthdate: int | str
degrees: list[Degree]
subjects: list[Any]

Expand Down
28 changes: 23 additions & 5 deletions app/schemas/work.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@


class Title(BaseModel):
title: str
lang: str
source: str
title: str | None = None
lang: str | None = None
source: str | None = None


class BiblioGraphicInfo(BaseModel):
Expand Down Expand Up @@ -45,11 +45,13 @@ class Author(BaseModel):
full_name: str
# ``default_factory`` (rather than the unrecognised ``default_affiliation``
# keyword) is what gives the field a fresh empty list when the payload omits
# it, matching ``external_ids`` below.
affiliations: list[Affiliation] | None = Field(default_factory=list)
external_ids: list[ExternalId] | None = Field(default_factory=list)
sex: str | None = None



class Source(BaseModel):
id: str
name: str | Any
id: str | None = None
name: str | Any | None = None
serials: Any | None = None


Expand Down Expand Up @@ -173,6 +175,22 @@ def append_urls_external_ids(cls, v: list[ExternalId]):
def get_citations_count(cls, v: list[CitationsCount]):
return v[0].count if v else 0

class WorkCsv(WorkProccessed):
    """Variant of ``WorkProccessed`` with bibliographic fields flattened.

    After validation the values held in ``bibliographic_info`` are copied onto
    the model itself and ``bibliographic_info`` is cleared, so a dump of the
    model exposes them as top-level entries.
    """

    date_published: int | float | str | None = None
    start_page: str | None = None
    end_page: str | None = None

    @model_validator(mode="after")
    def get_biblio_graphic_info(self):
        """Hoist the nested bibliographic values and drop the nested object.

        Returns:
            The same instance, as required for an "after" model validator.
        """
        biblio = self.bibliographic_info
        # This validator itself leaves ``bibliographic_info`` set to ``None``,
        # so an already-flattened instance (or one built without the nested
        # object) must pass through untouched instead of raising
        # AttributeError.
        if biblio is None:
            return self
        # NOTE(review): ``open_access_status``, ``volume`` and ``issue`` are
        # presumably declared on ``WorkProccessed`` - confirm.
        self.open_access_status = biblio.open_access_status
        self.volume = biblio.volume
        self.issue = biblio.issue
        self.start_page = biblio.start_page
        self.end_page = biblio.end_page
        self.bibliographic_info = None
        return self


class Work(BaseModel):
updated: list[Updated] | None = Field(default_factory=list)
Expand Down
11 changes: 11 additions & 0 deletions app/services/work.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,16 @@ def get_info(self, *, id: str) -> dict[str, Any]:
self.update_source(work)
return {"data": work.model_dump(exclude_none=True, exclude={"titles"})}

def get_research_products_info_by(
    self, *, affiliation_id: str, affiliation_type: str
) -> list[dict[str, Any]]:
    """Fetch the research products of an affiliation from the repository.

    Delegates to ``WorkRepository.get_research_products_by_affiliation``,
    passing both arguments positionally, and returns its result unchanged.
    """
    products = WorkRepository.get_research_products_by_affiliation(
        affiliation_id,
        affiliation_type,
    )
    return products

def get_research_products_by_author(
    self, *, author_id: str
) -> list[dict[str, Any]]:
    """Fetch the research products of one author from the repository.

    Delegates to ``WorkRepository.get_research_products_by_author`` and
    returns its result unchanged.
    """
    products = WorkRepository.get_research_products_by_author(
        author_id=author_id
    )
    return products



work_service = WorkService(work_repository, WorkSearch, WorkProccessed)
Loading

0 comments on commit 8ba1adc

Please sign in to comment.