✨ csv columns

colav-playground committed on Apr 17, 2024
1 parent 2a94140
commit 8ba1adc
Show file tree

Hide file tree

Showing 11 changed files with 155 additions and 113 deletions.
diff --git a/app/api/routes/v1/affiliation_app.py b/app/api/routes/v1/affiliation_app.py
@@ -6,7 +6,7 @@
 from flask import Blueprint, request, Response, Request
 
 from services.v1.affiliation_app import affiliation_app_service
-from infraestructure.mongo.repositories.work import WorkRepository
+from services.work import work_service
 from utils.encoder import JsonEncoder
 from utils.flatten_json import flatten_json_list
 
@@ -93,40 +93,45 @@ def get_affiliation_csv(
     section: str | None = "info",
     tab: str | None = None,
 ):
-    result = WorkRepository.get_research_products_by_affiliation(id, typ)
+    result = work_service.get_research_products_info_by(
+        affiliation_id=id, affiliation_type=typ
+    )
     if result:
         config = {
-            "authors": {"name": "author_names", "fields": ["full_name"]},
-            "citations_count": {"name": "cited_by_count", "fields": ["count"]},
-            # "subjects": {"name": "subjects", "fields": ["name"]},
-            # "source": {"name": "source", "fields": ["name"]},
+            "title": {
+                "name": "titulo",
+            },
+            "authors": {
+                "name": "autores",
+                "fields": ["full_name"],
+                "config": {"full_name": {"name": "full_name"}},
+            },
+            "lenguage": {"name": "lengua"},
+            "citations_count": {
+                "name": "veces citado",
+                "fields": ["count"],
+                "config": {"count": {"name": "count"}},
+            },
             "date_published": {
-                "name": "date",
+                "name": "fecha publicación",
                 "expresion": "datetime.date.fromtimestamp(value).strftime('%Y-%m-%d')",
             },
-            "bibliographic_info": {
-                "name": "biblio",
-                "fields": [
-                    "volume",
-                    "issue",
-                    "start_page",
-                    "end_page",
-                    # "is_open_access",
-                    # "open_access_status",
-                ],
-            },
-            "types": {"name": "type", "fields": ["type"]},
-            "remove": ["abstract", "source", "references_count", "subtitle"],
-            "titles": {
-                "name": "title",
-                "fields": ["title"],
-                "expresion": "next(filter(lambda x: x['lang'] == 'es', list_data), list_data[0])['title']",
+            "volume": {"name": "volumen"},
+            "issue": {"name": "issue"},
+            "start_page": {"name": "página inicial"},
+            "end_page": {"name": "página final"},
+            "year_published": {"name": "año de publicación"},
+            "types": {"name": "tipo de producto", "fields": ["type"]},
+            "subjects": {
+                "name": "temas",
+                "fields": ["name"],
+                "config": {"name": {"name": "name"}},
             },
         }
         flat_data_list = flatten_json_list(result, config, 1)
-        all_keys = set()
+        all_keys = []
         for item in flat_data_list:
-            all_keys.update(item.keys())
+            all_keys += [key for key in item.keys() if key not in all_keys]
 
         output = io.StringIO()
         csv_writer = csv.DictWriter(output, fieldnames=all_keys)

diff --git a/app/api/routes/v1/person_app.py b/app/api/routes/v1/person_app.py
@@ -5,6 +5,7 @@
 from flask import Blueprint, request, Response, Request
 
 from services.v1.person_app import person_app_service
+from services.work import work_service
 from utils.encoder import JsonEncoder
 from utils.flatten_json import flatten_json_list
 
@@ -75,55 +76,43 @@ def get_person(
 def get_person_csv(
     id: str | None = None, section: str | None = "info", tab: str | None = None
 ):
-    if section == "research" and tab == "products":
-        typ = request.args.get("type")
-        start_year = request.args.get("start_year")
-        endt_year = request.args.get("end_year")
-        page = request.args.get("page")
-        max_results = request.args.get("max")
-        sort = request.args.get("sort")
-        result = person_app_service.get_research_products_csv(
-            idx=id,
-            typ=typ,
-            start_year=start_year,
-            end_year=endt_year,
-            page=page,
-            max_results=max_results,
-            sort=sort,
-        )
+    result = work_service.get_research_products_by_author(author_id=id)
     if result:
         config = {
-            "authors": {"name": "author_names", "fields": ["full_name"]},
-            "citations_count": {"name": "cited_by_count", "fields": ["count"]},
-            # "subjects": {"name": "subjects", "fields": ["name"]},
-            # "source": {"name": "source", "fields": ["name"]},
+            "title": {
+                "name": "titulo",
+            },
+            "authors": {
+                "name": "autores",
+                "fields": ["full_name"],
+                "config": {"full_name": {"name": "full_name"}},
+            },
+            "lenguage": {"name": "lengua"},
+            "citations_count": {
+                "name": "veces citado",
+                "fields": ["count"],
+                "config": {"count": {"name": "count"}},
+            },
             "date_published": {
-                "name": "date",
+                "name": "fecha publicación",
                 "expresion": "datetime.date.fromtimestamp(value).strftime('%Y-%m-%d')",
             },
-            "bibliographic_info": {
-                "name": "biblio",
-                "fields": [
-                    "volume",
-                    "issue",
-                    "start_page",
-                    "end_page",
-                    # "is_open_access",
-                    # "open_access_status",
-                ],
-            },
-            "types": {"name": "type", "fields": ["type"]},
-            "remove": ["abstract", "source", "references_count", "subtitle"],
-            "titles": {
-                "name": "title",
-                "fields": ["title"],
-                "expresion": "next(filter(lambda x: x['lang'] == 'es', list_data), list_data[0])['title']",
+            "volume": {"name": "volumen"},
+            "issue": {"name": "issue"},
+            "start_page": {"name": "página inicial"},
+            "end_page": {"name": "página final"},
+            "year_published": {"name": "año de publicación"},
+            "types": {"name": "tipo de producto", "fields": ["type"]},
+            "subjects": {
+                "name": "temas",
+                "fields": ["name"],
+                "config": {"name": {"name": "name"}},
             },
         }
-        flat_data_list = flatten_json_list(result["data"], config, 1)
-        all_keys = set()
+        flat_data_list = flatten_json_list(result, config, 1)
+        all_keys = []
         for item in flat_data_list:
-            all_keys.update(item.keys())
+            all_keys += [key for key in item.keys() if key not in all_keys]
 
         output = io.StringIO()
         csv_writer = csv.DictWriter(output, fieldnames=all_keys)

diff --git a/app/infraestructure/mongo/models/general.py b/app/infraestructure/mongo/models/general.py
@@ -13,7 +13,7 @@ class Updated(EmbeddedModel):
 
 
 class ExternalId(EmbeddedModel):
-    id: ObjectId | None | str | int = None
+    id: ObjectId | None | str | int | list[str] = None
     source: str | None = None
     provenance: str | None = None
 

diff --git a/app/infraestructure/mongo/models/person.py b/app/infraestructure/mongo/models/person.py
@@ -53,7 +53,7 @@ class Person(Model):
     marital_status: str | None
     ranking: list[Ranking]
     birthplace: BirthPlace
-    birthdate: int
+    birthdate: int | str
     degrees: list[Degree]
     subjects: list[Any]
 

diff --git a/app/infraestructure/mongo/models/work.py b/app/infraestructure/mongo/models/work.py
@@ -47,7 +47,7 @@ class Author(EmbeddedModel):
 
 
 class Source(BaseModel):
-    id: ObjectId | str | None
+    id: ObjectId | str | None = None
     name: str | Any | None = None
 
 

diff --git a/app/infraestructure/mongo/repositories/work.py b/app/infraestructure/mongo/repositories/work.py
@@ -8,6 +8,7 @@
 from infraestructure.mongo.models.work import Work
 from infraestructure.mongo.models.person import Person
 from infraestructure.mongo.utils.session import engine
+from schemas.work import WorkCsv
 
 
 class WorkRepository(RepositoryBase):
@@ -58,20 +59,22 @@ def wrap_pipeline(
             {"$unwind": "$works"},
             {"$match": match_works},
             {"$group": {"_id": "$works._id", "works": {"$first": "$works"}}},
-            {
-                "$project": {
-                    "works._id": 1,
-                    "works.citations_count": 1,
-                    "works.year_published": 1,
-                    "works.titles": 1,
-                    "works.source": 1,
-                    "works.authors": 1,
-                    "works.subjects": 1,
-                    "works.bibliographic_info": 1,
-                    "works.date_published": 1,
-                    "works.types": 1,
-                }
-            },
+            # {
+            #     "$project": {
+            #         "works._id": 1,
+            #         "works.citations_count": 1,
+            #         "works.year_published": 1,
+            #         "works.titles": 1,
+            #         "works.source": 1,
+            #         "works.authors": 1,
+            #         "works.subjects": 1,
+            #         "works.bibliographic_info": 1,
+            #         "works.date_published": 1,
+            #         "works.types": 1,
+            #         "works.external_ids": 1,
+            #         "works.external_urls": 1,
+            #     }
+            # },
             # {"$sort": {cls.sort_traduction[sort_field]: -1}},
         ]
         return pipeline
@@ -181,22 +184,36 @@ def get_research_products_by_affiliation(
         *,
         start_year: int | None = None,
         end_year: int | None = None,
-        skip: int = 0,
-        limit: int = 100,
         sort: str = "citations",
     ) -> list[dict[str, Any]]:
         affiliation_type = (
             "institution" if affiliation_type == "Education" else affiliation_type
         )
         works_pipeline = cls.wrap_pipeline(affiliation_id, affiliation_type)
+        collection = Person if affiliation_type != "institution" else Work
         works_pipeline += [
-            {"$replaceRoot": {"newRoot": "$works"}},
-            {"$skip": skip},
-            {"$limit": limit},
+            {"$replaceRoot": {"newRoot": "$works"}}
+        ] if collection != Work else []
+        results = engine.get_collection(collection).aggregate(works_pipeline)
+        return [
+            WorkCsv.model_validate_json(
+                Work(**result).model_dump_json()
+            ).model_dump(exclude={"titles"})
+            for result in results
+        ]
+
+    @classmethod
+    def get_research_products_by_author(self, *, author_id: str) -> list[dict[str, Any]]:
+        works_pipeline = [
+            {"$match": {"authors.id": ObjectId(author_id)}},
+        ]
+        results = engine.get_collection(Work).aggregate(works_pipeline)
+        return [
+            WorkCsv.model_validate_json(
+                Work(**result).model_dump_json()
+            ).model_dump(exclude={"titles"})
+            for result in results
         ]
-        collection = Person if affiliation_type != "institution" else Work
-        results = list(engine.get_collection(collection).aggregate(works_pipeline))
-        return results
 
     def get_research_products(
         self,

diff --git a/app/schemas/general.py b/app/schemas/general.py
@@ -16,7 +16,7 @@ class Updated(BaseModel):
 
 
 class ExternalId(BaseModel):
-    id: Any
+    id: Any | None = None
     source: str | None
     provenance: str | None = None
 

diff --git a/app/schemas/person.py b/app/schemas/person.py
@@ -66,7 +66,7 @@ class Person(PersonBase):
     marital_status: str | None
     ranking: list[Ranking | list[Ranking]]
     birthplace: BirthPlace
-    birthdate: int
+    birthdate: int | str
     degrees: list[Degree]
     subjects: list[Any]
 

diff --git a/app/schemas/work.py b/app/schemas/work.py
@@ -7,9 +7,9 @@
 
 
 class Title(BaseModel):
-    title: str
-    lang: str
-    source: str
+    title: str | None = None
+    lang: str | None = None
+    source: str | None = None
 
 
 class BiblioGraphicInfo(BaseModel):
@@ -45,11 +45,13 @@ class Author(BaseModel):
     full_name: str
     affiliations: list[Affiliation] | None = Field(default_affiliation=list)
     external_ids: list[ExternalId] | None = Field(default_factory=list)
+    sex: str | None = None
+
 
 
 class Source(BaseModel):
-    id: str
-    name: str | Any
+    id: str | None = None
+    name: str | Any | None = None
     serials: Any | None = None
 
 
@@ -173,6 +175,22 @@ def append_urls_external_ids(cls, v: list[ExternalId]):
     def get_citations_count(cls, v: list[CitationsCount]):
         return v[0].count if v else 0
 
+class WorkCsv(WorkProccessed):
+    date_published: int | float | str | None = None
+    start_page: str | None = None
+    end_page: str | None = None
+
+
+    @model_validator(mode="after")
+    def get_biblio_graphic_info(self):
+        self.open_access_status = self.bibliographic_info.open_access_status
+        self.volume = self.bibliographic_info.volume
+        self.issue = self.bibliographic_info.issue
+        self.start_page = self.bibliographic_info.start_page
+        self.end_page = self.bibliographic_info.end_page
+        self.bibliographic_info = None
+        return self
+
 
 class Work(BaseModel):
     updated: list[Updated] | None = Field(default_factory=list)

diff --git a/app/services/work.py b/app/services/work.py
@@ -36,5 +36,16 @@ def get_info(self, *, id: str) -> dict[str, Any]:
         self.update_source(work)
         return {"data": work.model_dump(exclude_none=True, exclude={"titles"})}
 
+    def get_research_products_info_by(
+        self, *, affiliation_id: str, affiliation_type: str
+    ) -> list[dict[str, Any]]:
+        return WorkRepository.get_research_products_by_affiliation(
+            affiliation_id, affiliation_type
+        )
+
+    def get_research_products_by_author(self, *, author_id: str) -> list[dict[str, Any]]:
+        return WorkRepository.get_research_products_by_author(author_id=author_id)
+
+
 
 work_service = WorkService(work_repository, WorkSearch, WorkProccessed)