In [1]:
import pandas as pd
import importlib
from lib import kpi

In [42]:
importlib.reload(kpi)

<module 'lib.kpi' from 'c:\\workspace\\GitHub\\arcsaef\\lib\\kpi.py'>

In [8]:
data           = kpi.load_data()
rpt_config     = kpi.get_rpt_args()
saef_library   = kpi.get_saef_library()
responses_json = kpi.split_response(data[0])
ppl_collection = kpi.person_construct(responses_json, data[2], rpt_config[0])
proj_saef      = kpi.project_construct(responses_json, ppl_collection[0])
buckets        = kpi.matched_library(saef_library, ppl_collection[1])
templates      = kpi.load_templates()
meta_bucket    = buckets[0]
bucket         = buckets[1]
ppl_saef       = dict(sorted(ppl_collection[0].items(), key = lambda x: x[1].get('LastName')))
ppl_hash       = ppl_collection[1]
bulk_responses = data[0]
biblio         = data[1]
scopus         = data[2]
rpt_year       = rpt_config[0]
organisations  = rpt_config[1]
org_shortnames = list(organisations.keys())
proj_saef_nohold = proj_saef[proj_saef.Status != 'On hold']

In [9]:
# 1.remove empty rows
bucket = bucket[bucket['id_person'].isna() == False]
# 2. Make parsing more straight forward
for prsn in ppl_saef:
    if ppl_saef[prsn]['Gender'] == 'Non-binary/Gender diverse' or  ppl_saef[prsn]['Gender']  == 'Prefer not to say':
        ppl_saef[prsn]['Gender'] = 'Other'
# 3. Exclude: Nicole Webster, 027E2DEA-DB06-3946-B8C8-E053EF8E09F0
del ppl_saef['027E2DEA-DB06-3946-B8C8-E053EF8E09F0']; del ppl_hash['NicoleWebster']
# 4. Remove leading/trialing whitespace from  blibliographic entry.
biblio['Biblio'] = biblio['Biblio'].str.strip()
# 5. Jump into your pedantry
for prsn in ppl_saef:
    if ppl_saef[prsn]['Position'] != 'PhD Student' and  ppl_saef[prsn]['Position'] != 'Masters Student' and \
       ppl_saef[prsn]['Position'] != 'Honours Student':
        ppl_saef[prsn]['StudentProjectTitle'] = 'Not applicable'

 ### A brief what's where and why
 - Reporting data is from 3 sources;
   - Filemaker: api output (via Postman) is saved as text
     - data/kpi_reporting_yyyymmdd.txt
   - Zotero: api output for reporting year (via Postman) is saved as json
     - data/saef_library_postman_yyyymmdd.json
   - Bibiliography: Zotero bibligraphy output & manual update is saved a xlsx
     - data/all/zot_biblio.xlsx
 - Updating config/reporting.yaml is critical
 - Individual and Organisation cells are run for mid-year and annual reporting
 - Organisation Excel is uused for mid-year

In [10]:
def condition(x):
    if ppl_saef[x]['Position'] == 'Chief Investigator':
        return 'Chief Investigator'
    elif ppl_saef[x]['Position'] == 'Partner Investigator':
        return 'Partner Investigator'
    elif ppl_saef[x]['Position'] == 'Associate Investigator':
        return 'Associate Investigator'
bucket.loc[:, ('position')] =  bucket['id_person'].apply(condition)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bucket.loc[:, ('position')] =  bucket['id_person'].apply(condition)


In [33]:
prsn_list = []
for prsn in ppl_saef:
    if ppl_saef[prsn]['Position'] in ['Chief Investigator', 'Partner Investigator', 'Associate Investigator']:
        prsn_list.append(prsn)

# journals authored by CI's or PI's
prsn_jnls_df = bucket.loc[bucket.itemType.isin(['journalArticle']) & (bucket.id_person.isin(prsn_list)), \
                          ['title', 'name', 'id_person', 'position']]
# all person id's of CI'S & PI's without an authored publication
ci_pi_wo_any_jnls_ids = pd.Series(prsn_list)[~pd.Series(prsn_list).isin(prsn_jnls_df.id_person)]

prsn_no_jnls = []
for prsn in ppl_saef:
    if ppl_saef[prsn]['State'] == 'Active':
        if prsn in ci_pi_wo_any_jnls_ids.to_list():
            prsn_no_jnls.append({'title': 'zzDummy', 'name': ppl_saef[prsn]['FirstName']+ppl_saef[prsn]['LastName'],  'id_person': prsn, 'position': ppl_saef[prsn]['Position'] } )
prsn_no_jnls_df = pd.DataFrame(prsn_no_jnls)
prsn_no_jnls_df

Unnamed: 0,title,name,id_person,position
0,zzDummy,MehrdadAmirghasemi,1CDC830C-6F88-B04C-8553-847C1E3145B5,Associate Investigator
1,zzDummy,MargaretBarbour,17D9BE3B-B017-4C45-95F8-AA6B7BD31030,Partner Investigator
2,zzDummy,MichaelBode,28E905A2-8596-F640-84FF-21A92FA343C7,Chief Investigator
3,zzDummy,DanCharman,11579856-AA21-5643-94E5-77899530AB2D,Partner Investigator
4,zzDummy,GraemeClark,F6A987CD-7FF1-5049-8A5B-CD60D0B2674E,Chief Investigator
5,zzDummy,EmmaCooper,633165FC-9221-4170-965F-578CF9CA2CBC,Associate Investigator
6,zzDummy,DonaldCowan,C9266F7A-C1AA-3645-93E9-7F5D1EECBAC3,Partner Investigator
7,zzDummy,NoelCressie,4EA4A851-A6AD-464B-9899-7F0E8CBEDF98,Chief Investigator
8,zzDummy,GrantDuffy,A395D112-1183-EC45-903D-5EA0BEDA4F7A,Associate Investigator
9,zzDummy,MatthewDunbabin,CE81215A-D81F-EA49-8DE8-F4129E342F48,Chief Investigator


In [15]:
pd.crosstab(prsn_jnls_df.title, [prsn_jnls_df.position, prsn_jnls_df.name]).to_excel("output/2024/org/pub_auth_crosstab.xlsx", sheet_name=f"{'2024'}")

In [34]:
pd.crosstab(prsn_no_jnls_df.title, [prsn_no_jnls_df.position, prsn_no_jnls_df.name])

position,Associate Investigator,Associate Investigator,Associate Investigator,Associate Investigator,Associate Investigator,Associate Investigator,Associate Investigator,Associate Investigator,Associate Investigator,Associate Investigator,...,Partner Investigator,Partner Investigator,Partner Investigator,Partner Investigator,Partner Investigator,Partner Investigator,Partner Investigator,Partner Investigator,Partner Investigator,Partner Investigator
name,EmmaCooper,FlaviaFerrari,FrancescoRicci,GavinWinter,GrantDuffy,JacintaHolloway-Brown,JohannaTurnbull,KateThompson,KatieJones,MehrdadAmirghasemi,...,KrystynaSaunders,MardiMcNeil,MargaretBarbour,MatthewWheeler,MiguelOlalla-Tárraga,NeridaWilson,QuanHua,Reka-HajnalkaFulop,RossWhitmore,StephanieMcLennan
title,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
zzDummy,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1


In [16]:
bucket

Unnamed: 0,key,itemType,title,rights,pubyr,name,id_person,tags,publicationTitle,project,positon
0,E7534TV4,journalArticle,The atmosphere: a transport medium or an active microbial ecosystem?,oa,2024,RachaelLappan,6BC16DD4-3E26-AB41-8A1A-09F47E0C24F3,[],The ISME Journal,,Partner Investigator
2,JFHT2C9T,journalArticle,Including a diverse set of voices to address biological invasions,,2024,MelodieMcGeoch,9346D92C-9533-A94C-98DB-4C7F05B79FCD,[],Trends in Ecology & Evolution,,Chief Investigator
3,6RBB7SDM,dataset,Parsayarya/Scraping-ATCM: ATCM WP and IP paper dataset,Creative Commons Attribution 4.0 International,2024,MichaelBode,28E905A2-8596-F640-84FF-21A92FA343C7,[],,,Chief Investigator
4,LLDRRG65,dataset,Parsayarya/ATS-Measure-Scraping: ATS Measure Dataset,Creative Commons Attribution 4.0 International,2024,MichaelBode,28E905A2-8596-F640-84FF-21A92FA343C7,[],,,Chief Investigator
5,4XYRH4X6,journalArticle,Is tree planting an effective strategy for climate change mitigation?,12mo,2024,SharonRobinson,B3BB1CEF-1CDF-BD42-BC0A-21A212CCAB1A,[],Science of The Total Environment,,Chief Investigator
6,GB66JQII,journalArticle,Macroevolution of the plant-hummingbird pollination system,oa,2024,MatthiasDehling,0641E93F-1887-994C-86FE-510E18546AFC,[],Biological Reviews of the Cambridge Philosophical Society,,Partner Investigator
8,CEC442BP,journalArticle,On the relationship between mesoscale cellular convection and meteorological forcing: comparing the Southern Ocean against the North Pacific,oa,2024,StevenSiems,C1B89E8D-7298-5B41-8567-946D0A56323B,[],Atmospheric Chemistry and Physics,,Chief Investigator
9,CEC442BP,journalArticle,On the relationship between mesoscale cellular convection and meteorological forcing: comparing the Southern Ocean against the North Pacific,oa,2024,Tahereh(Nasim)Alinejadtabrizi,2947F044-F5D0-EE47-B913-296B314FEEC8,[],Atmospheric Chemistry and Physics,,Partner Investigator
10,6STGMD7P,journalArticle,Wet deposition in shallow convection over the Southern Ocean,oa,2024,StevenSiems,C1B89E8D-7298-5B41-8567-946D0A56323B,[],npj Climate and Atmospheric Science,,Chief Investigator
11,6STGMD7P,journalArticle,Wet deposition in shallow convection over the Southern Ocean,oa,2024,Tahereh(Nasim)Alinejadtabrizi,2947F044-F5D0-EE47-B913-296B314FEEC8,[],npj Climate and Atmospheric Science,,Partner Investigator
