In [None]:
from pydantic import BaseModel, Field
from typing import List
import pandas as pd
from openai import OpenAI

import os
from tqdm import tqdm


In [None]:
client = OpenAI(api_key=os.environ['OPENAI_API_KEY'])

In [76]:
df = pd.read_csv('2019_mundipharma.csv')
df['authors_index'] = df['authors_index'].str.replace(r'\d+', '', regex=True)
df_sample = df
df_sample

Unnamed: 0,pmid,title,authors,citation,first_author,journal,publication_year,create_date,pmcid,nihms_id,doi,url,href,coi,grants,authors_index
0,30522922,Brentuximab vedotin with chemotherapy for CD30...,"Steven Horwitz 1, Owen A OConnor 2, Barbara Pr...",Lancet. 2019 Jan 19;393(10168):229-240. doi: 1...,Horwitz S,Lancet,2019,2018/12/08,PMC6436818,NIHMS1515984,10.1016/S0140-6736(18)32984-2,https://pubmed.ncbi.nlm.nih.gov/30522922/,/30522922/,DECLARATION OF INTERESTS Dr. Horwitz reports r...,Grants and funding P30 CA008748/CA/NCI NIH HHS...,"Steven Horwitz , Owen A OConnor , Barbara Pr..."
1,30312503,Pediatric asthma: An unmet need for more effec...,"Nikolaos G Papadopoulos 1 2, Adnan Čustović 3,...",Pediatr Allergy Immunol. 2019 Feb;30(1):7-16. ...,Papadopoulos NG,Pediatr Allergy Immunol,2019,2018/10/13,PMC7380053,,10.1111/pai.12990,https://pubmed.ncbi.nlm.nih.gov/30312503/,/30312503/,Adnan Čustović reports personal fees from Nova...,"Grants and funding Novartis Pharma AG, Basel, ...","Nikolaos G Papadopoulos , Adnan Čustović , ..."
2,30242544,Optimizing cancer pain management in resource-...,"Sam H Ahmedzai 1, Mary Jocylyn Bautista 2, Kam...",Support Care Cancer. 2019 Jun;27(6):2113-2124....,Ahmedzai SH,Support Care Cancer,2019,2018/09/23,PMC6499735,,10.1007/s00520-018-4471-z,https://pubmed.ncbi.nlm.nih.gov/30242544/,/30242544/,"MJB, YG, AAIH, SH, DK, DCK, DHL, and JJW recei...",,"Sam H Ahmedzai , Mary Jocylyn Bautista , Kam..."
3,30457075,Augmentation therapies for treatment-resistant...,"Rebecca Strawbridge 1, Ben Carter 2, Lindsey M...",Br J Psychiatry. 2019 Jan;214(1):42-51. doi: 1...,Strawbridge R,Br J Psychiatry,2019,2018/11/21,,,10.1192/bjp.2018.233,https://pubmed.ncbi.nlm.nih.gov/30457075/,/30457075/,,,"Rebecca Strawbridge , Ben Carter , Lindsey M..."
4,29987879,Healthcare resource utilization and costs asso...,"Jaco Voorham 1, Xiao Xu 2, David B Price 1 3, ...",Allergy. 2019 Feb;74(2):273-283. doi: 10.1111/...,Voorham J,Allergy,2019,2018/07/11,PMC6587525,,10.1111/all.13556,https://pubmed.ncbi.nlm.nih.gov/29987879/,/29987879/,Jaco Voorham and Marjan Kerkhof are current em...,Grants and funding AstraZeneca/International,"Jaco Voorham , Xiao Xu , David B Price , S..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
453,30674053,Efficacy of Disease Management Programs Asthma...,"Frank Kanniess 1, Katja Krockenberger 2 3, Pet...",Dtsch Med Wochenschr. 2019 Jan;144(2):e1. doi:...,Kanniess F,Dtsch Med Wochenschr,2019,2019/01/24,,,10.1055/a-0837-2602,https://pubmed.ncbi.nlm.nih.gov/30674053/,/30674053/,Diese Studie wurde durch die Mundipharma GmbH ...,,"Frank Kanniess , Katja Krockenberger , Petr..."
454,31017657,Sustained long-term efficacy and safety of ada...,"D Thaçi 1, K Papp 2, D Marcoux 3, L Weibel 4, ...",Br J Dermatol. 2019 Dec;181(6):1177-1189. doi:...,Thaçi D,Br J Dermatol,2019,2019/04/25,PMC6916374,,10.1111/bjd.18029,https://pubmed.ncbi.nlm.nih.gov/31017657/,/31017657/,D.T. has served as a consultant member of advi...,,"D Thaçi , K Papp , D Marcoux , L Weibel , ..."
455,31339847,"Global, regional, and national burden of suici...",Mohsen Naghavi; Global Burden of Disease Self-...,BMJ. 2019 Feb 6;364:l94. doi: 10.1136/bmj.l94.,Naghavi M,BMJ,2019,2019/07/25,PMC6598639,,10.1136/bmj.l94,https://pubmed.ncbi.nlm.nih.gov/31339847/,/31339847/,Competing interests: All authors have complete...,,Mohsen Naghavi; Global Burden of Disease Self-...
456,30835879,Cross-national patterns of substance use disor...,"Meredith G Harris 1 2, Chrianna Bharat 3, Meye...",Addiction. 2019 Aug;114(8):1446-1459. doi: 10....,Harris MG,Addiction,2019,2019/03/06,PMC7408310,NIHMS1614375,10.1111/add.14599,https://pubmed.ncbi.nlm.nih.gov/30835879/,/30835879/,Disclosure/Conflicts of Interest: In the past ...,Grants and funding 1099709/National Health and...,"Meredith G Harris , Chrianna Bharat , Meyer..."


In [77]:
class COI(BaseModel):
    author: str = Field(description="The name or initials of the author reporting the conflict of interest.")
    organizations: List[str] = Field(description="List of organizations associated with the author.")
    sentence: str = Field(description="The entire sentence from the article mentioning the conflict of interest.")

class COIList(BaseModel):
    cois: List[COI] = Field(description="List of COI entries.")

def extract_coi_info(coi_text: str, author_names: str) -> List[dict]:
    try:
        messages = [
            {"role": "system", "content": f"Extract the conflict of interests for each author in json format. The author names may be abbreviated. The author list is {author_names} - use their full names in the json."},
            {"role": "user", "content": coi_text},
        ]
        
        completion = client.beta.chat.completions.parse(
            model="gpt-4o-2024-08-06",
            messages=messages,
            response_format=COIList,
            temperature=0.0,
        )
        
        event = completion.choices[0].message.parsed
        event_dict = event.dict()        
        return event_dict['cois']
    
    except Exception as e:
        print(f"Error processing COI text: {e}")
        return []



In [79]:
chunk_size = 50  
output_folder = "output_folder_mundipharma/"  
start_chunk = 0  
cache_file = "processed_chunks_cache_2.txt"  # File to keep track of processed chunks

os.makedirs(output_folder, exist_ok=True)

if os.path.exists(cache_file):
    with open(cache_file, "r") as f:
        processed_chunks = set(int(line.strip()) for line in f)
else:
    processed_chunks = set()

tqdm.pandas()

for i in range(start_chunk * chunk_size, len(df_sample), chunk_size):
    chunk_number = i // chunk_size + 1

    if chunk_number in processed_chunks:
        print(f"Skipping chunk {chunk_number} (already processed)")
        continue

    chunk = df_sample.iloc[i:i + chunk_size]   

    chunk['coi_info'] = chunk.progress_apply(
        lambda row: extract_coi_info(row['coi'], row['authors_index']),
        axis=1
    )

    chunk.to_csv(f"{output_folder}chunk_{chunk_number}.csv", index=False)
    print(f"Saved processed chunk {chunk_number}")

    with open(cache_file, "a") as f:
        f.write(f"{chunk_number}\n")


  0%|          | 0/50 [00:00<?, ?it/s]/var/folders/8k/kp_pwhv96pjcp_nh49ycpw0c0000gn/T/ipykernel_80669/1976895240.py:24: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  event_dict = event.dict()
 10%|█         | 5/50 [01:48<13:15, 17.67s/it]

Error processing COI text: Error code: 400 - {'error': {'message': "We could not parse the JSON body of your request. (HINT: This likely means you aren't using your HTTP library correctly. The OpenAI API expects a JSON payload, but what was sent was not valid JSON. If you have trouble figuring out how to fix this, please contact us through our help center at help.openai.com.)", 'type': 'invalid_request_error', 'param': None, 'code': None}}


 42%|████▏     | 21/50 [26:55<59:32, 123.20s/it]  

Error processing COI text: Error code: 400 - {'error': {'message': "We could not parse the JSON body of your request. (HINT: This likely means you aren't using your HTTP library correctly. The OpenAI API expects a JSON payload, but what was sent was not valid JSON. If you have trouble figuring out how to fix this, please contact us through our help center at help.openai.com.)", 'type': 'invalid_request_error', 'param': None, 'code': None}}
Error processing COI text: Error code: 400 - {'error': {'message': "We could not parse the JSON body of your request. (HINT: This likely means you aren't using your HTTP library correctly. The OpenAI API expects a JSON payload, but what was sent was not valid JSON. If you have trouble figuring out how to fix this, please contact us through our help center at help.openai.com.)", 'type': 'invalid_request_error', 'param': None, 'code': None}}


 50%|█████     | 25/50 [27:23<16:47, 40.28s/it] 

Error processing COI text: Error code: 400 - {'error': {'message': "We could not parse the JSON body of your request. (HINT: This likely means you aren't using your HTTP library correctly. The OpenAI API expects a JSON payload, but what was sent was not valid JSON. If you have trouble figuring out how to fix this, please contact us through our help center at help.openai.com.)", 'type': 'invalid_request_error', 'param': None, 'code': None}}


100%|██████████| 50/50 [33:54<00:00, 40.69s/it]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk['coi_info'] = chunk.progress_apply(


Saved processed chunk 1


 28%|██▊       | 14/50 [02:21<03:46,  6.29s/it]

Error processing COI text: Error code: 400 - {'error': {'message': "We could not parse the JSON body of your request. (HINT: This likely means you aren't using your HTTP library correctly. The OpenAI API expects a JSON payload, but what was sent was not valid JSON. If you have trouble figuring out how to fix this, please contact us through our help center at help.openai.com.)", 'type': 'invalid_request_error', 'param': None, 'code': None}}


100%|██████████| 50/50 [08:28<00:00, 10.17s/it]


Saved processed chunk 2


 26%|██▌       | 13/50 [02:28<07:08, 11.58s/it]

Error processing COI text: Error code: 400 - {'error': {'message': "We could not parse the JSON body of your request. (HINT: This likely means you aren't using your HTTP library correctly. The OpenAI API expects a JSON payload, but what was sent was not valid JSON. If you have trouble figuring out how to fix this, please contact us through our help center at help.openai.com.)", 'type': 'invalid_request_error', 'param': None, 'code': None}}


 72%|███████▏  | 36/50 [06:47<02:54, 12.47s/it]

Error processing COI text: Error code: 400 - {'error': {'message': "We could not parse the JSON body of your request. (HINT: This likely means you aren't using your HTTP library correctly. The OpenAI API expects a JSON payload, but what was sent was not valid JSON. If you have trouble figuring out how to fix this, please contact us through our help center at help.openai.com.)", 'type': 'invalid_request_error', 'param': None, 'code': None}}


100%|██████████| 50/50 [11:16<00:00, 13.53s/it]


Saved processed chunk 3


 10%|█         | 5/50 [00:34<04:19,  5.77s/it]

Error processing COI text: Error code: 400 - {'error': {'message': "We could not parse the JSON body of your request. (HINT: This likely means you aren't using your HTTP library correctly. The OpenAI API expects a JSON payload, but what was sent was not valid JSON. If you have trouble figuring out how to fix this, please contact us through our help center at help.openai.com.)", 'type': 'invalid_request_error', 'param': None, 'code': None}}


 26%|██▌       | 13/50 [01:10<03:04,  4.99s/it]

Error processing COI text: Error code: 400 - {'error': {'message': "We could not parse the JSON body of your request. (HINT: This likely means you aren't using your HTTP library correctly. The OpenAI API expects a JSON payload, but what was sent was not valid JSON. If you have trouble figuring out how to fix this, please contact us through our help center at help.openai.com.)", 'type': 'invalid_request_error', 'param': None, 'code': None}}


 52%|█████▏    | 26/50 [03:10<03:39,  9.17s/it]

Error processing COI text: Error code: 400 - {'error': {'message': "We could not parse the JSON body of your request. (HINT: This likely means you aren't using your HTTP library correctly. The OpenAI API expects a JSON payload, but what was sent was not valid JSON. If you have trouble figuring out how to fix this, please contact us through our help center at help.openai.com.)", 'type': 'invalid_request_error', 'param': None, 'code': None}}


100%|██████████| 50/50 [09:39<00:00, 11.60s/it]


Saved processed chunk 4


 84%|████████▍ | 42/50 [11:46<03:13, 24.22s/it]

Error processing COI text: Error code: 400 - {'error': {'message': "We could not parse the JSON body of your request. (HINT: This likely means you aren't using your HTTP library correctly. The OpenAI API expects a JSON payload, but what was sent was not valid JSON. If you have trouble figuring out how to fix this, please contact us through our help center at help.openai.com.)", 'type': 'invalid_request_error', 'param': None, 'code': None}}


100%|██████████| 50/50 [13:21<00:00, 16.03s/it]


Saved processed chunk 5


  6%|▌         | 3/50 [00:34<08:44, 11.16s/it]

Error processing COI text: Error code: 400 - {'error': {'message': "We could not parse the JSON body of your request. (HINT: This likely means you aren't using your HTTP library correctly. The OpenAI API expects a JSON payload, but what was sent was not valid JSON. If you have trouble figuring out how to fix this, please contact us through our help center at help.openai.com.)", 'type': 'invalid_request_error', 'param': None, 'code': None}}


 70%|███████   | 35/50 [07:45<02:30, 10.05s/it]

Error processing COI text: Error code: 400 - {'error': {'message': "We could not parse the JSON body of your request. (HINT: This likely means you aren't using your HTTP library correctly. The OpenAI API expects a JSON payload, but what was sent was not valid JSON. If you have trouble figuring out how to fix this, please contact us through our help center at help.openai.com.)", 'type': 'invalid_request_error', 'param': None, 'code': None}}


 74%|███████▍  | 37/50 [07:54<01:38,  7.57s/it]

Error processing COI text: Error code: 400 - {'error': {'message': "We could not parse the JSON body of your request. (HINT: This likely means you aren't using your HTTP library correctly. The OpenAI API expects a JSON payload, but what was sent was not valid JSON. If you have trouble figuring out how to fix this, please contact us through our help center at help.openai.com.)", 'type': 'invalid_request_error', 'param': None, 'code': None}}


 80%|████████  | 40/50 [07:59<00:43,  4.39s/it]

Error processing COI text: Error code: 400 - {'error': {'message': "We could not parse the JSON body of your request. (HINT: This likely means you aren't using your HTTP library correctly. The OpenAI API expects a JSON payload, but what was sent was not valid JSON. If you have trouble figuring out how to fix this, please contact us through our help center at help.openai.com.)", 'type': 'invalid_request_error', 'param': None, 'code': None}}


100%|██████████| 50/50 [10:33<00:00, 12.67s/it]


Saved processed chunk 6


 80%|████████  | 40/50 [09:28<01:48, 10.85s/it]

Error processing COI text: Error code: 400 - {'error': {'message': "We could not parse the JSON body of your request. (HINT: This likely means you aren't using your HTTP library correctly. The OpenAI API expects a JSON payload, but what was sent was not valid JSON. If you have trouble figuring out how to fix this, please contact us through our help center at help.openai.com.)", 'type': 'invalid_request_error', 'param': None, 'code': None}}


 88%|████████▊ | 44/50 [10:44<01:53, 18.85s/it]

Error processing COI text: Error code: 400 - {'error': {'message': "We could not parse the JSON body of your request. (HINT: This likely means you aren't using your HTTP library correctly. The OpenAI API expects a JSON payload, but what was sent was not valid JSON. If you have trouble figuring out how to fix this, please contact us through our help center at help.openai.com.)", 'type': 'invalid_request_error', 'param': None, 'code': None}}


100%|██████████| 50/50 [11:58<00:00, 14.37s/it]


Saved processed chunk 7


 90%|█████████ | 45/50 [09:03<00:31,  6.33s/it]

Error processing COI text: Error code: 400 - {'error': {'message': "We could not parse the JSON body of your request. (HINT: This likely means you aren't using your HTTP library correctly. The OpenAI API expects a JSON payload, but what was sent was not valid JSON. If you have trouble figuring out how to fix this, please contact us through our help center at help.openai.com.)", 'type': 'invalid_request_error', 'param': None, 'code': None}}


100%|██████████| 50/50 [10:14<00:00, 12.28s/it]


Saved processed chunk 8


 54%|█████▍    | 27/50 [04:57<02:38,  6.91s/it]

Error processing COI text: Error code: 400 - {'error': {'message': "We could not parse the JSON body of your request. (HINT: This likely means you aren't using your HTTP library correctly. The OpenAI API expects a JSON payload, but what was sent was not valid JSON. If you have trouble figuring out how to fix this, please contact us through our help center at help.openai.com.)", 'type': 'invalid_request_error', 'param': None, 'code': None}}


100%|██████████| 50/50 [09:29<00:00, 11.39s/it]


Saved processed chunk 9


100%|██████████| 8/8 [01:19<00:00,  9.97s/it]

Saved processed chunk 10





In [80]:
processed_chunks = []
for file in os.listdir(output_folder):
    if file.endswith(".csv"):
        processed_chunks.append(pd.read_csv(os.path.join(output_folder, file)))

df_processed = pd.concat(processed_chunks, ignore_index=True)
df_processed

Unnamed: 0,pmid,title,authors,citation,first_author,journal,publication_year,create_date,pmcid,nihms_id,doi,url,href,coi,grants,authors_index,coi_info
0,31694338,Lenalidomide Maintenance with or without Predn...,"Francesca Bonello 1, Stefano Pulini 2, Stelvio...",Cancers (Basel). 2019 Nov 5;11(11):1735. doi: ...,Bonello F,Cancers (Basel),2019,2019/11/08,PMC6896192,,10.3390/cancers11111735,https://pubmed.ncbi.nlm.nih.gov/31694338/,/31694338/,"O.A. has received funding from Takeda, Janssen...",,"Francesca Bonello , Stefano Pulini , Stelvio...","[{'author': 'Ombretta Annibali', 'organization..."
1,30612585,Effectiveness of a hydrogel dressing as an ana...,"Maleea D Holbert 1 2, Bronwyn R Griffin 3 4 5,...",Trials. 2019 Jan 6;20(1):13. doi: 10.1186/s130...,Holbert MD,Trials,2019,2019/01/08,PMC6322255,,10.1186/s13063-018-3057-x,https://pubmed.ncbi.nlm.nih.gov/30612585/,/30612585/,This trial has being granted ethical approval ...,Grants and funding 2014001961/Mundipharma,"Maleea D Holbert , Bronwyn R Griffin , St...","[{'author': 'Maleea D Holbert', 'organizations..."
2,31122243,Spirometric assessment of emphysema presence a...,"Mariaelena Occhipinti 1, Matteo Paoletti 2, Br...",Respir Res. 2019 May 23;20(1):101. doi: 10.118...,Occhipinti M,Respir Res,2019,2019/05/25,PMC6533715,,10.1186/s12931-019-1049-3,https://pubmed.ncbi.nlm.nih.gov/31122243/,/31122243/,Dr. Occhipinti reports personal fees from Imbi...,Grants and funding ---/Ministero dellIstruzion...,"Mariaelena Occhipinti , Matteo Paoletti , Br...","[{'author': 'Mariaelena Occhipinti', 'organiza..."
3,31243042,Multicenter analysis of outcomes in blastic pl...,"Justin Taylor 1, Michael Haddadin 1, Vivek A U...",Blood. 2019 Aug 22;134(8):678-687. doi: 10.118...,Taylor J,Blood,2019,2019/06/28,PMC6706810,,10.1182/blood.2019001144,https://pubmed.ncbi.nlm.nih.gov/31243042/,/31243042/,Conflict-of-interest disclosure: N.M.-S. has r...,Grants and funding K08 CA230319/CA/NCI NIH HHS...,"Justin Taylor , Michael Haddadin , Vivek A U...","[{'author': 'Neha Mehta-Shah', 'organizations'..."
4,31142617,Ozone-primed neutrophils promote early steps o...,"Natacha Rocks 1, Céline Vanwinge 1, Coraline R...",Thorax. 2019 Aug;74(8):768-779. doi: 10.1136/t...,Rocks N,Thorax,2019,2019/05/31,,,10.1136/thoraxjnl-2018-211990,https://pubmed.ncbi.nlm.nih.gov/31142617/,/31142617/,Competing interests: DC is the founder of Aqui...,,"Natacha Rocks , Céline Vanwinge , Coraline R...","[{'author': 'Didier Cataldo', 'organizations':..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
453,31684698,Adductor canal blocks for postoperative pain t...,"Alexander Schnabel 1, Sylvia U Reichl 2, Steph...",Cochrane Database Syst Rev. 2019 Oct 26;2019(1...,Schnabel A,Cochrane Database Syst Rev,2019,2019/11/06,PMC6814953,,10.1002/14651858.CD012262.pub2,https://pubmed.ncbi.nlm.nih.gov/31684698/,/31684698/,Alexander Schnabel: none known. Sylvia U Reich...,,"Alexander Schnabel , Sylvia U Reichl , Steph...","[{'author': 'Alexander Schnabel', 'organizatio..."
454,30649604,Adaptation and qualitative evaluation of encou...,"Pola Hahlweg 1, Isabell Witzel 2, Volkmar Müll...",Arch Gynecol Obstet. 2019 Apr;299(4):1141-1149...,Hahlweg P,Arch Gynecol Obstet,2019,2019/01/17,PMC6435605,,10.1007/s00404-018-5035-7,https://pubmed.ncbi.nlm.nih.gov/30649604/,/30649604/,PH’s institution received the grant from resea...,Grants and funding NWF 15_10/research funding ...,"Pola Hahlweg , Isabell Witzel , Volkmar Müll...","[{'author': 'Pola Hahlweg', 'organizations': [..."
455,31796013,Does a tailored intervention to promote adhere...,"Claudia Gregoriano 1 2, Thomas Dieterle 3 4, A...",Respir Res. 2019 Dec 3;20(1):273. doi: 10.1186...,Gregoriano C,Respir Res,2019,2019/12/05,PMC6892023,,10.1186/s12931-019-1219-3,https://pubmed.ncbi.nlm.nih.gov/31796013/,/31796013/,The authors declare that they have no competin...,Grants and funding None/Freiwillige Akademisch...,"Claudia Gregoriano , Thomas Dieterle , Ann...","[{'author': 'Claudia Gregoriano', 'organizatio..."
456,30442715,The potential impact of azithromycin in idiopa...,"Claudio Macaluso 1 2, Joaquín Maritano Furcada...",Eur Respir J. 2019 Feb 14;53(2):1800628. doi: ...,Macaluso C,Eur Respir J,2019,2018/11/17,,,10.1183/13993003.00628-2018,https://pubmed.ncbi.nlm.nih.gov/30442715/,/30442715/,Conflict of interest: C. Macaluso has nothing ...,Grants and funding 20719/VAC_/Versus Arthritis...,"Claudio Macaluso , Joaquín Maritano Furcada ...","[{'author': 'Claudio Macaluso', 'organizations..."


In [81]:
df_processed['coi_info'] = df_processed['coi_info'].apply(eval)
df_exploded = df_processed.explode('coi_info')
df_exploded

Unnamed: 0,pmid,title,authors,citation,first_author,journal,publication_year,create_date,pmcid,nihms_id,doi,url,href,coi,grants,authors_index,coi_info
0,31694338,Lenalidomide Maintenance with or without Predn...,"Francesca Bonello 1, Stefano Pulini 2, Stelvio...",Cancers (Basel). 2019 Nov 5;11(11):1735. doi: ...,Bonello F,Cancers (Basel),2019,2019/11/08,PMC6896192,,10.3390/cancers11111735,https://pubmed.ncbi.nlm.nih.gov/31694338/,/31694338/,"O.A. has received funding from Takeda, Janssen...",,"Francesca Bonello , Stefano Pulini , Stelvio...","{'author': 'Ombretta Annibali', 'organizations..."
0,31694338,Lenalidomide Maintenance with or without Predn...,"Francesca Bonello 1, Stefano Pulini 2, Stelvio...",Cancers (Basel). 2019 Nov 5;11(11):1735. doi: ...,Bonello F,Cancers (Basel),2019,2019/11/08,PMC6896192,,10.3390/cancers11111735,https://pubmed.ncbi.nlm.nih.gov/31694338/,/31694338/,"O.A. has received funding from Takeda, Janssen...",,"Francesca Bonello , Stefano Pulini , Stelvio...","{'author': 'Paola Omedé', 'organizations': ['J..."
0,31694338,Lenalidomide Maintenance with or without Predn...,"Francesca Bonello 1, Stefano Pulini 2, Stelvio...",Cancers (Basel). 2019 Nov 5;11(11):1735. doi: ...,Bonello F,Cancers (Basel),2019,2019/11/08,PMC6896192,,10.3390/cancers11111735,https://pubmed.ncbi.nlm.nih.gov/31694338/,/31694338/,"O.A. has received funding from Takeda, Janssen...",,"Francesca Bonello , Stefano Pulini , Stelvio...","{'author': 'Luigi Podda', 'organizations': ['J..."
0,31694338,Lenalidomide Maintenance with or without Predn...,"Francesca Bonello 1, Stefano Pulini 2, Stelvio...",Cancers (Basel). 2019 Nov 5;11(11):1735. doi: ...,Bonello F,Cancers (Basel),2019,2019/11/08,PMC6896192,,10.3390/cancers11111735,https://pubmed.ncbi.nlm.nih.gov/31694338/,/31694338/,"O.A. has received funding from Takeda, Janssen...",,"Francesca Bonello , Stefano Pulini , Stelvio...","{'author': 'Angelo Belotti', 'organizations': ..."
0,31694338,Lenalidomide Maintenance with or without Predn...,"Francesca Bonello 1, Stefano Pulini 2, Stelvio...",Cancers (Basel). 2019 Nov 5;11(11):1735. doi: ...,Bonello F,Cancers (Basel),2019,2019/11/08,PMC6896192,,10.3390/cancers11111735,https://pubmed.ncbi.nlm.nih.gov/31694338/,/31694338/,"O.A. has received funding from Takeda, Janssen...",,"Francesca Bonello , Stefano Pulini , Stelvio...","{'author': 'Antonio Palumbo', 'organizations':..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
456,30442715,The potential impact of azithromycin in idiopa...,"Claudio Macaluso 1 2, Joaquín Maritano Furcada...",Eur Respir J. 2019 Feb 14;53(2):1800628. doi: ...,Macaluso C,Eur Respir J,2019,2018/11/17,,,10.1183/13993003.00628-2018,https://pubmed.ncbi.nlm.nih.gov/30442715/,/30442715/,Conflict of interest: C. Macaluso has nothing ...,Grants and funding 20719/VAC_/Versus Arthritis...,"Claudio Macaluso , Joaquín Maritano Furcada ...","{'author': 'Elizabeth A Renzoni', 'organizatio..."
456,30442715,The potential impact of azithromycin in idiopa...,"Claudio Macaluso 1 2, Joaquín Maritano Furcada...",Eur Respir J. 2019 Feb 14;53(2):1800628. doi: ...,Macaluso C,Eur Respir J,2019,2018/11/17,,,10.1183/13993003.00628-2018,https://pubmed.ncbi.nlm.nih.gov/30442715/,/30442715/,Conflict of interest: C. Macaluso has nothing ...,Grants and funding 20719/VAC_/Versus Arthritis...,"Claudio Macaluso , Joaquín Maritano Furcada ...","{'author': 'Philip L Molyneaux', 'organization..."
457,31310600,Intranasal sufentanil versus intravenous morph...,"Marc Blancher 1, Maxime Maignan 1 2, Cyrielle ...",PLoS Med. 2019 Jul 16;16(7):e1002849. doi: 10....,Blancher M,PLoS Med,2019,2019/07/17,PMC6634380,,10.1371/journal.pmed.1002849,https://pubmed.ncbi.nlm.nih.gov/31310600/,/31310600/,I have read the journals policy and the author...,Grants and funding The study was supported by ...,"Marc Blancher , Maxime Maignan , Cyrielle C...","{'author': 'Maxime Maignan', 'organizations': ..."
457,31310600,Intranasal sufentanil versus intravenous morph...,"Marc Blancher 1, Maxime Maignan 1 2, Cyrielle ...",PLoS Med. 2019 Jul 16;16(7):e1002849. doi: 10....,Blancher M,PLoS Med,2019,2019/07/17,PMC6634380,,10.1371/journal.pmed.1002849,https://pubmed.ncbi.nlm.nih.gov/31310600/,/31310600/,I have read the journals policy and the author...,Grants and funding The study was supported by ...,"Marc Blancher , Maxime Maignan , Cyrielle C...","{'author': 'Guillaume Debaty', 'organizations'..."


In [None]:
df_exploded = pd.concat([df_exploded.drop(['coi_info'], axis=1), df_exploded['coi_info'].apply(pd.Series)], axis=1)
df_exploded

Unnamed: 0,pmid,title,authors,citation,first_author,journal,publication_year,create_date,pmcid,nihms_id,doi,url,href,coi,grants,authors_index,0,author,organizations,sentence
0,31694338,Lenalidomide Maintenance with or without Predn...,"Francesca Bonello 1, Stefano Pulini 2, Stelvio...",Cancers (Basel). 2019 Nov 5;11(11):1735. doi: ...,Bonello F,Cancers (Basel),2019,2019/11/08,PMC6896192,,10.3390/cancers11111735,https://pubmed.ncbi.nlm.nih.gov/31694338/,/31694338/,"O.A. has received funding from Takeda, Janssen...",,"Francesca Bonello , Stefano Pulini , Stelvio...",,Ombretta Annibali,"[Takeda, Janssen-Cilag, Amgen, Gilead, Janssen...","O.A. has received funding from Takeda, Janssen..."
0,31694338,Lenalidomide Maintenance with or without Predn...,"Francesca Bonello 1, Stefano Pulini 2, Stelvio...",Cancers (Basel). 2019 Nov 5;11(11):1735. doi: ...,Bonello F,Cancers (Basel),2019,2019/11/08,PMC6896192,,10.3390/cancers11111735,https://pubmed.ncbi.nlm.nih.gov/31694338/,/31694338/,"O.A. has received funding from Takeda, Janssen...",,"Francesca Bonello , Stefano Pulini , Stelvio...",,Paola Omedé,[Janssen],P.O. has served on the advisory board for Jans...
0,31694338,Lenalidomide Maintenance with or without Predn...,"Francesca Bonello 1, Stefano Pulini 2, Stelvio...",Cancers (Basel). 2019 Nov 5;11(11):1735. doi: ...,Bonello F,Cancers (Basel),2019,2019/11/08,PMC6896192,,10.3390/cancers11111735,https://pubmed.ncbi.nlm.nih.gov/31694338/,/31694338/,"O.A. has received funding from Takeda, Janssen...",,"Francesca Bonello , Stefano Pulini , Stelvio...",,Luigi Podda,"[Janssen, Celgene, Amgen]","L.P. has received grants, for his role as disc..."
0,31694338,Lenalidomide Maintenance with or without Predn...,"Francesca Bonello 1, Stefano Pulini 2, Stelvio...",Cancers (Basel). 2019 Nov 5;11(11):1735. doi: ...,Bonello F,Cancers (Basel),2019,2019/11/08,PMC6896192,,10.3390/cancers11111735,https://pubmed.ncbi.nlm.nih.gov/31694338/,/31694338/,"O.A. has received funding from Takeda, Janssen...",,"Francesca Bonello , Stefano Pulini , Stelvio...",,Angelo Belotti,"[Celgene, Janssen, Amgen]",A.B. has served on the advisory boards per Cel...
0,31694338,Lenalidomide Maintenance with or without Predn...,"Francesca Bonello 1, Stefano Pulini 2, Stelvio...",Cancers (Basel). 2019 Nov 5;11(11):1735. doi: ...,Bonello F,Cancers (Basel),2019,2019/11/08,PMC6896192,,10.3390/cancers11111735,https://pubmed.ncbi.nlm.nih.gov/31694338/,/31694338/,"O.A. has received funding from Takeda, Janssen...",,"Francesca Bonello , Stefano Pulini , Stelvio...",,Antonio Palumbo,"[Takeda, Amgen, Novartis, Bristol-Myers Squibb...",A.P. is currently a Takeda employee; data repo...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
456,30442715,The potential impact of azithromycin in idiopa...,"Claudio Macaluso 1 2, Joaquín Maritano Furcada...",Eur Respir J. 2019 Feb 14;53(2):1800628. doi: ...,Macaluso C,Eur Respir J,2019,2018/11/17,,,10.1183/13993003.00628-2018,https://pubmed.ncbi.nlm.nih.gov/30442715/,/30442715/,Conflict of interest: C. Macaluso has nothing ...,Grants and funding 20719/VAC_/Versus Arthritis...,"Claudio Macaluso , Joaquín Maritano Furcada ...",,Elizabeth A Renzoni,"[Boehringer Ingelheim, Roche, Mundipharma]",Conflict of interest: E.A. Renzoni has receive...
456,30442715,The potential impact of azithromycin in idiopa...,"Claudio Macaluso 1 2, Joaquín Maritano Furcada...",Eur Respir J. 2019 Feb 14;53(2):1800628. doi: ...,Macaluso C,Eur Respir J,2019,2018/11/17,,,10.1183/13993003.00628-2018,https://pubmed.ncbi.nlm.nih.gov/30442715/,/30442715/,Conflict of interest: C. Macaluso has nothing ...,Grants and funding 20719/VAC_/Versus Arthritis...,"Claudio Macaluso , Joaquín Maritano Furcada ...",,Philip L Molyneaux,[],Conflict of interest: P.L. Molyneaux has nothi...
457,31310600,Intranasal sufentanil versus intravenous morph...,"Marc Blancher 1, Maxime Maignan 1 2, Cyrielle ...",PLoS Med. 2019 Jul 16;16(7):e1002849. doi: 10....,Blancher M,PLoS Med,2019,2019/07/17,PMC6634380,,10.1371/journal.pmed.1002849,https://pubmed.ncbi.nlm.nih.gov/31310600/,/31310600/,I have read the journals policy and the author...,Grants and funding The study was supported by ...,"Marc Blancher , Maxime Maignan , Cyrielle C...",,Maxime Maignan,"[MundiPharma, Purdue, Roche Diagnostics]",MM has received grants and personal fees from ...
457,31310600,Intranasal sufentanil versus intravenous morph...,"Marc Blancher 1, Maxime Maignan 1 2, Cyrielle ...",PLoS Med. 2019 Jul 16;16(7):e1002849. doi: 10....,Blancher M,PLoS Med,2019,2019/07/17,PMC6634380,,10.1371/journal.pmed.1002849,https://pubmed.ncbi.nlm.nih.gov/31310600/,/31310600/,I have read the journals policy and the author...,Grants and funding The study was supported by ...,"Marc Blancher , Maxime Maignan , Cyrielle C...",,Guillaume Debaty,[Zoll Medical],GD has received personal fees from Zoll Medica...


In [None]:
df_exploded = df_exploded.explode('organizations')
df_exploded['organizations'] = df_exploded['organizations'].str.lower()



Unnamed: 0,pmid,title,authors,citation,first_author,journal,publication_year,create_date,pmcid,nihms_id,doi,url,href,coi,grants,authors_index,0,author,organizations,sentence
0,31694338,Lenalidomide Maintenance with or without Predn...,"Francesca Bonello 1, Stefano Pulini 2, Stelvio...",Cancers (Basel). 2019 Nov 5;11(11):1735. doi: ...,Bonello F,Cancers (Basel),2019,2019/11/08,PMC6896192,,10.3390/cancers11111735,https://pubmed.ncbi.nlm.nih.gov/31694338/,/31694338/,"O.A. has received funding from Takeda, Janssen...",,"Francesca Bonello , Stefano Pulini , Stelvio...",,Mario Boccadoro,mundipharma,"M.B. has received honoraria from Sanofi, Celge..."
2,31122243,Spirometric assessment of emphysema presence a...,"Mariaelena Occhipinti 1, Matteo Paoletti 2, Br...",Respir Res. 2019 May 23;20(1):101. doi: 10.118...,Occhipinti M,Respir Res,2019,2019/05/25,PMC6533715,,10.1186/s12931-019-1049-3,https://pubmed.ncbi.nlm.nih.gov/31122243/,/31122243/,Dr. Occhipinti reports personal fees from Imbi...,Grants and funding ---/Ministero dellIstruzion...,"Mariaelena Occhipinti , Matteo Paoletti , Br...",,Massimo Pistolesi,mundipharma,Dr. Pistolesi reports grants from MINISTRY OF ...
3,31243042,Multicenter analysis of outcomes in blastic pl...,"Justin Taylor 1, Michael Haddadin 1, Vivek A U...",Blood. 2019 Aug 22;134(8):678-687. doi: 10.118...,Taylor J,Blood,2019,2019/06/28,PMC6706810,,10.1182/blood.2019001144,https://pubmed.ncbi.nlm.nih.gov/31243042/,/31243042/,Conflict-of-interest disclosure: N.M.-S. has r...,Grants and funding K08 CA230319/CA/NCI NIH HHS...,"Justin Taylor , Michael Haddadin , Vivek A U...",,Steven M Horwitz,mundipharma,S.M.H. has received research funding from Fort...
4,31142617,Ozone-primed neutrophils promote early steps o...,"Natacha Rocks 1, Céline Vanwinge 1, Coraline R...",Thorax. 2019 Aug;74(8):768-779. doi: 10.1136/t...,Rocks N,Thorax,2019,2019/05/31,,,10.1136/thoraxjnl-2018-211990,https://pubmed.ncbi.nlm.nih.gov/31142617/,/31142617/,Competing interests: DC is the founder of Aqui...,,"Natacha Rocks , Céline Vanwinge , Coraline R...",,Didier Cataldo,mundipharma,"DC is the founder of Aquilon Pharmaceuticals, ..."
5,32237643,Opioids crisis in France? Findings and reality,Rodrigue Deleens 1,Rev Prat. 2019 Oct;69(8):826-830.,Deleens R,Rev Prat,2019,2020/04/03,,,,https://pubmed.ncbi.nlm.nih.gov/32237643/,/32237643/,R. Deleens déclare des liens ponctuels (travau...,,Rodrigue Deleens,,Rodrigue Deleens,mundipharma,R. Deleens déclare des liens ponctuels (travau...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
453,31684698,Adductor canal blocks for postoperative pain t...,"Alexander Schnabel 1, Sylvia U Reichl 2, Steph...",Cochrane Database Syst Rev. 2019 Oct 26;2019(1...,Schnabel A,Cochrane Database Syst Rev,2019,2019/11/06,PMC6814953,,10.1002/14651858.CD012262.pub2,https://pubmed.ncbi.nlm.nih.gov/31684698/,/31684698/,Alexander Schnabel: none known. Sylvia U Reich...,,"Alexander Schnabel , Sylvia U Reichl , Steph...",,Esther Pogatzki-Zahn,mundipharma international,Esther Pogatzki‐Zahn received financial suppor...
454,30649604,Adaptation and qualitative evaluation of encou...,"Pola Hahlweg 1, Isabell Witzel 2, Volkmar Müll...",Arch Gynecol Obstet. 2019 Apr;299(4):1141-1149...,Hahlweg P,Arch Gynecol Obstet,2019,2019/01/17,PMC6435605,,10.1007/s00404-018-5035-7,https://pubmed.ncbi.nlm.nih.gov/30649604/,/30649604/,PH’s institution received the grant from resea...,Grants and funding NWF 15_10/research funding ...,"Pola Hahlweg , Isabell Witzel , Volkmar Müll...",,Isabelle Scholl,mundipharma gmbh,IS’s institution received the grant from resea...
456,30442715,The potential impact of azithromycin in idiopa...,"Claudio Macaluso 1 2, Joaquín Maritano Furcada...",Eur Respir J. 2019 Feb 14;53(2):1800628. doi: ...,Macaluso C,Eur Respir J,2019,2018/11/17,,,10.1183/13993003.00628-2018,https://pubmed.ncbi.nlm.nih.gov/30442715/,/30442715/,Conflict of interest: C. Macaluso has nothing ...,Grants and funding 20719/VAC_/Versus Arthritis...,"Claudio Macaluso , Joaquín Maritano Furcada ...",,Elizabeth A Renzoni,mundipharma,Conflict of interest: E.A. Renzoni has receive...
457,31310600,Intranasal sufentanil versus intravenous morph...,"Marc Blancher 1, Maxime Maignan 1 2, Cyrielle ...",PLoS Med. 2019 Jul 16;16(7):e1002849. doi: 10....,Blancher M,PLoS Med,2019,2019/07/17,PMC6634380,,10.1371/journal.pmed.1002849,https://pubmed.ncbi.nlm.nih.gov/31310600/,/31310600/,I have read the journals policy and the author...,Grants and funding The study was supported by ...,"Marc Blancher , Maxime Maignan , Cyrielle C...",,Maxime Maignan,mundipharma,MM has received grants and personal fees from ...


In [172]:
mundipharma_df = df_exploded[(df_exploded['organizations'].str.contains('mundipharma', na=False)) ]
mundipharma_df

Unnamed: 0,pmid,title,authors,citation,first_author,journal,publication_year,create_date,pmcid,nihms_id,doi,url,href,coi,grants,authors_index,0,author,organizations,sentence
0,31694338,Lenalidomide Maintenance with or without Predn...,"Francesca Bonello 1, Stefano Pulini 2, Stelvio...",Cancers (Basel). 2019 Nov 5;11(11):1735. doi: ...,Bonello F,Cancers (Basel),2019,2019/11/08,PMC6896192,,10.3390/cancers11111735,https://pubmed.ncbi.nlm.nih.gov/31694338/,/31694338/,"O.A. has received funding from Takeda, Janssen...",,"Francesca Bonello , Stefano Pulini , Stelvio...",,Mario Boccadoro,mundipharma,"M.B. has received honoraria from Sanofi, Celge..."
2,31122243,Spirometric assessment of emphysema presence a...,"Mariaelena Occhipinti 1, Matteo Paoletti 2, Br...",Respir Res. 2019 May 23;20(1):101. doi: 10.118...,Occhipinti M,Respir Res,2019,2019/05/25,PMC6533715,,10.1186/s12931-019-1049-3,https://pubmed.ncbi.nlm.nih.gov/31122243/,/31122243/,Dr. Occhipinti reports personal fees from Imbi...,Grants and funding ---/Ministero dellIstruzion...,"Mariaelena Occhipinti , Matteo Paoletti , Br...",,Massimo Pistolesi,mundipharma,Dr. Pistolesi reports grants from MINISTRY OF ...
3,31243042,Multicenter analysis of outcomes in blastic pl...,"Justin Taylor 1, Michael Haddadin 1, Vivek A U...",Blood. 2019 Aug 22;134(8):678-687. doi: 10.118...,Taylor J,Blood,2019,2019/06/28,PMC6706810,,10.1182/blood.2019001144,https://pubmed.ncbi.nlm.nih.gov/31243042/,/31243042/,Conflict-of-interest disclosure: N.M.-S. has r...,Grants and funding K08 CA230319/CA/NCI NIH HHS...,"Justin Taylor , Michael Haddadin , Vivek A U...",,Steven M Horwitz,mundipharma,S.M.H. has received research funding from Fort...
4,31142617,Ozone-primed neutrophils promote early steps o...,"Natacha Rocks 1, Céline Vanwinge 1, Coraline R...",Thorax. 2019 Aug;74(8):768-779. doi: 10.1136/t...,Rocks N,Thorax,2019,2019/05/31,,,10.1136/thoraxjnl-2018-211990,https://pubmed.ncbi.nlm.nih.gov/31142617/,/31142617/,Competing interests: DC is the founder of Aqui...,,"Natacha Rocks , Céline Vanwinge , Coraline R...",,Didier Cataldo,mundipharma,"DC is the founder of Aquilon Pharmaceuticals, ..."
5,32237643,Opioids crisis in France? Findings and reality,Rodrigue Deleens 1,Rev Prat. 2019 Oct;69(8):826-830.,Deleens R,Rev Prat,2019,2020/04/03,,,,https://pubmed.ncbi.nlm.nih.gov/32237643/,/32237643/,R. Deleens déclare des liens ponctuels (travau...,,Rodrigue Deleens,,Rodrigue Deleens,mundipharma,R. Deleens déclare des liens ponctuels (travau...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
453,31684698,Adductor canal blocks for postoperative pain t...,"Alexander Schnabel 1, Sylvia U Reichl 2, Steph...",Cochrane Database Syst Rev. 2019 Oct 26;2019(1...,Schnabel A,Cochrane Database Syst Rev,2019,2019/11/06,PMC6814953,,10.1002/14651858.CD012262.pub2,https://pubmed.ncbi.nlm.nih.gov/31684698/,/31684698/,Alexander Schnabel: none known. Sylvia U Reich...,,"Alexander Schnabel , Sylvia U Reichl , Steph...",,Esther Pogatzki-Zahn,mundipharma international,Esther Pogatzki‐Zahn received financial suppor...
454,30649604,Adaptation and qualitative evaluation of encou...,"Pola Hahlweg 1, Isabell Witzel 2, Volkmar Müll...",Arch Gynecol Obstet. 2019 Apr;299(4):1141-1149...,Hahlweg P,Arch Gynecol Obstet,2019,2019/01/17,PMC6435605,,10.1007/s00404-018-5035-7,https://pubmed.ncbi.nlm.nih.gov/30649604/,/30649604/,PH’s institution received the grant from resea...,Grants and funding NWF 15_10/research funding ...,"Pola Hahlweg , Isabell Witzel , Volkmar Müll...",,Isabelle Scholl,mundipharma gmbh,IS’s institution received the grant from resea...
456,30442715,The potential impact of azithromycin in idiopa...,"Claudio Macaluso 1 2, Joaquín Maritano Furcada...",Eur Respir J. 2019 Feb 14;53(2):1800628. doi: ...,Macaluso C,Eur Respir J,2019,2018/11/17,,,10.1183/13993003.00628-2018,https://pubmed.ncbi.nlm.nih.gov/30442715/,/30442715/,Conflict of interest: C. Macaluso has nothing ...,Grants and funding 20719/VAC_/Versus Arthritis...,"Claudio Macaluso , Joaquín Maritano Furcada ...",,Elizabeth A Renzoni,mundipharma,Conflict of interest: E.A. Renzoni has receive...
457,31310600,Intranasal sufentanil versus intravenous morph...,"Marc Blancher 1, Maxime Maignan 1 2, Cyrielle ...",PLoS Med. 2019 Jul 16;16(7):e1002849. doi: 10....,Blancher M,PLoS Med,2019,2019/07/17,PMC6634380,,10.1371/journal.pmed.1002849,https://pubmed.ncbi.nlm.nih.gov/31310600/,/31310600/,I have read the journals policy and the author...,Grants and funding The study was supported by ...,"Marc Blancher , Maxime Maignan , Cyrielle C...",,Maxime Maignan,mundipharma,MM has received grants and personal fees from ...


In [175]:
# df_exploded i want to see the number of authors for each pmid with groupby
mundipharma_df.groupby('pmid').size().reset_index(name='author').sort_values('pmid').to_csv('mundipharma_authors.csv', index=False)

In [204]:
pmid = 30775373
# df_exploded[(df_exploded['pmid'] == pmid) & (df_exploded['author'] == 'M Vecchi')]
df_exploded[(df_exploded['organizations'].str.contains('mundipharma', na=False)) & (df_exploded['pmid'] == pmid)]['author']
# df_exploded[(df_exploded['pmid'] == pmid)].value_counts('author')


226    Iñigo Ojanguren
Name: author, dtype: object

In [100]:
df_exploded[df_exploded['organizations'].str.contains('mundipharma', na=False)].author.value_counts()

Dave Singh             14
Claus F Vogelmeier     12
Alberto Papi           12
Jean Bousquet          11
Louisa Degenhardt      11
                       ..
Massimo Pistolesi       1
Tiago M Alfaro          1
Søren Overgaard         1
Fulvio Braido           1
Elizabeth A Renzoni     1
Name: author, Length: 346, dtype: int64

In [84]:
df_exploded.to_csv('mundipharma_coi.csv', index=False)

In [199]:
class COIReason(BaseModel):
    coi_reason: List[str] = Field(description="Reason(s) for the conflict of interest.")

def extract_coireason_info(author: str, organization: str, coi_text: str) -> List[dict]:
    try:
        messages = [
            {"role": "system", "content": f"Extract the nature of relationship between author: {author} and organization: {organization}."},
            {"role": "user", "content": coi_text},
        ]
        
        completion = client.beta.chat.completions.parse(
            model="gpt-4o-2024-08-06",
            messages=messages,
            response_format=COIReason,
            temperature=0.0,
        )
        
        event = completion.choices[0].message.parsed
        event_dict = event.dict()        
        return event_dict['cois']
    
    except Exception as e:
        print(f"Error processing COI text: {e}")
        return []



In [None]:
filtered_df.head(2)

Index(['pmid', 'title', 'authors', 'citation', 'first_author', 'journal',
       'publication_year', 'create_date', 'pmcid', 'nihms_id', 'doi', 'url',
       'href', 'coi', 'grants', 'authors_index', 'abstract', 'affiliation',
       'company', 'author', 'organizations', 'sentence', 'coi_reason_info'],
      dtype='object')

In [221]:
from tqdm import tqdm
from pydantic import BaseModel, Field
from typing import List
import pandas as pd

tqdm.pandas()

class COIReason(BaseModel):
    coi_reason: List[str] = Field(description="Reason(s) for the conflict of interest.")

def extract_coireason_info(author: str, organization: str, coi_text: str) -> List[str]:
    try:
        messages = [
            {
                "role": "system",
                "content": (
                    f"Categorize the reasons for the conflict of interest (COI) between the author '{author}' "
                    f"and the organization '{organization}'. "
                    "Provide a concise list of categories such as 'Research grants', 'Consulting fees', "
                    "'Speaking engagements', 'Stock ownership', etc. Do not include additional text or explanations."
                )
            },
            {"role": "user", "content": coi_text},
        ]
        
        completion = client.beta.chat.completions.parse(
            model="gpt-4o-2024-08-06",
            messages=messages,
            response_format=COIReason,
            temperature=0.0,
        )
        
        event = completion.choices[0].message.parsed
        event_dict = event.dict()        
        return event_dict.get('coi_reason', [])
    
    except Exception as e:
        print(f"Error processing COI text for author '{author}' and organization '{organization}': {e}")
        return []


# Apply function to filtered_df
filtered_df['coi_reason_info'] = filtered_df.progress_apply(
    lambda row: extract_coireason_info(row['author'], row['organizations'], row['coi'])
    if all(k in row for k in ['author', 'organizations', 'coi']) else [],
    axis=1
)


  0%|          | 0/349 [00:00<?, ?it/s]/var/folders/8k/kp_pwhv96pjcp_nh49ycpw0c0000gn/T/ipykernel_11722/2609236764.py:34: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  event_dict = event.dict()
100%|██████████| 349/349 [05:30<00:00,  1.05it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['coi_reason_info'] = filtered_df.progress_apply(


In [223]:
filtered_df.to_csv('COI_Grunenthal_processed.csv', index=False)

In [176]:
# Get unique PMIDs in df_combined and filtered_df
all_pmids = set(df_combined['pmid'])
filtered_pmids = set(filtered_df['pmid'])

# Find PMIDs that are in df_combined but not in filtered_df
missing_pmids = all_pmids - filtered_pmids

# Display the missing PMIDs
missing_pmids


{29607496, 33290347, 33400442}

In [195]:
# 29607496
df_combined[df_combined['pmid'] == 33400442]['authors'].values[0]

'Hartwell M, Greiner B, Dunn K, Croff J, Beaman J.'

In [196]:
df_combined[df_combined['pmid'] == 33400442]['coi'].values[0]

'Conflict of interest: AEE is an employee of Grünenthal Pharma, Spain.'

In [194]:
df_combined[df_combined['pmid'] == 33400442]

Unnamed: 0,pmid,title,authors,citation,first_author,journal,publication_year,create_date,pmcid,nihms_id,...,href,coi,grants,authors_index,abstract,affiliation,company,author,organizations,sentence
1582,33400442,Prescription Opioid Use and Laboratory Value D...,"Hartwell M, Greiner B, Dunn K, Croff J, Beaman J.",Pain Physician. 2021 Jan;24(1):E95-E100.,Hartwell M,Pain Physician,2021,2021/01/05,PMC9810267,NIHMS1859494,...,/33400442/,Conflict of interest: AEE is an employee of Gr...,Grants and funding P20 GM109097/GM/NIGMS NIH H...,"Micah Hartwell , Benjamin Greiner , Kelly Du...",Background: The use of opioids for the treatme...,Affiliations 1 Department of Psychiatry and Be...,Grünenthal,Micah Hartwell,[],Conflict of interest: AEE is an employee of Gr...
1583,33400442,Prescription Opioid Use and Laboratory Value D...,"Hartwell M, Greiner B, Dunn K, Croff J, Beaman J.",Pain Physician. 2021 Jan;24(1):E95-E100.,Hartwell M,Pain Physician,2021,2021/01/05,PMC9810267,NIHMS1859494,...,/33400442/,Conflict of interest: AEE is an employee of Gr...,Grants and funding P20 GM109097/GM/NIGMS NIH H...,"Micah Hartwell , Benjamin Greiner , Kelly Du...",Background: The use of opioids for the treatme...,Affiliations 1 Department of Psychiatry and Be...,Grünenthal,Benjamin Greiner,[],Conflict of interest: AEE is an employee of Gr...
1584,33400442,Prescription Opioid Use and Laboratory Value D...,"Hartwell M, Greiner B, Dunn K, Croff J, Beaman J.",Pain Physician. 2021 Jan;24(1):E95-E100.,Hartwell M,Pain Physician,2021,2021/01/05,PMC9810267,NIHMS1859494,...,/33400442/,Conflict of interest: AEE is an employee of Gr...,Grants and funding P20 GM109097/GM/NIGMS NIH H...,"Micah Hartwell , Benjamin Greiner , Kelly Du...",Background: The use of opioids for the treatme...,Affiliations 1 Department of Psychiatry and Be...,Grünenthal,Kelly Dunn,[],Conflict of interest: AEE is an employee of Gr...
1585,33400442,Prescription Opioid Use and Laboratory Value D...,"Hartwell M, Greiner B, Dunn K, Croff J, Beaman J.",Pain Physician. 2021 Jan;24(1):E95-E100.,Hartwell M,Pain Physician,2021,2021/01/05,PMC9810267,NIHMS1859494,...,/33400442/,Conflict of interest: AEE is an employee of Gr...,Grants and funding P20 GM109097/GM/NIGMS NIH H...,"Micah Hartwell , Benjamin Greiner , Kelly Du...",Background: The use of opioids for the treatme...,Affiliations 1 Department of Psychiatry and Be...,Grünenthal,Julie Croff,[],Conflict of interest: AEE is an employee of Gr...
1586,33400442,Prescription Opioid Use and Laboratory Value D...,"Hartwell M, Greiner B, Dunn K, Croff J, Beaman J.",Pain Physician. 2021 Jan;24(1):E95-E100.,Hartwell M,Pain Physician,2021,2021/01/05,PMC9810267,NIHMS1859494,...,/33400442/,Conflict of interest: AEE is an employee of Gr...,Grants and funding P20 GM109097/GM/NIGMS NIH H...,"Micah Hartwell , Benjamin Greiner , Kelly Du...",Background: The use of opioids for the treatme...,Affiliations 1 Department of Psychiatry and Be...,Grünenthal,Jason Beaman,[],Conflict of interest: AEE is an employee of Gr...
