In [1]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression

In [2]:
# Sample biography text. The value starts and ends with a newline and the
# first sentence is preceded by a single space, exactly as a triple-quoted
# literal with the text on its own line would produce.
biography = (
    "\n"
    " John Smith is a professor of Computer Science at XYZ University."
    " He received his PhD in Computer Science from ABC University in 2005."
    " Prior to joining XYZ University, he worked as a research scientist at DEF Labs."
    " His research interests include machine learning, natural language processing, and data mining."
    " Professor Smith is a member of the Association for Computing Machinery (ACM)"
    " and has received several awards for his contributions to the field."
    "\n"
)

In [3]:
# Category labels to extract; these are the same label strings that appear
# as the second element of each training example.
categories = [
    "education",
    "professional",
    "interests",
    "affiliation",
    "awards",
]

In [4]:
# Train a text classifier: labeled (sentence, category) training examples
training_data = [
    ("Dr. Robert pursued his undergraduate studies at Columbia University, where he obtained a Bachelor of Science degree in Biology with honors. ", "education"),
    (" Dr. Robert embarked on a rigorous residency program in radiology at Massachusetts General Hospital", "professional"),
    ("His special interests lie in the early detection and diagnosis of cancer using advanced imaging techniques.", "interests"),
    ("Dr. Robert is an active member of numerous professional organizations, including the American College of Radiology, the Radiological Society of North America, and the International Society of Radiology. ", "affiliation"),
    (" He has received the Outstanding Radiologist of the Year Award from the American Board of Radiology for his exceptional clinical skills and dedication to patient care. ", "awards"),
    ("He has been awarded the Distinguished Researcher Award by the Radiological Society of North America for his significant contributions to medical imaging research.", "awards"),
    
    (" Dr. Paulo's educational journey began at the prestigious University of São Paulo, where he completed his undergraduate studies in Medicine.", "education"),
    ("He has served in various leadership positions, including Head of the Radiology Department and Director of Imaging Services.  ", "professional"),
    ("His primary research interests lie in the development of advanced imaging techniques for early cancer detection, particularly in breast and lung cancer. ", "interests"),
    ("Dr. Paulo is an active member of various professional organizations, including the Brazilian Radiological Society and the International Society of Radiology. ", "affiliation"),
    ("He has received the Radiology Excellence Award from the Brazilian Radiological Society for his outstanding clinical skills and research contributions. ", "awards"),
    
    ("Dr. Nabil completed her undergraduate studies at Columbia University, where she earned a Bachelor of Science degree in Biology with honors.", "education"),
    ("She joined a leading medical center known for its cutting-edge technology and innovative research.  ", "professional"),
    ("Her special interests lie in exploring new imaging techniques, improving diagnostic accuracy, and optimizing patient outcomes. ", "interests"),
    ("She holds a faculty position at Harvard Medical School, where she actively participates in teaching and mentoring residents and fellows,She is also a senior radiologist at Massachusetts General Hospital, ", "affiliation"),
    (" She has received the prestigious Radiology Excellence Award from the American Association of Radiologists for her outstanding research and contributions to the field.", "awards"),
    
    ("Dr. Ezra's educational journey began at Harvard University, where he earned his Bachelor of Science degree in Biology.  ", "education"),
    (" Dr. Ezra specialized in radiology, undertaking a residency program at a prominent teaching hospital.  ", "professional"),
    ("He has dedicated a significant portion of his career to conducting groundbreaking research in radiology, focusing on areas such as image-guided interventions, novel imaging technologies, and artificial intelligence applications in radiology. ", "interests"),
    (" He holds positions as a consultant radiologist or as a faculty member. ", "affiliation"),
    (" He has been recognized for his excellence in research, clinical practice, and teaching, receiving prestigious honors from professional organizations and institutions. ", "awards"),
    
    ("Dr. Myers Jeffrey completed his undergraduate studies at Harvard University, earning a Bachelor of Science degree in Biology with honors. ", "education"),
    (" Dr. Myers Jeffrey joined a prestigious hospital as a staff radiologist.  ", "professional"),
    ("Dr. Jeffrey's special interests lie in the areas of oncologic imaging and interventional radiology. ", "interests"),
    ("He holds memberships in the American College of Radiology (ACR), the Radiological Society of North America (RSNA), and the Society of Interventional Radiology (SIR).  ", "affiliation"),
    (" He has received multiple awards for his outstanding research endeavors, including the Radiology Research Achievement Award and the Distinguished Radiologist of the Year Award.  ", "awards"),
    
    ("Dr. Licitra Lisa completed her undergraduate studies at Harvard University, where she earned a Bachelor of Science degree in Biology.  ", "education"),
    ("Dr. Lisa joined the prestigious National Institutes of Health (NIH) as a research fellow in the Radiology and Imaging Sciences department. ", "professional"),
    (" Dr. Licitra Lisa has been actively involved in medical research, focusing on areas such as oncological imaging, interventional radiology, and the development of novel imaging technologies. ", "interests"),
    ("Dr. Lisa is an active member of several professional organizations, including the American College of Radiology (ACR), the Society of Interventional Radiology (SIR), and the Radiological Society of North America (RSNA).  ", "affiliation"),
    (" She has been honored with the prestigious Outstanding Researcher Award by the Radiological Society of North America (RSNA) for her groundbreaking research in medical imaging.  ", "awards"),
    
    ("Dr. Jennifer R. Grandis received her medical degree from the University of Pittsburgh School of Medicine, Pennsylvania, and completed her internship from the same institution. ", "education"),
    ("Dr. Grandis is an American Cancer Society Clinical Research Professor, and a member of the American Society for Clinical Investigation, the American Association of Physicians and the Institute of Medicine of the National Academies. ", "professional"),
    ("SHe interested in Head and neck cancer, signal transduction, precision medicine, experimental therapeutics, preclinical cancer models, receptor crosstalk ", "interests"),
    ("Dr. Rubin is an active member of several professional organizations, including the American Academy of Otolaryngology-Head and Neck Surgery (AAO-HNS) and the American Head and Neck Society (AHNS).  ", "affiliation"),
    ("She has been honored with the prestigious Medical Research Excellence Award for her groundbreaking work in head and neck cancer research ", "awards"),
    
    ("Amanda Psyrri, MD-PhD, achieved her MD from Medical School at University of Patras in Greece in 1993. ", "education"),
    ("From 1996-2002, Prof. Psyrri did her Residency in Internal Medicine, followed by Medical Oncology/Hematology Fellowship at Yale University School of Medicine & Yale Cancer Center in the USA.  ", "professional"),
    ("She research projects are focused on combined immunotherapy to treat recurrent/metastatic HNSCC and identifying predictors for response to PD1 inhibitors in HNSCC.  ", "interests"),
    ("She is Board-Certified in Internal Medicine and Medical Oncology in USA and in Greece. Prof. Psyrri also did a research Fellowship in Cancer Virology at Professor Daniel DiMaio's laboratory at Yale University (2000-2002). ", "affiliation"),
    ("She has been the recipient of the Outstanding Researcher Award from the International Society of Oncology, acknowledging her significant contributions to cancer research.  ", "awards"),
    
    
    ("Barbara Burtness, MD is Professor of Medicine (Medical Oncology) at the Yale School of Medicine, Chief Translational Research Officer, and Associate Cancer Center Director for Translational Research at the Yale Cancer Center. ", "professional"),
    ("She fellowship in Memorial Sloan Kettering Cancer Center (1993),internship in Yale-New Haven Hospital (1987),MD in SUNY at Stonybrook (1986),AB in Bryn Mawr College (1982) ", "education"),
    ("She interested in Developmental TherapeuticsHead and Neck Cancers ProgramInternal MedicineK12 Calabresi Immuno-Oncology Training Program (IOTP)Medical Oncology ", "interests"),
    ("she became a member of the Eastern Cooperative Oncology Group, a consortium of researchers who test new treatments in clinical trials. ", "affiliation"),
    ("Yale Cancer Center Clinical Science Prize from Yale Cancer Cente,Robert L. Krigel Award, Excellence in Teaching and Clinical Oncology from Fox Chase Cancer Center,Top Doctor from U.S. News and World Report, Best Doctors from Best Doctors in America ", "awards"),
    
    ("Remco de Bree (1966) studied medicine in Utrecht, The Netherlands and PhD research on the VU University Medical Center was about the clinical application of radiolabelled monoclonal antibodies in head and neck cancer patients. ", "education"),
    (" Remco de Bree is a full professor and head of the department of Head and Neck Surgical Oncology at the University Medical Center Utrecht.", "professional"),
    (" His special interests are the clinical applications and improvements of diagnostic imaging techniques, sentinel node procedure and sarcopenia in head and neck cancer patients.  ", "interests"),
    ("He is the chair of the Research steering committee of the Dutch Head and Neck Society (NWHHT), board member of the NWHHT, member of several editorial boards, member of the advisory board of the Dutch Oncology Research Platform (DORP), the chair of the committee for revision of the Dutch guidelines for head and neck tumors and board member of the Dutch head and neck cancer patient advocacy group (PVHH). ", "affiliation"),
    ("He received Best PhD thesis Dutch Otolaryngology Society in 1996 ", "awards"),
    
    ("Dr. Lee Nancy  MD from University of Medicine and Dentistry of New Jersey and Radiation Oncology from Columbia Presbyterian Medical Center", "education"),
    ("Dr. Nancy Lee is a radiation oncologist in New York ", "professional"),
    ("She interested in advanced forms of radiation to treat head and neck cancer, including thyroid cancer.  ", "interests"),
    (" She is Vice Chair for Experimental Therapeutics and Department of Radiation Oncology, Service Chief for Head & Neck Radiation Oncology, Service Chief for Proton Therapy ", "affiliation"),
    ("She received  Exceptional Women in Medicine Award, Castle Connolly (2023), Castle Connolly from America's Top Doctors (2023) and Castle Connolly from New York Magazine Top Doctors (2013-2020) ", "awards"),
    
    ("Dr. Liao Chun Ta educational journey began at the prestigious National Taiwan University, where he pursued a Bachelor of Medicine degree. ", "education"),
    ("Dr. Chun Ta Liao is ENT-Otolaryngologist in Taoyuan. ", "professional"),
    ("His research focused on innovative approaches to the treatment of hearing loss, sinus disorders, voice disorders, and head and neck cancers. ", "interests"),
    ("He is a member of the American Academy of Otolaryngology-Head and Neck Surgery, the International Society of Otolaryngologists, and the Taiwan Otolaryngological Society. ", "affiliation"),
    ("Taiwan Oncology Society honors one candidate each year with Dr. Tung Ta-Cheng′s Basic Cancer Research Award. ", "awards"),
    
    ("Dr. Zhang began his medical education at Hunan Medical University in Hunan, China. ", "education"),
    (" Dr. Zhang Wei began his career as an oncologist at Beijing Cancer Hospital, where he dedicated himself to providing the highest quality care to cancer patients.", "professional"),
    ("Dr. Zhang Wei has a profound interest in medical research, constantly seeking innovative solutions and advancements in the field of oncology.  ", "interests"),
    ("In 1992, Dr. Zhang would go on to become a Research Fellow at the National Hepatobilliary and Enteric Research Center in the People’s Republic of China. ", "affiliation"),
    (" He has been honored with the prestigious Nobel Prize in Medicine for his groundbreaking research in targeted therapies. ", "awards"),
    
    ("Dr. Califano completed a fellowship in head and neck surgical oncology at Memorial Sloan Kettering Cancer Center and a residency in otolaryngology-head and neck surgery at Johns Hopkins Hospital and medical degree from Harvard Medical School.", "education"),
    ("Dr. Califano served as a professor in the Department of Otolaryngology-Head and Neck Surgery at The Johns Hopkins University, School of Medicine. ", "professional"),
    ("His expertise includes minimally invasive treatment of tumors of the oral cavity, larynx, pharynx, thyroid, and neck to minimize cosmetic impact and maximize function, with an interest in tumors of the skull base, mouth, salivary glands, and premalignant conditions of the upper aerodigestive tract. ", "interests"),
    ("Dr. Califano currently serves as director of the UC San Diego Health Moores Cancer Center, which is the region's only National Cancer Institute (NCI)-designated Comprehensive Cancer Center, a designation reserved for institutions with the highest achievements in cancer research, clinical care, education, and community contributions. ", "affiliation"),
    ("he has received the Excellence in Medical Research award from the National Cancer Institute for his groundbreaking research endeavors. ", "awards"),
    
    
    ("Dr. Lewis James pursued his undergraduate studies at Stanford University, where he graduated with honors, earning a Bachelor of Science degree in Biology. ", "education"),
    ("He join the faculty of Memorial Sloan Kettering Cancer Center as an attending physician in the Department of Medical Oncology. ", "professional"),
    (" His research focuses on developing novel immunotherapeutic approaches to target and eliminate cancer cells. ", "interests"),
    ("Dr. James is an active member of various professional organizations, including the American Society of Clinical Oncology (ASCO), the American Association for Cancer Research (AACR), and the Society for Immunotherapy of Cancer (SITC). ", "affiliation"),
    ("He was honored with the Distinguished Researcher Award by the American Society of Clinical Oncology (ASCO) for his groundbreaking work in immunotherapy. ", "awards"),
    
    ("Professor Kevin Harrington studied medicine at St Bartholomew’s Hospital, London, and began focusing on head and neck cancer while a PhD student at Hammersmith Hospital. ", "education"),
    ("Professor Harrington was appointed as Head of the ICR’s Division of Radiotherapy and Imaging in 2013. ", "professional"),
    ("Professor Harrington is the national chair of the CRUK Advanced Radiotherapy Technologies Network Accelerator (ART-NET) and has held a CRUK programme grant for research in head and neck cancer  ", "interests"),
    (" He is Director of the CRUK/Wellcome Clinical Research PhD studentship programme at ICR and co-chairs the Research Sub-Committee of the ICR/Imperial College CRUK Major Centre. ", "affiliation"),
    ("He honors are the Distinguished Oncologist of the Year award and the Excellence in Cancer Research award. ", "awards"),
    
    ("René Leemans, MD, PhD is chair of the Department of Otolaryngology Head and Neck Surgery at the VU University Medical Center ", "education"),
    ("Professor C. René Leemans, MD, PhD is chair of the Department of Otolaryngology Head and Neck Surgery at Amsterdam UMC.  ", "professional"),
    ("His special interests include head and neck oncology, reconstructive and microvascular surgery, and basic research. ", "interests"),
    (" He is Director of the Otolaryngology Residency Program and the Advanced Fellowship Program in Head and Neck Surgery and Oncology at Amsterdam UMC and  his scientific standing has been recognised by election as President of the Netherlands Society of Otolaryngology-Head and Neck Surgery, and the Dutch and, currently European Head and Neck Societies ", "affiliation"),
    (" He has been honored with the Distinguished Oncologist Award by the International Society of Oncology for his outstanding achievements in patient care, research, and advocacy.", "awards"),
    
    ("Dr Haddad received his MD Degree from St. Joseph University French School of Medicine in Beirut and completed his residency in internal medicine at St Luke's Roosevelt Medical Center in New York City, and completed a fellowship in hematology oncology at the University of Maryland Cancer Center in Baltimore. ", "education"),
    ("He joined the prestigious Memorial Sloan Kettering Cancer Center in New York City, where he completed his residency in Internal Medicine ", "professional"),
    ("Dr. Haddad Robert has a particular interest in immunotherapy and targeted therapies for various types of cancer.  ", "interests"),
    (" He is leading numeous trials of novel agents in head and neck cancer , particularly in the areas of immunotherapy and novel biologic agents .", "affiliation"),
    ("he has procured include the Outstanding Oncologist Award from the American Society of Clinical Oncology (ASCO) and the Innovator in Cancer Research Award from the National Cancer Institute (NCI). ", "awards"),
    
    ("Dr. Gregory completed his undergraduate studies at Harvard University, where he majored in Biology with a focus on molecular genetics and graduated with honors and went on to pursue a Doctor of Medicine (MD) degree at Johns Hopkins University School of Medicine.  ", "education"),
    ("He joined the faculty at Sloan Kettering as an attending physician. ", "professional"),
    ("His primary research interests include the development of targeted therapies for rare and aggressive cancers, precision medicine, and immunotherapy. ", "interests"),
    ("Dr. Gregory is an esteemed member of various professional organizations and medical societies, including the American Society of Clinical Oncology (ASCO), the American Association for Cancer Research (AACR), and the Society for Immunotherapy of Cancer (SITC).  ", "affiliation"),
    ("He has been honored with the American Association for Cancer Research (AACR) Award for Outstanding Achievement in Clinical Research and the American Society of Clinical Oncology (ASCO) Young Investigator Award. ", "awards"),
    
    ("Dr. Tuula completed his undergraduate studies at the University of Helsinki, where he earned his Bachelor of Science degree in Biology.  ", "education"),
    (" Dr. Tuula joined the faculty of Helsinki University Hospital as a consultant oncologist.  ", "professional"),
    ("His special interests lie in the fields of targeted therapies, immunotherapy, and personalized medicine. ", "interests"),
    ("Dr. Tuula is an active member of several professional organizations, including the American Society of Clinical Oncology (ASCO), the European Society for Medical Oncology (ESMO), and the Finnish Oncology Society. ", "affiliation"),
    ("Researcher Award from the Finnish Cancer Society and the International Oncology Excellence Award.  ", "awards"),
    
    ("Dr. William. completed his undergraduate studies at Harvard University, where he earned a Bachelor of Science degree in Biology ", "education"),
    (" He completed his residency training in Internal Medicine at Massachusetts General Hospital, where he gained comprehensive clinical experience in diagnosing and treating various medical conditions.  ", "professional"),
    (" His primary areas of interest include novel targeted therapies, immunotherapy, and precision medicine.  ", "interests"),
    ("Dr. William is an active member of various professional organizations, including the American Society of Clinical Oncology (ASCO), the American Association for Cancer Research (AACR), and the Society for Immunotherapy of Cancer (SITC).  ", "affiliation"),
    (" He has received the prestigious American Cancer Society's Award for Excellence in Cancer Research and the Society for Immunotherapy of Cancer's Lifetime Achievement Award. ", "awards"),
     
      
    ("Dr. Tahara Makoto pursued his undergraduate studies at the University of Tokyo, where he earned his Bachelor of Medicine degree  ", "education"),
    ("After completing his training, Dr. Makoto joined the prestigious Tokyo Cancer Institute as a junior attending physician.  ", "professional"),
    ("s. He has a particular interest in precision medicine and targeted therapies, striving to develop personalized treatment approaches for each patient.  ", "interests"),
    ("Dr. Tahara Makoto is an active member of several professional organizations, including the American Society of Clinical Oncology (ASCO), the European Society for Medical Oncology (ESMO), and the Japanese Society of Medical Oncology (JSMO).  ", "affiliation"),
    (" He has received numerous awards and honors for his groundbreaking research, including the prestigious Nobel Prize in Medicine, awarded in 2020, for his significant advancements in precision oncology and targeted therapies.  ", "awards"),
    
      
    (" Dr. Erich pursued his medical education at Johns Hopkins University School of Medicine, where he earned his Doctor of Medicine (M.D.) degree. ", "education"),
    (" Dr. Sturgis Erich underwent specialized training in oncology at the renowned Memorial Sloan Kettering Cancer Center in New York City.  ", "professional"),
    (" His research interests encompass various aspects of cancer biology, including the identification of novel therapeutic targets, personalized medicine approaches, and immunotherapy advancements.  ", "interests"),
    ("He is a Fellow of the American Society of Clinical Oncology (ASCO) and serves on the board of directors for the Society for Immunotherapy of Cancer (SITC).  ", "affiliation"),
    ("Dr. Erich has received the Distinguished Physician Award for his exemplary dedication to his patients and the advancement of oncology. ", "awards"),
    
      
    ("Dr. Adkins Douglas completed his undergraduate studies at Harvard University, where he earned a Bachelor of Science degree in Biology ", "education"),
    ("Dr. Adkins Douglas underwent rigorous training as an oncology resident at Massachusetts General Hospital ", "professional"),
    (" His primary areas of focus include immunotherapy, targeted therapies, and precision medicine.  ", "interests"),
    ("He is member of American Society of Clinical Oncology (ASCO)American Association for Cancer Research (AACR), Society for Immunotherapy of Cancer (SITC), American Medical Association (AMA) ", "affiliation"),
    ("he has received include the Outstanding Contributions to Cancer Research Award from the American Society of Clinical Oncology (ASCO) and the Distinguished Researcher Award from the National Cancer Institute (NCI). ", "awards"),
    
      
    ("CAROL R. BRADFORD completed BS in University of Michigan, College of LS and A, Ann Arbor, Michigan, Cellular and Molecular Biology, Magna Cum Laude, Highest Honors ", "education"),
    ("Dr. Carol embarked on residency training in oncology at a renowned cancer center. ", "professional"),
    ("Her primary focus is on identifying novel treatment strategies for cancer patients, particularly in the field of immunotherapy and targeted therapies. ", "interests"),
    ("She is fellow of the American Society of Clinical Oncology (ASCO) and a member of the American Association for Cancer Research (AACR). ", "affiliation"),
    ("She honor with Recipient of the 2010 University of Michigan Medical School Community Service Award, Recipient of the 2009 Castle Connolly National Physician of the Year Award,Fellow of the Executive Leadership in Academic Medicine Institute, Drexel University, 2008-2009,Elected into the Collegium Oto-Rhino-Laryngologicum Amicitae Sacrum (CORLAS), a prestigiousinternational honorary society of Otolaryngologists, 2006-present ", "awards"),
    
      
    ("Dr. Thomas completed her residency in Internal Medicine at Massachusetts General Hospital, followed by a fellowship in Medical Oncology at Memorial Sloan Kettering Cancer Center. ", "education"),
    (" Dr. Carey Thomas joined the faculty at Memorial Sloan Kettering Cancer Center as an attending physician in the Department of Medical Oncology. ", "professional"),
    ("Her special interests lie in targeted therapies for breast cancer, with an emphasis on precision medicine and personalized treatment plans.  ", "interests"),
    ("Dr. Carey Thomas holds memberships in several professional organizations, including the American Society of Clinical Oncology, the American Association for Cancer Research, and the Society for Immunotherapy of Cancer. ", "affiliation"),
    (" , she has received the Oncology Excellence Award from the American Society of Clinical Oncology.", "awards"),
    
      
    ("Dr. Fayette Jerome pursued his undergraduate studies at Harvard University, where he excelled in the field of biology. ", "education"),
    (" Dr. Fayette Jerome began his residency in Internal Medicine at Massachusetts General Hospital. ", "professional"),
    (" His special interests include targeted therapies, immunotherapy, and precision medicine. ", "interests"),
    ("He holds memberships in the American Society of Clinical Oncology (ASCO), the American Association for Cancer Research (AACR), and the International Society of Oncology (ISO) ", "affiliation"),
    (" He has received numerous awards and accolades for his exceptional work, including the prestigious Nobel Prize in Medicine in 2020 for his groundbreaking research on personalized cancer therapies. ", "awards"),
    
      
    ("Dr. Fakhry Carole pursued her education with utmost dedication, obtaining her Bachelor of Science degree in Biology from  Johns Hopkins University School of Medicine (2003) ", "education"),
    ("She joined a leading cancer center, where she worked diligently to provide comprehensive and compassionate care to her patients ", "professional"),
    (" Her special interests lie in exploring innovative therapies, targeted treatments, and the role of immunotherapy in cancer management.  ", "interests"),
    ("Her as a science advisory board member and to be a financial co-sponsor of her work elucidating oral cancer epidemiology, HPV natural history, and innovative mechanisms for the screening and early discovery of oral and oropharyngeal cancers and identifying which cancer patients are at risk for extra capsular spread of the disease. ", "affiliation"),
    ("She is Honor with American Academy of Otolaryngology travel grant- Conference on Research Education and Training in Otolaryngology (CRETO). ", "awards"),
    
      
    (" Dr. Rodrigo Juan Pablo completed his undergraduate studies at the Universidad de Buenos Aires, where he earned his Bachelor of Medicine degree.", "education"),
    ("Pablo began his professional journey as an oncologist at the Memorial Sloan Kettering Cancer Center.  ", "professional"),
    ("His research endeavors have centered around immunotherapy, targeted therapies, and the development of biomarkers for early cancer detection. ", "interests"),
    ("He is member Spanish Society Otolaryngology (board directors since 1996). ", "affiliation"),
    ("He is honor with Excellence in Cancer Research Award from the International Association for the Study of Lung Cancer. ", "awards"),
    
      
    ("Dr. Zevallos Jose obtained his Bachelor of Medicine and Surgery degree from the prestigious University of Buenos Aires Medical School in Argentina.  ", "education"),
    (" Dr. Jose embarked on an illustrious career as an Otolaryngologist ", "professional"),
    ("His special interests lie in exploring innovative treatment modalities, surgical techniques, and medical devices to enhance patient outcomes in Otolaryngology. ", "interests"),
    ("Dr. Jose maintains active membership in various professional organizations, including the American Academy of Otolaryngology-Head and Neck Surgery, the Argentine Society of Otolaryngology, and the International Federation of Otorhinolaryngological Societies. ", "affiliation"),
    ("Dr. Zevallos Jose has been honored with numerous awards and recognitions for his contributions to the field of Otolaryngology.  ", "awards"),
    
      
    ("Dr. William pursued his undergraduate studies at the prestigious Harvard University, where he obtained a Bachelor of Science degree in Biology.  ", "education"),
    ("Dr. William joined the esteemed faculty at Johns Hopkins School of Medicine as a resident in pathology.  ", "professional"),
    (" His primary research interests lie in the areas of cancer pathology and molecular diagnostics.  ", "interests"),
    ("Dr. William is an active member of several professional organizations and societies, including the American Society of Clinical Pathology (ASCP), the College of American Pathologists (CAP), and the American Association of Pathologists (AAP).  ", "affiliation"),
    ("He has authored several highly regarded medical textbooks, including Advances in Cancer Pathology: From Bench to Bedside and Molecular Diagnostics in Oncology: A Comprehensive Guide. ", "awards"),
    
      
    ("Dr. Roh Jong Lyel completed his undergraduate studies at Seoul National University ", "education"),
    ("He began his career as a resident physician at Seoul National University Hospital,including Associate Professor and Professor in the Department of Otorhinolaryngology-Head and Neck Surgery at Seoul National University College of Medicine. ", "professional"),
    (" His research interests revolve around the molecular biology of head and neck tumors, the development of targeted therapies, and the identification of biomarkers for improved diagnosis.", "interests"),
    (" He is a member of the American Society of Clinical Oncology (ASCO), the International Association for the Study of Lung Cancer (IASLC), and the Korean Society of Surgical Oncology (KSSO)", "affiliation"),
    ("He has been honored with the Excellence in Research Award from the Korean Society of Otorhinolaryngology-Head and Neck Surgery, the Distinguished Scientist Award from the Korean Cancer Association, and the Outstanding Researcher Award from Seoul National University Hospital ", "awards"),
    
      
    ("Dr. Francis Worden received his medical degree from the Indiana University School of Medicine in 1993. ", "education"),
    ("Dr. Worden joined the faculty at the University of Michigan Rogel Cancer Center, where he is currently working as a clinical investigator with both the multidisciplinary head and neck oncology team and the multidisciplinary lung cancer team. ", "professional"),
    ("Dr. Worden's research interests include organ preservation in head and neck cancer and endocrine oncology. ", "interests"),
    ("Francis P. Worden is an oncologist in Ann Arbor, Michigan and is affiliated with multiple hospitals in the area, including University of Michigan Health-Ann Arbor and Veterans Affairs Ann Arbor Healthcare System. ", "affiliation"),
    ("He has been honored with the prestigious Outstanding Researcher Award from the American Society of Clinical Oncology (ASCO) for his groundbreaking work in personalized cancer therapies.  ", "awards"),
      
    ("Dr. Duvvuri completed his undergraduate education in Bioengineering at the University of Pennsylvania, and remained there to obtain his MD and PhD degrees.  ", "education"),
    ("Umamaheswar Duvvuri, MD, PhD, FACS assistant professor of otolaryngology,specializes in head and neck oncology.  ", "professional"),
    (", Duvvuri U. Primary surgery for human papillomavirus-associated oropharyngeal cancer in Survival outcomes with or without adjuvant treatment. ", "interests"),
    ("Dr. Umamaheswar Duvvuri is an ENT-otolaryngologist in Pittsburgh, Pennsylvania and is affiliated with multiple hospitals in the area, including UPMC Horizon and UPMC Presbyterian Shadyside. ", "affiliation"),
    ("He awarded include the Excellence in Oncology Award, the Distinguished Researcher Award, and the Humanitarian Doctor of the Year Award. ", "awards"),
    
      
    ("Dr. Chen's educational journey began with a Bachelor of Science degree in Biology from National Taiwan University. ", "education"),
    ("She initially joined the faculty of Taipei Medical University as an assistant professor ", "professional"),
    ("Her research has focused on identifying novel biomarkers, developing targeted therapies, and enhancing the efficacy of immunotherapies. ", "interests"),
    ("She is a fellow of the American Association for Cancer Research (AACR) and serves on the advisory board of the International Society for Immunotherapy of Cancer (SITC). ", "affiliation"),
    ("She has been honored with several awards, including the prestigious Lasker Award for her groundbreaking research in immunotherapy.  ", "awards"),
    
      
    ("Dr. Li Guojun pursued his undergraduate studies at Peking University, where he obtained a Bachelor of Medicine degree in 1995.  ", "education"),
    ("Dr. Li Guojun joined the prestigious World Health Organization (WHO) as a research fellow and He served as the Director of Epidemiology at the Centers for Disease Control and Prevention (CDC) in Atlanta. ", "professional"),
    ("His primary areas of focus include infectious disease epidemiology, global health, and disease surveillance. ", "interests"),
    ("He is a fellow of the American College of Epidemiology (ACE) and a member of the International Society for Infectious Diseases (ISID), the Society for Epidemiologic Research (SER), and the International Association of Epidemiology (IAE). ", "affiliation"),
    ("He has received include the International Epidemiological Association (IEA) Award for Outstanding Contribution to Epidemiology, the Robert Koch Medal for Excellence in Infectious Disease Research, and the Albert Lasker Award for Public Health Achievement. ", "awards"),
    
      
    ("Dr. Nam Soon Yuhl completed his undergraduate studies at a prestigious university, where he obtained a Bachelor of Science degree in Biology . ", "education"),
    ("Dr. Yuhl embarked on his residency training in Internal Medicine at a respected teaching hospital.  ", "professional"),
    (" His research has focused on developing personalized treatment plans for patients, improving the quality of life during cancer treatment, and discovering novel targeted therapies. ", "interests"),
    ("  Dr. Yuhl actively engages with fellow oncologists and healthcare professionals, collaborating on research projects and sharing knowledge to enhance patient care. Dr. Yuhl also actively engages with fellow oncologists and healthcare professionals, collaborating on research projects and sharing knowledge to enhance patient care.", "affiliation"),
    ("He has been honored for his excellence in patient care, research contributions, and commitment to advancing cancer treatment.  ", "awards"),
    
    
      
    ("Dr. von Buchwald Christian pursued his undergraduate education at Ludwig Maximilian University of Munich, where he graduated with a Bachelor of Medicine then went on to complete his specialization in oncology at Charité - Universitätsmedizin Berlin, one of Europe's leading medical institutions and he earned his Medical Doctorate (MD) degree from Charité ", "education"),
    (" He began his professional journey as a resident physician at Charité, gaining hands-on experience in diagnosing and treating various forms of cancer ", "professional"),
    ("Dr. von Buchwald Christian has a particular interest in head and neck cancers. ", "interests"),
    ("Dr. von Buchwald Christian is an active member of several professional organizations and societies, including the American Society of Clinical Oncology (ASCO), the European Society for Medical Oncology (ESMO), and the International Association of Head and Neck Oncology (IAHNO). He is also a fellow of the American College of Surgeons (ACS) and a member of the International Society for Biological Therapy of Cancer (iSBTc). ", "affiliation"),
    ("He has received numerous awards, including the prestigious International Society of Head and Neck Oncology (ISHNO) Young Investigator Award for his outstanding research in the field of head and neck cancers.  ", "awards"),
    
      
    ("Dr. Tanguy embarked on his educational journey at the prestigious University of Paris. ", "education"),
    (" Dr. Tanguy joined the renowned Gustave Roussy Cancer Center in Villejuif, France, as a staff oncologist.  ", "professional"),
    (" His primary research interests lie in the field of immunotherapy, specifically focusing on the development of novel treatment modalities for advanced-stage cancers. ", "interests"),
    ("Dr. Tanguy is an active member of several professional organizations, including the American Society of Clinical Oncology (ASCO), the European Society for Medical Oncology (ESMO), and the International Association for the Study of Lung Cancer (IASLC). ", "affiliation"),
    ("Dr. Tanguy has received numerous accolades and awards for his exceptional contributions to the field of oncology and in 2015, he was honored with the prestigious Jean Bernard Award ", "awards"),
    
      
    ("Dr. Gillison completed her the Johns Hopkins School of Hygiene and Public Health, Baltimore, MD, USA, PHD, Clinical Investigation; Mentor: Keerti V. Shah, MD, DrPH ", "education"),
    ("She Professor of Medicine, Department of Thoracic/Head and Neck Medical Oncology, Division of Cancer Medicine, The University of Texas MD Anderson Cancer Center, Houston, TX ", "professional"),
    ("Her laboratory focuses on the role of human papillomavirus (HPV) infection in head and neck malignancies. ", "interests"),
    ("Dr. Gillison is an active member of several professional organizations and societies, including the American Society of Clinical Oncology (ASCO), the American Association for Cancer Research (AACR), and the International Society for Stem Cell Research (ISSCR). ", "affiliation"),
    (" She has received the Distinguished Researcher Award from the American Society of Clinical Oncology (ASCO) for her outstanding research contributions. ", "awards"),
    
      
    ("Dr. Wang completed his undergraduate education at National Taiwan University. ", "education"),
    (" He joined the renowned Massachusetts General Hospital in Boston as a resident physician, specializing in medical oncology.  ", "professional"),
    ("  He has conducted extensive studies on the genetic and molecular aspects of cancer, aiming to develop targeted therapies for improved patient outcomes.", "interests"),
    ("Dr. Wang is an active member of various professional organizations, including the American Society of Clinical Oncology (ASCO), the American Association for Cancer Research (AACR), and the Society for Translational Oncology (STO).  ", "affiliation"),
    ("He has been honored with the Medical Research Excellence Award from the American Association for Cancer Research (AACR) for his outstanding contributions to cancer research and he received the Excellence in Oncology Award from the American Society of Clinical Oncology (ASCO) in recognition of his exceptional patient care and clinical expertise. ", "awards"),
    
      
    ("Doctor of Medicine, University Turku, Finland, 1977. Doctor of Philosophy, University Turku, Finland, 1985. ", "education"),
    ("He general practitioner several municipal health services, Finland, 1977-1978. Assistant professor Turku University, 1988-1989. Member staff Turku University Central Hospital, 1989-1996, senior staff member, since 1996. ", "professional"),
    (" Dr. Reidar has shown a particular interest in medical research and advancements in otolaryngology.  ", "interests"),
    ("He member Finnish Medical Association (administrative council 1980-1985, regional chairman since 1989), Finnish Society for Otorhinolaryngology (board directors 1993-1998), American Academy Otolaryngology, American Association for Cancer Research, Collegium Oto-Rhino-Laryngologicum Amicitiae Sacrum. ", "affiliation"),
    ("He has received accolades for his clinical excellence, outstanding patient care, and significant research contributions.  ", "awards"),
    
      
    ("Dr. Alfio completed his undergraduate studies at the University of Palermo. ", "education"),
    ("Dr. Alfio began his professional career as a resident at the University of Palermo's Department of Otolaryngology ", "professional"),
    ("Dr. Alfio's research interests primarily revolve around head and neck oncology, laryngology, and surgical pathology.  ", "interests"),
    ("Dr. Alfio holds memberships in several professional organizations, including the American Academy of Otolaryngology-Head and Neck Surgery (AAO-HNS), the European Laryngological Society (ELS), and the Italian Society of Otorhinolaryngology.  ", "affiliation"),
    (" He awards are the Excellence in Otolaryngology Award and the Outstanding Medical Researcher Award.", "awards"),
    
      
    (" Dr. Primoz completed his undergraduate studies at a renowned medical school, where he excelled academically and demonstrated a profound interest in the field of oncology. ", "education"),
    ("He joined a prominent cancer center where he began his clinical practice, working alongside leading experts in the field. ", "professional"),
    ("His special interests lie in exploring targeted therapies for specific types of cancers and studying the potential of immunotherapy in cancer treatment. ", "interests"),
    (" His memberships include renowned organizations such as the American Society of Clinical Oncology (ASCO) and the European Society for Medical Oncology (ESMO). ", "affiliation"),
    ("He has been honored with prestigious awards recognizing his groundbreaking research, clinical expertise, and dedication to patient care. ", "awards"),
    
      
    ("Prince returned to Dalhousie University and completed a residency in otolaryngology-head and neck Surgery, followed by a two-year fellowship in advanced head and neck oncology and microvascular reconstructive surgery at the University of Michigan. ", "education"),
    ("He also served as the chief of the section of otolaryngology at the Ann Arbor VA Medical Center from 2000 until 2014; he continues to serve there as an active part time staff member. ", "professional"),
    (" He has a special interest and advanced training in the treatment of patients with head and neck cancer and in reconstruction of complex defects in the head and neck including the mouth, jaw, pharynx, larynx and skull. ", "interests"),
    ("Dr. Mark is an active member of several prestigious medical societies, including the American Academy of Otolaryngology-Head and Neck Surgery, the American Medical Association, and the Society of University Otolaryngologists. ", "affiliation"),
    ("He honor with Poliquin Prize for Original Research in Basic Science and Department of Otolaryngology, Scientific Achievement Award in Otolaryngology, Dalhousie University ", "awards"),
    
      
    ("She completed her undergraduate studies at a prestigious institution, earning a Bachelor of Science degree in Pharmacology. ", "education"),
    ("Dr. Li Jing embarked on a prolific professional journey as a pharmacologist.  ", "professional"),
    ("Dr. Li Jing has dedicated her career to advancing medical research, particularly in the areas of drug metabolism, pharmacokinetics, and neuropharmacology.  ", "interests"),
    (" She is a proud member of the American Society for Pharmacology and Experimental Therapeutics (ASPET), the International Society for Pharmacology (IUPHAR), and the Pharmacological Society. ", "affiliation"),
    ("Dr. Jing has been honored with the Pharmacologist of the Year Award by the Pharmacological Society for her exceptional contributions to the advancement of pharmacology and therapeutic research. ", "awards"),
    
      
    ("Dr. Vermorken completed his undergraduate studies at the University of Leuven in Belgium, where he received his Bachelor's degree in Medicine. ", "education"),
    ("He then joined the Department of Medical Oncology at the same institution, where he worked as a clinical oncologist and researcher for several years. ", "professional"),
    (" His research interests lie in head and neck cancer, gynecological cancer, and lung cancer.  ", "interests"),
    ("Dr. Vermorken is an active member of various professional organizations and associations, including the European Society for Medical Oncology (ESMO), the American Society of Clinical Oncology (ASCO), and the European Organization for Research and Treatment of Cancer (EORTC).  ", "affiliation"),
    ("He received the ESMO award in 2007 and on March 1, 2013, he received the title of Commander in the Order of Leopold for his contributions to oncology. ", "awards"),
    
      
    ("Dr Robert Lim is a graduate of both the University of St Andrews and University of Edinburgh where he obtained his medical degree (MBChB).  ", "education"),
    ("He has been a member of the Specialist Training Committee in Medical Oncology, instrumental in training many doctors both in internal medicine and oncology, providing invaluable mentorship and guidance.  ", "professional"),
    (" His special interests lie in precision medicine and personalized cancer therapies, aiming to tailor treatment plans to individual patients' unique molecular profiles. ", "interests"),
    ("He is member of American Society of Clinical Oncology and member of European Society of Medical Oncology ", "affiliation"),
    (" He has been the recipient of the coveted Excellence in Oncology Research award ", "awards"),
    
      
    ("Dr. Theodoros pursued his undergraduate education at the University of Athens. ", "education"),
    ("He joined the faculty of a prominent medical university, where he served as a professor of oncology and directed a research laboratory dedicated to understanding the molecular mechanisms of cancer development and progression. ", "professional"),
    ("His research interests primarily revolve around targeted therapies for various types of cancer, with a specific emphasis on developing novel treatment approaches to improve patient outcomes and quality of life. ", "interests"),
    ("Dr. Theodoros is an active member of various professional organizations, including the American Society of Clinical Oncology (ASCO), the European Society for Medical Oncology (ESMO), and the International Association for the Study of Lung Cancer (IASLC).  ", "affiliation"),
    ("He has received the prestigious Outstanding Researcher Award from the American Society of Clinical Oncology (ASCO) for his groundbreaking contributions to cancer research. ", "awards"),
    
    
      
    ("Dr. Machiels completed his undergraduate studies at the University of Brussels, where he received his Bachelor of Medicine and Bachelor of Surgery (MBBS) degree. ", "education"),
    ("He joined a prominent cancer center in Brussels, where he worked tirelessly to provide personalized and evidence-based care to his patients. ", "professional"),
    ("Dr. Machiels has a particular interest in the field of immunotherapy and its potential to revolutionize cancer treatment. ", "interests"),
    ("Dr. Machiels is an active member of several prestigious medical associations and societies, including the European Society for Medical Oncology (ESMO), the American Society of Clinical Oncology (ASCO), and the International Association for the Study of Lung Cancer (IASLC). ", "affiliation"),
    (" He has been honored with the Distinguished Oncologist of the Year award by the European Society for Medical Oncology (ESMO) for his outstanding clinical and research achievements.  ", "awards"),
    
    
      
    #(" ", "education"),
    #(" ", "professional"),
    #(" ", "interests"),
    #(" ", "affiliation"),
    #(" ", "awards"),
    
    

    

]


In [5]:
# Train a TF-IDF + logistic-regression sentence classifier on the labelled
# (text, label) pairs collected in `training_data`.
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform([text for text, _ in training_data])
y_train = [label for _, label in training_data]

classifier = LogisticRegression()
classifier.fit(X_train, y_train)

# Break the biography into sentence-like fragments and drop empty ones.
# NOTE: splitting on '.' is a naive boundary rule -- any period inside a
# sentence (for example an abbreviation such as "Dr.") also starts a new
# fragment, so such input will be classified in pieces.
sentences = [sentence.strip() for sentence in biography.split('.') if sentence.strip()]

# Vectorize and classify every sentence in a single batch instead of calling
# transform()/predict() once per sentence. Each row is transformed and
# predicted independently, so the labels match the per-sentence version.
# The guard keeps an empty biography a silent no-op (as the original loop was)
# rather than handing scikit-learn an empty batch.
if sentences:
    X_test = vectorizer.transform(sentences)
    predicted_categories = classifier.predict(X_test)

    for sentence, predicted_category in zip(sentences, predicted_categories):
        # Only report labels that were requested in `categories`.
        if predicted_category in categories:
            print("Category:", predicted_category)
            print("Sentence:", sentence)
            print()

Category: education
Sentence: John Smith is a professor of Computer Science at XYZ University

Category: education
Sentence: He received his PhD in Computer Science from ABC University in 2005

Category: professional
Sentence: Prior to joining XYZ University, he worked as a research scientist at DEF Labs

Category: interests
Sentence: His research interests include machine learning, natural language processing, and data mining

Category: awards
Sentence: Professor Smith is a member of the Association for Computing Machinery (ACM) and has received several awards for his contributions to the field

