In [None]:
# Initialize the KeyBERT model with its default constructor arguments.
# This single instance is reused by keywords_extractor and keyphrases_extractor.
kw_model = KeyBERT()

# Custom stop words: lowercase terms that the extractor functions drop from
# their results (compared against the lowercased keyword/keyphrase).
# Each term is listed exactly once.
custom_stop_words = [
    "jurisdiction", "authority", "organization", "organizations", "organizational", "nations", "oceans",
    "establishment", "development", "provides", "purpose", "outline", "ocean", "mandate", "doalos",
]

# Function to extract keywords from a document, excluding custom stop words
def keywords_extractor(doc):
    """Return the KeyBERT keywords of ``doc`` that are not custom stop words.

    Candidates are produced with a fresh ``KeyphraseCountVectorizer``. Only
    the first element of each entry returned by KeyBERT is kept, and an entry
    is discarded when its lowercased form appears in ``custom_stop_words``.
    """
    # NOTE(review): KeyBERT's documentation describes ``stop_words`` as unused
    # when a vectorizer is supplied -- confirm whether it has any effect here.
    scored_terms = kw_model.extract_keywords(
        doc,
        vectorizer=KeyphraseCountVectorizer(),
        stop_words='english',
    )

    # Strip the extra fields first, then apply the custom stop-word filter.
    terms = [entry[0] for entry in scored_terms]
    return [term for term in terms if term.lower() not in custom_stop_words]


# Function to extract keyphrases from a document, excluding custom stop words
def keyphrases_extractor(doc):
    """Return the KeyBERT keyphrases of ``doc`` that are not custom stop words.

    KeyBERT is asked for up to 10 phrases of one to three words, selected with
    max-sum from 15 candidates. Only the first element of each returned entry
    is kept, and an entry is discarded when its lowercased form appears in
    ``custom_stop_words``.
    """
    # NOTE(review): ``highlight=True`` is documented by KeyBERT as printing the
    # document with its keyphrases highlighted -- confirm this output is wanted
    # on every call.
    extraction_options = dict(
        keyphrase_ngram_range=(1, 3),
        stop_words='english',
        use_maxsum=True,
        nr_candidates=15,
        top_n=10,
        highlight=True,
    )
    scored_phrases = kw_model.extract_keywords(doc, **extraction_options)

    # The filter compares the whole phrase, so a multi-word phrase is removed
    # only if the entire phrase is itself listed in custom_stop_words.
    phrases = [entry[0] for entry in scored_phrases]
    return [phrase for phrase in phrases if phrase.lower() not in custom_stop_words]

In [None]:
# Run clean_text over the raw 'Source of Jurisdiction' column and store the result
data['Source of Jurisdiction Cleaned'] = data['Source of Jurisdiction'].apply(clean_text)
# Apply both extractors to each cleaned document and store the results in the
# new 'Source of Jurisdiction Keywords' and 'Source of Jurisdiction Keyphrases' columns
data['Source of Jurisdiction Keywords'] = data['Source of Jurisdiction Cleaned'].apply(keywords_extractor)
data['Source of Jurisdiction Keyphrases'] = data['Source of Jurisdiction Cleaned'].apply(keyphrases_extractor)
# Disabled step, kept for reference (refers to `df` and `keywords_highlight`, neither used above):
# df['Spatial Jurisdiction_highlights'] = df['Spatial Jurisdiction'].apply(keywords_highlight)

In [None]:
# Mapping of category name -> list of objective phrases.
# assign_categories assigns a category to a text when any one of the category's
# phrases occurs in that text (case-insensitive substring match), so a phrase
# listed under several categories assigns all of them.
category_map = {
    "Ocean and Marine Science": [
        "Advance Ocean Science", 
        "Improve Methodologies", 
        "Promote Peaceful Ocean Study", 
        "Enhance International Cooperation", 
        "Facilitate Data Exchange", 
        "Share Research Results", 
        "Develop Research Infrastructure and Support Oceanography Training", 
        "Marine Research and Investigations"
    ],
    
    "Agriculture and Food Security": [
        "Collect and Disseminate Information on Nutrition, Food, and Agriculture", 
        "Promote Scientific and Technological Research in Agriculture", 
        "Improve Education and Administration in Agriculture", 
        "Conserve Natural Resources in Agriculture", 
        "Enhance Agricultural Production Methods", 
        "Support Food and Agricultural Product Marketing", 
        "Promote Agricultural Credit and Commodity Policies", 
        "Provide Technical Assistance for Agriculture", 
        "Organize Missions for Government Support in Agriculture", 
        "Mobilizing Resources for Agricultural Development", 
        "Promoting Global Food Security", 
        "Financing Food Production and Policy Strengthening"
    ],
    
    "Shipping and Maritime Law": [
        "Provide Machinery for Cooperation Among Governments on Technical Shipping Matters", 
        "Encourage Removal of Discriminatory Actions and Restrictions in International Shipping", 
        "Address Unfair Restrictive Practices by Shipping Concerns", 
        "Consider Matters Related to Shipping's Effect on the Marine Environment", 
        "Facilitate Exchange of Information Among Governments", 
        "Consider Matters Affecting Freedom of Shipping for All Flags in International Trade",
        "Maritime Safety and Awareness"
    ],
    
    "International Law and Sustainable Marine Resource Management": [
        "Promote Understanding of UNCLOS and Related Agreements", 
        "Aid in Uniform Application of Legal Frameworks", 
        "Support Meetings on Sustainable Marine Resource Management and Dispute Resolution", 
        "Assist Commission on Limits of the Continental Shelf with Boundary Delineation", 
        "Provide Policy Guidance on Ocean and Maritime Law to the General Assembly", 
        "Offer Support to UNFSA Review Conference and State Parties Consultations", 
        "Provide Policy Recommendations for Ocean Affairs and Sustainable Fisheries"
    ],
    
    "International Cooperation and Coordination": [
        "Organize and Service Sessions of the Conference of the Parties and Subsidiary Bodies", 
        "Compile and Transmit Reports from Parties", 
        "Provide Technical and Financial Assistance to Parties", 
        "Prepare and Present Activity Reports to the Conference of the Parties", 
        "Coordinate with Relevant International Organizations", 
        "Handle Administrative and Contractual Arrangements", 
        "Execute Other Secretariat Roles and Tasks Assigned by the Conference of the Parties",
        "Promote International Cooperation on Environmental Issues", 
        "Initiate and Coordinate Environmental Research and Technological Development", 
        "Raise Public Awareness on Environmental Issues", 
        "Facilitate Exchange of Information and Experience on Environmental Matters"
    ],
    
    "Humanitarian Affairs and Social Justice": [
        "Eradicate Poverty and Reduce Inequalities", 
        "Support Achievement of Sustainable Development Goals (SDGs)", 
        "Promote UN Development System Coordination", 
        "Report on Human Development and Provide Policy Recommendations", 
        "Advocate for Development through Partnerships and Goodwill Ambassadors", 
        "Promote Social Justice and Human and Labour Rights"
    ],
    
    "Economic and Trade Development": [
        "Promote International Trade for Economic Development", 
        "Formulate Policies on International Trade and Economic Development", 
        "Review and Coordinate UN System Activities in Trade and Development", 
        "Initiate Actions for Multilateral Trade Legal Instruments", 
        "Harmonize Trade and Development Policies", 
        "Sustainable Economic Growth and Employment", 
        "International Monetary Cooperation", 
        "Expansion of World Trade Policy Coordination", 
        "Private Foreign Investment Promotion", 
        "Economic Development in Low-Income Areas"
    ],
    
    "Industrial and Technological Development": [
        "Promote and Accelerate Industrial Development in Developing Countries", 
        "Assist in Establishing a New International Economic Order", 
        "Promote Global, Regional, National, and Sectoral Cooperation", 
        "Encourage and Assist Industrialization Efforts", 
        "Coordinate UN System Activities in Industrial Development", 
        "Create New Concepts and Approaches for Industrial Development", 
        "Provide a Forum for International Cooperation and Dialogue", 
        "Serve as a Clearing-House for Industrial Information", 
        "Promote Development and Transfer of Industrial Technology", 
        "Organize and Support Industrial Training Programs", 
        "Assist in Exploiting and Conserving Natural Resources", 
        "Develop Special Measures for Least-Developed Countries", 
        "Assist in Securing External Financing for Industrial Projects"
    ],
    
    "Telecommunication and Innovation": [
        "Maintain International Cooperation for Telecommunication Improvements", 
        "Enhance Participation and Foster Partnerships Between Entities and Member States", 
        "Offer Technical Assistance to Developing Countries", 
        "Mobilize Resources for Telecommunications", 
        "Use Telecommunications to Facilitate Peaceful Relations"
    ],
    
    "Health and Human Rights": [
        "Attain the Highest Possible Health Level for All Peoples", 
        "Act as the Authority on International Health Work", 
        "Establish Collaboration with the UN and Health Agencies", 
        "Assist Governments in Strengthening Health Services", 
        "Study and Report on Public Health Techniques", 
        "Promote and Protect All Human Rights", 
        "Civil, Cultural, Economic, Political, and Social Rights Advocacy"
    ],
    
    "Environmental Protection and Sustainable Development": [
        "Provide Policy Guidance for Environmental Programmes", 
        "Promote International Cooperation on Environmental Issues", 
        "Climate Change Science Assessment and Response Strategies", 
        "Conserving Biological Diversity and Sustainable Use of Resources", 
        "Conservation and Sustainable Use of Wetlands", 
        "Addressing Mercury Emissions and Reducing Releases",
        "Science-Policy Interface for Biodiversity Knowledge Generation and Assessment",
        "Policy Support and Tools",
        "Capacity Building for Science-Policy Integration"
    ],
    
    "Space and Technology": [
        "Enhancing Space-derived Economic Benefits", 
        "Leveraging Space for Innovation and Challenges", 
        "Supporting SDGs with Space Technology", 
        "Strengthening Global Space Governance"
    ],
    
    "Disaster Risk Reduction and Humanitarian Assistance": [
        "Support the Implementation and Review of the Sendai Framework", 
        "Support Development of National and Local Mechanisms for Disaster Risk Reduction", 
        "Provide Assistance and Services in Migration", 
        "Support International Cooperation and Coordination for Disaster Relief"
    ],
    
    "Gender Equality and Empowerment": [
        # NOTE(review): this phrase uses a typographic apostrophe (’); text written
        # with a straight apostrophe (') will not match it -- confirm the input data.
        "Promote Women’s Empowerment Principles", 
        "Gender Equality and Empowerment in Leadership and Capacity Building"
    ],
    
    "Intellectual Property and Legal Harmonization": [
        "Intellectual Property Protection", 
        "Administrative Cooperation", 
        "Legal Harmonization and International Agreements", 
        "Technical Assistance for Intellectual Property Issues"
    ],
    
    "Migration Assistance and Services": [
        "Migration Assistance and Services", 
        "International Cooperation and Coordination", 
        "Research and Consultation on Migration", 
        "Harmonization of International Activities", 
        "Sustainable Development and Human Rights Integration"
    ],
    
    "Sustainable Development and Capacity Building": [
        "Sustainable Development and Capacity Building", 
        "Humanitarian Support and Expertise", 
        "Efficient Project Services and Organizational Excellence"
    ],
    
    # NOTE(review): every phrase under this key concerns atomic energy rather
    # than intellectual property -- confirm the category label is intended.
    "Intellectual Property and Innovation": [
        "Encourage Research and Application of Atomic Energy for Peaceful Uses", 
        "Provide Materials Services and Facilities for Atomic Energy Development", 
        "Foster Exchange of Scientific Information and Training", 
        "Establish Safeguards Against Military Use of Atomic Energy"
    ],
    
    "Meteorology and Hydrology": [
        "Worldwide Cooperation in Meteorological and Hydrological Observations and Services", 
        "Establishment of Rapid Information Exchange Systems", 
        "Standardization of Meteorological Observations", 
        "Promotion of Operational Hydrology and Meteorological-Hydrological Cooperation",
        "Encouragement of Research and Training in Meteorology and Related Fields"
    ],
    
    "Economic Development and Investment": [
        "Sustainable Economic Growth, Employment, and Living Standards", 
        "Financial Stability", 
        "Economic Expansion and Development", 
        "Expansion of World Trade Policy Coordination and Exchange", 
        "Reconstruction and Development Assistance", 
        "Private Foreign Investment Promotion", 
        "Finance for Productive Purposes", 
        "Economic Development in Low-Income Areas"
    ],
    
    "Maritime Safety and Hydrography": [
        "Maritime Safety and Awareness", 
        "Global Hydrographic Data and Information Accessibility", 
        "Hydrographic Capability and Capacity Building", 
        "International Standards and Uniformity", 
        "Authoritative Guidance on Hydrographic Matters", 
        "Coordination of Hydrographic Activities", 
        "Regional Hydrographic Cooperation"
    ],
    
    # NOTE(review): this list contains every phrase of "Space and Technology"
    # plus one more, so any text assigned that category is also assigned this
    # one -- confirm both categories are meant to exist.
    "Space Research and Technology": [
        "Enhancing Space-derived Economic Benefits", 
        "Leveraging Space for Innovation and Challenges", 
        "Supporting SDGs with Space Technology", 
        "Building Partnerships for Peaceful Space Use", 
        "Strengthening Global Space Governance"
    ]
}


# Function to assign categories
def assign_categories(objectives, mapping=None):
    """Return the comma-separated categories whose phrases occur in ``objectives``.

    Matching is a case-insensitive substring test: a category is assigned when
    at least one of its phrases appears anywhere in the text.

    Args:
        objectives: Free-text objectives for one record. Non-string values
            (for example ``None`` or the float NaN pandas uses for missing
            cells) are treated as having no text instead of raising.
        mapping: Optional ``{category: [phrase, ...]}`` dictionary to match
            against. Defaults to the module-level ``category_map``.

    Returns:
        The matching category names joined with ``', '`` in the mapping's
        iteration order, or ``''`` when nothing matches.
    """
    # Missing cells arrive as None/NaN, which have no .lower(); they simply
    # match no category.
    if not isinstance(objectives, str):
        return ''
    if mapping is None:
        mapping = category_map

    # Lowercase the text once rather than once per phrase.
    text = objectives.lower()
    matched = [
        category
        for category, phrases in mapping.items()
        if any(phrase.lower() in text for phrase in phrases)
    ]
    return ', '.join(matched)

# Run assign_categories on every value of the 'Defined Objectives' column and
# store the resulting comma-separated category string in a new 'Categories' column
def_objectives['Categories'] = def_objectives['Defined Objectives'].apply(assign_categories)

In [None]:
# Load ./data/defined_objectives_categories.csv into a DataFrame
def_object = pd.read_csv("./data/defined_objectives_categories.csv")
# Preview the first rows of the loaded table
def_object.head()