In [1]:
# Load the json data from ../data/data.json and see how many entries there are
import json

# JSON text is UTF-8 by specification, so name the encoding explicitly instead
# of relying on the platform default (which is not UTF-8 on every system).
with open("../data/data.json", encoding="utf-8") as f:
    data = json.load(f)

# Number of top-level entries in the loaded mapping.
print(len(data))

2565


In [2]:
# Peek at a single record (string key "11") to see which fields an entry carries.
data["11"]

{'organization': '0xCarbon',
 'theme_id': 2,
 'kyc_completed': True,
 'display_id': 2,
 'challenge_id': 1,
 'country': 'Nigeria',
 'sub_category_id': 452,
 'categorizer_completed': True,
 'category_id': 15,
 'evaluator_completed': True,
 'attachment_extractor_completed': False,
 'assigned_reviewer': None,
 'extractor_completed': False,
 'user_status': 'Yes',
 'user_comments': 'hello world',
 'created_at': '2024-08-02T05:18:28.549019Z',
 'id': 11,
 'LLM_summary': '0xcarbon is a SaaS product that helps FMCG businesses in Southeast Asia reduce their carbon footprint. It uses big data, machine learning, and blockchain technology to provide businesses with suggestions for small incremental changes to their operations, connect them to green energy sources, and process carbon credits to offset unavoidable emissions. By providing accessible and effective tools for businesses to combat climate change, 0xcarbon aims to play a critical role in catalyzing the transition to a low-carbon economy.',


In [3]:
# Collect the distinct values of the "Challenge" field across all entries
unique_challenges = {entry["Challenge"] for entry in data.values()}
print(unique_challenges)

{'Climate Impact Innovations Challenge 2023', 'The Liveability Challenge 2022', 'Net Zero Challenge 2023', 'The Liveability Challenge 2023', 'The Liveability Challenge Form 2025', 'Climate Impact Innovations Challenge 2024', 'Liveability Challenge 2024', 'Net Zero Challenge 2024'}


In [4]:
# Tally how many entries carry each raw user_status value
user_status_count = {}
for entry in data.values():
    status = entry["user_status"]
    # A status seen for the first time starts from zero.
    user_status_count[status] = user_status_count.get(status, 0) + 1
print(user_status_count)

{'Pending': 685, 'Yes': 85, 'KIV': 94, 'No': 1225, 'Maybe': 19, 'No ': 3, 'nan': 2, '': 246, 'Reject': 180, 'Recommended': 26}


In [5]:
# List every top-level key available on one entry (record "30").
data["30"].keys()

dict_keys(['organization', 'theme_id', 'kyc_completed', 'display_id', 'challenge_id', 'country', 'sub_category_id', 'categorizer_completed', 'category_id', 'evaluator_completed', 'attachment_extractor_completed', 'assigned_reviewer', 'extractor_completed', 'user_status', 'user_comments', 'created_at', 'id', 'LLM_summary', 'updated_at', 'title', 'LLM_status', 'jotform_id', 'pre_aug_sixth', 'Challenge', 'Theme', 'Category', 'Subcategory', 'form_fields'])

In [6]:
# Input features on which we will decide whether to give a grant or not.
# NOTE(review): input_keys is not referenced by the prompt-building code visible
# in this part of the notebook — confirm whether it is still needed.
input_keys = [
    "Challenge",
    "What do you aim to achieve and how would you develop your project if The Liveability Challenge were to provide you with funding or support?",
]

# Whether the grant was accepted or not.
# This is the entry key that make_training_example reads to derive the label.
output_key = "user_status"

In [7]:
"""LLM_summary
Theme
Category
Subcategory
Project objectives
Current issue to be addressed
Proposed solution
Impact of proposed solution
Competitive advantage of proposed solution
Viable commercial applications of proposed solutions
Current competing technologies/competitors
Project stage (TRL)
Amount of grant funding sought (in SGD)
Proof of concept status
Commercial partners engaged
Scope of work
Biggest possible impact
Success metrics
Remarks/supporting information"""

'LLM_summary\nTheme\nCategory\nSubcategory\nProject objectives\nCurrent issue to be addressed\nProposed solution\nImpact of proposed solution\nCompetitive advantage of proposed solution\nViable commercial applications of proposed solutions\nCurrent competing technologies/competitors\nProject stage (TRL)\nAmount of grant funding sought (in SGD)\nProof of concept status\nCommercial partners engaged\nScope of work\nBiggest possible impact\nSuccess metrics\nRemarks/supporting information'

In [8]:
# Raw `user_status` values that make_training_example maps to the "Yes" label.
yes_label_list = ["Yes", "Recommended"]
# Raw `user_status` values mapped to the "No" label; "No " (trailing space) is a
# spelling that occurs in the data. Any status in neither list yields no example.
no_label_list = ["No", "No ", "Reject"]


def get_dict_entry(entry: dict, key: str) -> str:
    """Look up ``key`` on a grant entry, falling back to its form fields.

    The top-level entry is checked first; if the key is not there, the nested
    ``entry["form_fields"]`` mapping is consulted.

    Args:
        entry: One application record.
        key: Field name to look up.

    Returns:
        The stored value (returned as-is, without conversion), or the literal
        string ``"None"`` when the key is found in neither place.
    """
    if key in entry:
        return entry[key]

    # Entries without a "form_fields" mapping (absent or null) must fall through
    # to the placeholder instead of raising KeyError/TypeError.
    form_fields = entry.get("form_fields")
    if form_fields is not None and key in form_fields:
        return form_fields[key]

    return "None"


def convert_entry_to_prompt(grant_entry: dict) -> str:
    """Render one grant application as a text prompt.

    Each placeholder is resolved through ``get_dict_entry``, so a field found
    neither on the entry nor in its form fields appears as the text ``None``.

    Args:
        grant_entry: A single application record.

    Returns:
        The multi-line prompt, including the template's leading newline and
        four-space indentation.
    """

    def field(name: str) -> str:
        # Shorthand for the entry-then-form-fields lookup on this application.
        return get_dict_entry(grant_entry, name)

    # Resolve the fields in the order they appear in the template.
    theme = field('Theme')
    category = field('Category')
    subcategory = field('Subcategory')
    title = field('title')
    amount_sought = field('Amount of grant funding sought (in SGD)')
    objectives = field('Project objectives')
    proof_of_concept = field('Have you obtained proof of concept for your project?')
    llm_summary = field('LLM_summary')
    proposed_solution = field('Proposed solution')
    scope_of_work = field("Proposed project's scope of work")

    return f"""
    Given the following details of a grant application, determine the likelihood of acceptance. 
    Assess how well the project aligns with the challenge theme, its viability, its potential impact, and any competitive advantage. 
    Consider the stage of development, the proof of concept status, and existing partnerships. 
    Classify the likelihood of acceptance as 'High', 'Medium', or 'Low', and briefly explain your rationale.

    Grant Application Summary:
    - **Project Theme:** {theme}
    - **Category:** {category}
    - **Subcategory:** {subcategory}
    - **Project Title:** {title}
    - **Grant Amount Sought:** {amount_sought}
    - **Objectives:** {objectives}
    - **Have you obtained proof of concept for your project?** {proof_of_concept}

    Solution Summary: {llm_summary}

    Proposed solution: {proposed_solution}

    Proposed project's scope of work: {scope_of_work}

    Classify the likelihood of acceptance and briefly explain why this project meets or does not meet the challenge's acceptance criteria.
    """


def make_training_example(entry: dict) -> "tuple[str, str] | None":
    """Turn one application into a ``(prompt, label)`` training pair.

    The label comes from the entry's ``output_key`` field: values listed in
    ``yes_label_list`` become ``"Yes"`` and values in ``no_label_list`` become
    ``"No"``.

    Args:
        entry: One application record.

    Returns:
        ``(prompt, "Yes")`` or ``(prompt, "No")`` for a labelled entry, or
        ``None`` when the status is missing or is in neither label list.
    """
    # .get() so an entry lacking the status field is skipped rather than raising.
    status = entry.get(output_key)

    if status in yes_label_list:
        label = "Yes"
    elif status in no_label_list:
        label = "No"
    else:
        return None

    # Build the prompt only once the entry is known to be usable.
    return convert_entry_to_prompt(entry), label


# Smoke test: print the training examples found among the first 12 entries
for i, d in enumerate(list(data.values())[:12]):
    te = make_training_example(d)
    if te is None:
        # No usable label for this entry; nothing to show.
        continue
    print(f"Entry {i}", te[0], te[1], sep="\n")

Entry 1

    Given the following details of a grant application, determine the likelihood of acceptance. 
    Assess how well the project aligns with the challenge theme, its viability, its potential impact, and any competitive advantage. 
    Consider the stage of development, the proof of concept status, and existing partnerships. 
    Classify the likelihood of acceptance as 'High', 'Medium', or 'Low', and briefly explain your rationale.

    Grant Application Summary:
    - **Project Theme:** Climate Change
    - **Category:** Carbon Management
    - **Subcategory:** Carbon Emissions Reduction through SaaS
    - **Project Title:** 0xcarbon - Enabling a low carbon economy
    - **Grant Amount Sought:** 187000
    - **Objectives:** Our primary objective at 0xcarbon is to help FMCGs in Africa reduce their carbon footprint and transition to a low-carbon economy. The company's goal is to leverage big data, machine learning, and blockchain technology to provide actionable insights and so