In [1]:
from groq import Groq
import os
from dotenv import load_dotenv

# Load the .env file so the API key can be read from the environment below
# instead of being hard-coded in the notebook.
load_dotenv()

# Module-level Groq client used by the extraction function.
# The key lookup yields None when GROQ_API_KEY is not set.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# Entity categories passed to generate_prompt() as the list of categories
# the model is told to focus on. Order is preserved in the prompt text.
entity_types = [
    "Material", "ChemicalCompound",
    "DepositionMethod", "PassivationTechnique",
    "SolarCellType", "Layer",
    "PerformanceMetric", "EfficiencyMetric",
    "ElectricalProperty", "OpticalProperty",
    "CharacterizationTechnique",
    "DefectType", "DegradationMechanism",
    "EnvironmentalFactor",
    "IndustryStandard", "TestingProtocol",
    "ProductionEquipment", "ProductionScale",
    "ResearchInstitution",
]

def generate_prompt(text_chunk, requirements):
    """Build the relationship-extraction prompt for one chunk of text.

    Args:
        text_chunk: The passage to analyse. It is inserted verbatim between
            double quotes after ``Text: ``.
        requirements: Iterable of category-name strings; they are joined
            with ", " and listed as the categories to focus on.

    Returns:
        The complete prompt string. It ends with an opening ``[`` so the
        expected JSON array is already started.
    """
    # Start with a base instruction
    prompt = (
        "Read the following text and extract the relationships between entities. Please use short-term to represent relationships. Focus on the following categories: "
    )

    # Add each requirement to the prompt
    prompt += ", ".join(requirements) + " for entity extration. "
    prompt += (
        "Output each relationship as a JSON object with 'entity1', 'relationship', and 'entity2'. "
        "Do not include any explanations or additional text, only JSON.\n\n"
        "Begin your JSON response with [ and end it with ].\n\n"
        # Only this segment is an f-string: previously the placeholder was a
        # plain literal, so the text to analyse never reached the prompt.
        f"Text: \"{text_chunk}\"\n\nOutput:\n###\n["
    )

    return prompt


# Function to extract relationships using the Groq chat-completions client
def extract_relationships(text_chunk):
    """Ask the chat model for the relationships found in ``text_chunk``.

    The prompt is built by ``generate_prompt`` from the text and the
    module-level ``entity_types`` list, then sent through the module-level
    ``client``.

    Args:
        text_chunk: The passage to analyse.

    Returns:
        The content of the first choice's message, exactly as the model
        produced it (it is not parsed or validated here).
    """
    user_message = {
        "role": "user",
        "content": generate_prompt(text_chunk, entity_types),
    }
    system_message = {
        "role": "system",
        "content": "You are an expert in text analysis and relationship extraction, and professional in the field of solar cell fabrication and characterization.",
    }

    completion = client.chat.completions.create(
        model="gemma2-9b-it",
        messages=[system_message, user_message],
        max_tokens=5000,  # Upper bound on the length of the reply
        temperature=0.1,
    )

    # Only the first choice's text is handed back to the caller
    return completion.choices[0].message.content

# Smoke test: run the extraction on one sample passage and print the raw
# model reply. The literal is split at its embedded newlines for readability;
# the resulting string is unchanged.
text_chunk = (
    "p-type, n-type and upgraded metallurgical\n"
    "grade crystalline silicon solar cells, respectively. Especially, the regeneration of boron-oxygen related defects,\n"
    "which cause carrier induced degradation, will be closely discussed since most of industrial solar cells are fabricated by boron-doped p-type silicon wafer. Moreover, laser-induced hydrogen passivation, which can locally\n"
    "recover defective area on the solar cells, will be addressed. In the conclusion, proper conditions of advanced\n"
    "hydrogen passivation for the successful improvement of minority carrier lifetime will be summarized.\n"
    "crystalline silicon [5,9–15]. Moreover, it has been reported that boronoxygen (B-O) complex, which leads to carrier-induced degradation\n"
    "(CID) in boron doped p-type silicon wafers, can be deactivated effectively by using hydrogen atoms in the silicon nitride layer [16–21].\n"
    "For effective hydrogen passivation of defects in crystalline silicon,\n"
    "locally stable charge states of interstitial hydrogen was"
)
relationships = extract_relationships(text_chunk)
print(relationships)

Here are the relationships extracted from the text, formatted as JSON objects:

```json
[
  {
    "entity1": "p-type silicon solar cells",
    "relationship": "is_type_of",
    "entity2": "SolarCellType",
    "entity1_type": "SolarCellType",
    "entity2_type": "SolarCellType"
  },
  {
    "entity1": "n-type silicon solar cells",
    "relationship": "is_type_of",
    "entity2": "SolarCellType",
    "entity1_type": "SolarCellType",
    "entity2_type": "SolarCellType"
  },
  {
    "entity1": "upgraded metallurgical grade crystalline silicon solar cells",
    "relationship": "is_type_of",
    "entity2": "SolarCellType",
    "entity1_type": "SolarCellType",
    "entity2_type": "SolarCellType"
  },
  {
    "entity1": "boron-oxygen related defects",
    "relationship": "causes",
    "entity2": "carrier induced degradation",
    "entity1_type": "DefectType",
    "entity2_type": "DegradationMechanism"
  },
  {
    "entity1": "boron doping",
    "relationship": "used_in",
    "entity2": "p-type