In [3]:
from transformers import T5Tokenizer, T5ForConditionalGeneration, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline
from langchain.llms import HuggingFaceHub
from langchain.schema.runnable import RunnableSequence
from langchain.prompts import PromptTemplate

#hugging face
from huggingface_hub import login

In [4]:
#quantization 
quantization_config = BitsAndBytesConfig(load_in_8bit = True, llm_int8_enable_fp32_cpu_offload=True)

In [5]:
#Initialize tokenizer and model

t5_tokenizer = T5Tokenizer.from_pretrained("flan_t5_finetuned")
t5_model = T5ForConditionalGeneration.from_pretrained("flan_t5_finetuned", quantization_config=quantization_config, device_map="auto")

In [6]:
#Define pipelines

flan_pipeline = pipeline("text2text-generation", model=t5_model, tokenizer=t5_tokenizer)

In [7]:
#prompts

generation_prompt_template = PromptTemplate(
    template = "Generate an SOP based on the following answers: {answers}", 
    input_variables = ["answers"]
)

refinement_prompt_template = PromptTemplate(
    template = "Polish this SOP to sound more professional and natural: {sop}",
    input_variables = ["sop"]
)

In [8]:
#Runnable sequence

sop_generator_refiner = generation_prompt_template | flan_pipeline | refinement_prompt_template | flan_pipeline

In [9]:
# function for generating and refining sop

def generate_refine_sop(answers):
    sop_data = {"answers" : answers}
    refined_sop = sop_generator_refiner.invoke(sop_data)
    print("Refined SOP: ", refined_sop)
    return refined_sop

In [10]:
sample_answers = [
    "My interest in data science was sparked during my college hackathon on data analysis, where I learned how powerful data can be in decision-making.",
    "During my undergraduate studies, I attended a seminar by a data scientist, which motivated me to pursue this field. I realized the broad applications of data science in various industries.",
    "Subjects like 'Big Data Analytics,' 'Machine Learning,' and 'Data Mining' have prepared me with foundational knowledge in data science and analytics.",
    "I worked on a project where we created a remote-controlled car using Arduino, which enhanced my understanding of hardware integration and programming.",
    "I interned as a web developer, where I gained hands-on experience in data-driven decision-making and project management.",
    "I have published a paper on 'Facial Recognition using CNN,' which allowed me to understand the practical applications of machine learning.",
    "I hold a Google Cloud Associate Cloud Engineer certification, which strengthened my technical skills and understanding of cloud services.",
    "My long-term goal is to work as a data scientist at a top tech company like Google or Microsoft and contribute to impactful data-driven solutions."
]

In [11]:
generate_refine_sop(sample_answers)

ValueError:  `args[0]`: text='Generate an SOP based on the following answers: [\'My interest in data science was sparked during my college hackathon on data analysis, where I learned how powerful data can be in decision-making.\', \'During my undergraduate studies, I attended a seminar by a data scientist, which motivated me to pursue this field. I realized the broad applications of data science in various industries.\', "Subjects like \'Big Data Analytics,\' \'Machine Learning,\' and \'Data Mining\' have prepared me with foundational knowledge in data science and analytics.", \'I worked on a project where we created a remote-controlled car using Arduino, which enhanced my understanding of hardware integration and programming.\', \'I interned as a web developer, where I gained hands-on experience in data-driven decision-making and project management.\', "I have published a paper on \'Facial Recognition using CNN,\' which allowed me to understand the practical applications of machine learning.", \'I hold a Google Cloud Associate Cloud Engineer certification, which strengthened my technical skills and understanding of cloud services.\', \'My long-term goal is to work as a data scientist at a top tech company like Google or Microsoft and contribute to impactful data-driven solutions.\']' have the wrong format. The should be either of type `str` or type `list`

In [36]:
def generate_detailed_sop_v2(answers):
    # Prepare input text from answers with structured guidance
    input_text = " ".join([f"Answer: {answer}" for answer in answers])
    detailed_prompt = f"""
    Generate a detailed SOP based on the following responses:
    {input_text}
    
    Include sections like:
    1. **Introduction**: Briefly introduce the applicant and their interest in data science.
    2. **Academic Background**: Explain the relevant subjects studied and any motivating factors.
    3. **Projects and Research**: Describe significant projects, research, and practical applications.
    4. **Internship and Work Experience**: Summarize relevant professional experiences.
    5. **Certifications and Skills**: Highlight any technical certifications and skills.
    6. **Long-term Goals**: Explain long-term career goals and aspirations.

    The SOP should be professional, detailed, and reflective of the applicant's experiences and goals.
    """

    try:
        # Run generation with increased max_new_tokens
        generation_output = flan_pipeline(
            detailed_prompt,
            max_new_tokens=700,  # Allows a longer output for complex SOPs
            return_full_text=False
        )
        sop_content = generation_output[0]["generated_text"]
        print("Generated SOP Draft:", sop_content)

        return sop_content

    except Exception as e:
        print("Error generating SOP:", str(e))
        return "An error occurred during SOP generation."


In [37]:
generate_and_refine_sop(sample_answers)

Input Text for SOP Generation: Answer: My interest in data science was sparked during my college hackathon on data analysis, where I learned how powerful data can be in decision-making. Answer: During my undergraduate studies, I attended a seminar by a data scientist, which motivated me to pursue this field. I realized the broad applications of data science in various industries. Answer: Subjects like 'Big Data Analytics,' 'Machine Learning,' and 'Data Mining' have prepared me with foundational knowledge in data science and analytics. Answer: I worked on a project where we created a remote-controlled car using Arduino, which enhanced my understanding of hardware integration and programming. Answer: I interned as a web developer, where I gained hands-on experience in data-driven decision-making and project management. Answer: I have published a paper on 'Facial Recognition using CNN,' which allowed me to understand the practical applications of machine learning. Answer: I hold a Google 

[{'generated_text': "['generated_text': 'I am a data scientist with a"}]