In [None]:
!pip install -q -U google-genai

In [None]:
from google import genai
from google.genai import types
import os

# Read the key from the GOOGLE_API_KEY environment variable when it is set, so
# the real credential never has to be saved inside the notebook. The
# placeholder is only a fallback: replace it (or set the variable) before use.
API_KEY = os.environ.get("GOOGLE_API_KEY", "your_google_api_key")
# Shared Gen AI client used by the generation loop further down.
client = genai.Client(api_key=API_KEY)

In [None]:
# System prompt for rewriting the "education" section of a resume as JSON.
# The <output_example> block shows the expected keys for each entry.
# NOTE(review): the braces in the example are doubled, as if this text were
# meant to go through str.format(); no .format() call is visible in this
# notebook, so confirm whether the model should see single braces instead.
EDUCATIONS = """Instruction: As a resume writing instructor, optimize the given "education" section of resume in JSON format.
- Ensure integrity and consistency.
- Retain all provided details of education and degree, including institution names, degrees, dates, GPA.
- Only return "education" section as a response, don't include any other sections.
- Avoid missing or adding any educational details.
- Avoid hallucination.

<output_example>
"education": [
  {{
    "degree": "Masters of Science - Computer Science (Thesis)",
    "university": "Arizona State University, Tempe, USA",
    "from_date": "Aug 2023",
    "to_date": "May 2025",
    "grade": "3.8/4"
  }},
  {{
    "degree": "Bachelor of Science - Computer Science",
    "university": "Bangalore University, Bangalore, India",
    "from_date": "Aug 2019",
    "to_date": "May 2023",
    "grade": "3.6/4"
  }}
]
</output_example>

"""

# System prompt for rewriting the "projects" section of a resume as JSON.
# The <output_template> block shows the expected keys for each project entry.
# NOTE(review): the braces in the template are doubled, as if this text were
# meant to go through str.format(); the generation loop passes the string
# through unchanged, so the model receives literal "{{" / "}}" -- confirm
# this is intended.
PROJECTS = """Instructions: As a resume writing instructor, improve the given "projects" section of resume in JSON format.
- Retain listed projects in the resume.
- Only return "projects" section as a response, don't include any other sections.
- Improve clarity and alignment with the job requirements.
- Use clear, concise and professional language.
- Format each project with bullet points.
- In each project description, it should include info, such as Task, Feature, Result.
- Avoid hallucination or adding details not given in the original resume data..

<output_template>
"projects": [
    {{
      "name": "project name1",
      "link": "https://devpost.com/software/project1",
      "from_date": "Nov 2023",
      "to_date": "Nov 2023",
      "description": [
        "introduction of project task, key features, and results."
      ]
    }},
    {{
      "name": "project name2",
      "link": "https://devpost.com/software/project2",
      "from_date": "June 2022",
      "to_date": "July 2022",
      "description": [
        "introduction of project task, key features, and results."
      ]
    }}
  ]
</output_template>

"""

# System prompt for rewriting the "skill_section" of a resume as JSON.
# The <output_example> block shows skills grouped under named categories.
# NOTE(review): the braces in the example are doubled, as if this text were
# meant to go through str.format(); the generation loop passes the string
# through unchanged, so the model receives literal "{{" / "}}" -- confirm
# this is intended.
SKILLS = """Instructions: As a resume writing instructor, optimize the given "skill_section" section of resume in JSON format.
- Enhance the structure and alignment with the job description.
- Only return "skill_section" section as a response, don't include any other sections.
- Ensure most relevant skills in resume are retained.
- Add other relevant skills showed in other sections of resume if they are aligned with job requirements.
- Remove irrelevant details.
- Use precise and professional language.
- Avoid adding new skills that are not showed in resume.
- Avoid hallucination.

<output_example>
"skill_section": [
    {{
      "name": "Programming Languages",
      "skills": ["Python", "JavaScript"]
    }},
    {{
      "name": "Cloud and DevOps",
      "skills": [ "Azure", "AWS"]
    }}
  ]
</output_example>

"""

# System prompt for rewriting the "work_experience" section of a resume as
# JSON. The <output_example> block shows the expected keys for each entry; it
# must itself be well-formed JSON, because the model imitates it.
# NOTE(review): the braces in the example are doubled, as if this text were
# meant to go through str.format(); the generation loop passes the string
# through unchanged, so the model receives literal "{{" / "}}" -- confirm
# this is intended.
EXPERIENCE = """Instructions: As a resume writing instructor, optimize the given "work_experience" section of resume in JSON format.
- Format each experience as the following output_example.
- In each experience, description should include information about responsibilities and impacts as string text.
- Improve clarity, structure, and alignment with the job description.
- Retain all important and relevant experience without altering factual details.
- Only return "work_experience" section as a response, don't include any other sections.
- Avoid adding details not given in the original resume data.

<output_example>
"work_experience": [
    {{
      "role": "Software Engineer",
      "company": "Winjit Technologies",
      "location": "Pune, India",
      "from_date": "Jan 2020",
      "to_date": "Jun 2022",
      "description": [
        "Engineered 10+ RESTful APIs Architecture and Distributed services; Designed 30+ low-latency responsive UI/UX application features with high-quality web architecture; Managed and optimized large-scale Databases. (Systems Design)",
        "Initiated and Designed a standardized solution for dynamic forms generation, with customizable CSS capabilities feature, which reduces development time by 8x; Led and collaborated with a 12 member cross-functional team. (Idea Generation)"
      ]
    }}
  ]
</output_example>

"""

# System prompt for rewriting the "summary" section of a resume as JSON
# (a single "summary" string, capped by the prompt at 100 words).
# NOTE(review): the braces in the example are doubled, as if this text were
# meant to go through str.format(); no .format() call is visible in this
# notebook, so confirm whether the model should see single braces instead.
SUMMARY = """Instructions: As a resume writing instructor, optimize the given "summary" section of resume in JSON format.
- Retain key details while enhancing clarity, conciseness, and alignment with the job description.
- Ensure a strong, informative summary without adding new, unprovided content.
- Remove irrelevant and redundant content.
- Only return "summary" section as a response, don't include any other sections.
- No more than 100 words.
- Avoid hallucination

<output_example>
{{
  "summary": "Results-driven Marketing Professional with 5+ years of experience in digital marketing, brand strategy, and campaign management. Proven track record of increasing online engagement by 40% and driving a 25% boost in sales through data-driven strategies. Skilled in SEO, social media marketing, and analytics tools like Google Analytics and HubSpot. Passionate about creating innovative marketing solutions to help businesses grow. Seeking to leverage expertise in a dynamic, growth-oriented organization."
}}
</output_example>"""

In [None]:
import json
import time
# Source dataset: a JSON array whose records each carry an "input" entry that
# the generation loop sends to the model.
json_file = "/content/merged_json_ds.json"
# Decode explicitly as UTF-8 so loading does not depend on the platform's
# default encoding (the results file is written as UTF-8 as well).
with open(json_file, "r", encoding="utf-8") as file:
    data = json.load(file)

In [None]:
# Accumulates one {"input": ..., "output": ...} record per successful model
# call; re-running this cell discards everything collected so far.
merged_json = []

In [None]:
# Maps each resume section key to the system prompt used to rewrite it.
# To process a single section, comment out the entries you do not need.
section_dict = dict(
    skill_section=SKILLS,
    projects=PROJECTS,
    work_experience=EXPERIENCE,
)


In [None]:
def _parse_json_response(text):
    """Parse the model's reply into a Python object.

    A surrounding Markdown ```json fence is stripped when present. Returns
    None when the reply is missing (not a string) or is not valid JSON, so the
    caller can skip the record instead of aborting the whole run.
    """
    if not isinstance(text, str):
        return None
    cleaned = text.strip()
    if cleaned.startswith("```"):
        cleaned = cleaned.replace("```json", "").replace("```", "").strip()
    try:
        return json.loads(cleaned)
    except json.JSONDecodeError:
        return None


# For every selected section prompt, send each dataset record to the model and
# collect the parsed JSON reply alongside the input that produced it.
for section_name, section_prompt in section_dict.items():
    for instance in data:
        resume_input = instance.get("input")
        if resume_input is None:
            print(f"Skipping record without 'input' for section '{section_name}'")
            continue

        response = client.models.generate_content(
            model="gemini-1.5-flash",
            contents=f"{resume_input}",
            config=types.GenerateContentConfig(
                system_instruction=section_prompt,
                max_output_tokens=2500,
                top_k=200,
                top_p=0.9,
                temperature=0.9,
                response_mime_type='application/json',
                # NOTE(review): a newline stop sequence ends generation at the
                # first line break, which may truncate multi-line JSON --
                # confirm this is intended.
                stop_sequences=['\n'],
            ),
        )

        json_output = _parse_json_response(response.text)
        if json_output is None:
            print(f"Could not parse a JSON response for section '{section_name}'")
        else:
            print(json.dumps(json_output, indent=4))
            # Build a fresh dict rather than writing "instruction" into the
            # dataset record: the same record is reused for every section, so
            # mutating it would overwrite the instruction in records already
            # collected and leak it into the next section's prompt.
            merged_json.append({
                "input": {**resume_input, "instruction": section_prompt},
                "output": json_output,
            })

        # Pause after every request (successful or not) to stay under the API
        # rate limit.
        time.sleep(60)

In [None]:
# Sanity check: number of input/output records collected by the loop above.
print(len(merged_json))

In [None]:
# Destination for the collected fine-tuning records.
refined_resume_section_file = "/content/finetuning_resume_section_ds.json"
# Open the path defined above (the original referenced an undefined name and
# raised NameError). Write-only mode is enough: the file is never read back.
with open(refined_resume_section_file, "w", encoding='utf-8') as output_file:
    json.dump(merged_json, output_file, indent=4)