In [21]:
from mistralai import Mistral
from pdfminer.high_level import extract_text
import os
from dotenv import load_dotenv
import csv

In [22]:
def extract_text_pdf(pdf_path):
    """Return the text that pdfminer's ``extract_text`` reads from ``pdf_path``."""
    return extract_text(pdf_path)

In [23]:
# Smoke-test the extractor on a sample resume and show the raw text it yields.
pdf_path = '../data/pdf/college_resume.pdf'
print(extract_text_pdf(pdf_path=pdf_path))

AAYUSH KUMAR 
Address: Rishi comforts PG 578, 
60 Feet Rd, AECS, Layout - C Block, 
Marathahalli, Bengaluru Karnataka - 560037 
Email ID: aaku21ainds@cmrit.ac.in 
Mobile No: +91 895 593 1443 
LinkedIn : https://www.linkedin.com/in/aayush-kumar-6b5191263/ 
GitHub: Aayush-93407 
 CAREER OBJECTIVE 
Dynamic  and  driven  Bachelor  of  Engineering  graduate  from  CMR  Institute  of  Technology,  specializing  in  Artificial 
Intelligence and Data Science, seeking a challenging role where I can leverage my strong analytical skills and passion 
for  innovation.  Committed  to  continuous  learning  and  professional  growth,  drive  efficiency,  and  create  value  in  a 
dynamic and collaborative environment. 
 EDUCATION QUALIFICATION 
•  Bachelors of Engineering – Artificial Intelligence and Data Science Engineering 

CMR Institute of Technology, Bengaluru 
9.03 CGPA, 2025(Pursuing) 

•  12th Grade – Science 

Central Academy, Kota, Rajasthan 
79.6%, 2021 
•  10th Grade 

Excellent Public 

In [24]:
def analyze_resume(text, api_key_mistral):
    """Ask the Mistral chat model to pull contact fields and skills out of resume text.

    Args:
        text: Raw resume text to analyse; it is appended to the end of the prompt.
        api_key_mistral: API key handed to the ``Mistral`` client.

    Returns:
        The message content of the first completion choice. The prompt asks
        for ``field_name : values`` lines (Name, Email ID, Contact Number,
        LinkedIn, GitHub, Skills), but the reply is returned unvalidated.
    """
    # The instruction text is split at its newline boundaries for readability;
    # the concatenated result is identical to a single-line prompt.
    prompt = (
        "Analyze this text from a resume and return the following information on new lines \n"
        "Name, Email ID, Contact Number, LinkedIn and GitHub page links, and extract all skills from projects and those under the skills section and pack them all under the skills field, seperated by commas \n"
        "Give me these details in the form field_name : values, each on a new line\n"
        "Please make sure the field names are exactly like this: Name, Email ID, Contact Number, LinkedIn, GitHub, Skills\n"
        "Please don't include any other unnecessary sentence in your response \n"
        f"{text}"
    )
    chat_messages = [{"role": "user", "content": prompt}]

    mistral_client = Mistral(api_key=api_key_mistral)
    response = mistral_client.chat.complete(
        model="mistral-large-latest",
        messages=chat_messages,
    )
    first_choice = response.choices[0]
    return first_choice.message.content

In [25]:
def recommend_projects(skills, api_key_mistral):
    """Ask the Mistral chat model for five project ideas matching the given skills.

    Args:
        skills: Skills description; it is appended to the end of the prompt.
        api_key_mistral: API key handed to the ``Mistral`` client.

    Returns:
        The message content of the first completion choice, returned as-is.
    """
    # The instruction text is split at its newline boundaries for readability;
    # the concatenated result is identical to a single-line prompt.
    prompt = (
        "Based on the skills provided below, recommend top 5 projects that the user can work on and successfully complete\n"
        "Generate your response in this format: Project Name, Project Description, Tools and Skills Utilized, and Steps \n"
        "Please make sure you don't include unnecessary sentences in your response \n"
        f"{skills}"
    )
    chat_messages = [{"role": "user", "content": prompt}]

    mistral_client = Mistral(api_key=api_key_mistral)
    response = mistral_client.chat.complete(
        model="mistral-large-latest",
        messages=chat_messages,
    )
    first_choice = response.choices[0]
    return first_choice.message.content

In [26]:
def create_documentation(project_desc, api_key_mistral):
    """Ask the Mistral chat model to write markdown documentation for a project.

    Args:
        project_desc: Project description; it is appended to the end of the prompt.
        api_key_mistral: API key handed to the ``Mistral`` client.

    Returns:
        The message content of the first completion choice, returned as-is.
    """
    # The instruction text is split at its newline boundaries for readability;
    # the concatenated result is identical to a single-line prompt.
    prompt = (
        "Based on the project description given below, create a well-structured markdown documentation suitable for the project's Github repository. The documentation should display the tools used, a detailed project description, and setup and execution steps \n"
        "Please make sure you add no unnecessary sentences in your response and keep your answers relevant to the data \n"
        f"{project_desc}"
    )
    chat_messages = [{"role": "user", "content": prompt}]

    mistral_client = Mistral(api_key=api_key_mistral)
    response = mistral_client.chat.complete(
        model="mistral-large-latest",
        messages=chat_messages,
    )
    first_choice = response.choices[0]
    return first_choice.message.content

## Main: resume field extraction, project recommendations, and README generation

In [27]:
def _parse_user_info(user_info):
    """Turn the model's ``field_name : value`` lines into a dict.

    Each line is split on its first colon, so values that contain colons
    (URLs, for example) stay intact. Lines without a colon are skipped.
    Surrounding whitespace and markdown decoration (``*`` on either part,
    leading ``-`` bullets on the key) are removed so that a reply such as
    ``**Name:** Jane`` still maps to the ``Name`` column.
    """
    parsed = {}
    for line in user_info.split("\n"):
        key, separator, value = line.partition(":")
        if not separator:
            continue
        parsed[key.strip().strip("*-").strip()] = value.strip().strip("*").strip()
    return parsed


def main(filepath='../data/pdf/1CR21EC252_ADARSHVINOD_RESUME.pdf',
         env_path="C:\\Users\\ADMIN\\Desktop\\api_key.env",
         project_index=3):
    """Run the resume pipeline: extract fields, recommend projects, write docs.

    Steps:
      1. Load the Mistral API key from ``env_path``.
      2. Extract text from the resume PDF and ask the model for the fields.
      3. Append those fields as one row to ``user_info.csv``.
      4. Ask the model for project recommendations based on the skills.
      5. Generate markdown documentation for one recommended project and
         write it to ``README.md``.

    Args:
        filepath: Path of the resume to process. Only ``.pdf`` is supported.
        env_path: dotenv file that defines ``MISTRALAI_API_KEY``.
        project_index: Index into the recommendation text after it is split
            on ``". **"``; selects which segment is sent for documentation.

    Raises:
        ValueError: If ``filepath`` is not a PDF, or if the model's reply
            contains no ``Skills`` field.
    """
    # The API key is expected in a .env file under the name MISTRALAI_API_KEY.
    load_dotenv(env_path)
    api_key_mistral = os.getenv("MISTRALAI_API_KEY")

    # Fail early with a clear message instead of hitting an unbound
    # `resume_text` further down; compare case-insensitively so ".PDF" works.
    extension = os.path.splitext(filepath)[1].lower()
    if extension != '.pdf':
        raise ValueError(
            f"Unsupported resume format {extension!r} for {filepath!r}: only .pdf files are handled"
        )
    resume_text = extract_text_pdf(filepath)

    # The model answers with free text; convert it to {field name: value}.
    user_info = analyze_resume(resume_text, api_key_mistral)
    user_info_dict = _parse_user_info(user_info)

    csv_filename = "user_info.csv"
    fieldnames = ["Name", "Email ID", "Contact Number", "LinkedIn", "GitHub", "Skills"]
    # UTF-8 keeps non-ASCII names/symbols from failing under a platform
    # default encoding. extrasaction="ignore" drops any unexpected
    # "key: value" line in the reply instead of raising ValueError; missing
    # fields are written as empty cells.
    with open(csv_filename, mode="a", newline="", encoding="utf-8") as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames, extrasaction="ignore")

        # Position 0 right after opening in append mode is used as the
        # "file is new or empty" check, so the header is written only once.
        if file.tell() == 0:
            writer.writeheader()

        writer.writerow(user_info_dict)

    skills = user_info_dict.get("Skills", "")
    if not skills:
        raise ValueError(
            "The resume analysis did not return a 'Skills' field; cannot recommend projects"
        )

    # Project recommendations (plain string returned by the model).
    projects = recommend_projects(skills, api_key_mistral)
    print(projects.replace('**', '').replace('###', '').split("\n\n"))

    # Split the recommendations on ". **" to isolate individual segments.
    projects_split = projects.split(". **")

    # Even indexes are moved to a neighbouring odd one.
    # NOTE(review): presumably even segments are not project bodies in the
    # expected reply format -- confirm against real model output.
    if project_index % 2 == 0:
        project_index = project_index+1 if project_index + 1 < len(projects_split) else project_index-1

    if 0 <= project_index < len(projects_split):
        # Drop the segment's final character before sending it on.
        # NOTE(review): this looks like it trims the next item's number
        # left behind by the ". **" split -- confirm.
        project_desc = projects_split[project_index][:-1]
    else:
        # The reply had fewer segments than expected (or used a different
        # format): use the last segment untrimmed rather than IndexError.
        project_desc = projects_split[-1]

    project_documentation_reponse = create_documentation(project_desc, api_key_mistral)
    print(project_documentation_reponse)

    md_filename = "README.md"
    # UTF-8 so any characters in the generated markdown can be written.
    with open(md_filename, mode="w", encoding="utf-8") as file:
        file.write(project_documentation_reponse)
    

In [28]:
# Run the whole pipeline end to end; this calls the Mistral API and writes user_info.csv and README.md.
main()

['1. Project Name: IoT Weather Station\n   Project Description: Develop an IoT-based weather station that collects environmental data and displays it on a web dashboard.\n   Tools and Skills Utilized: Arduino, ESP32, Python, Django, HTML, CSS, JavaScript, React, SQLite3, Git\n   Steps:\n   - Set up Arduino and ESP32 to collect temperature, humidity, and pressure data.\n   - Use Python to process and store data in SQLite3.\n   - Create a Django backend to serve data to the frontend.\n   - Build a React frontend to display data in real-time.\n   - Use Git for version control.', '2. Project Name: Image Processing with Machine Learning\n   Project Description: Build an application that processes images and performs object detection using machine learning.\n   Tools and Skills Utilized: Python, OpenCV, Machine Learning, HTML, CSS, JavaScript, React, Django, VS Code, Git\n   Steps:\n   - Collect and preprocess image data using OpenCV.\n   - Implement a machine learning model for object detec