In [11]:
from pptx import Presentation
from mistralai import Mistral
from fpdf import FPDF
from pdfminer.high_level import extract_text
import mistune
from mistune.renderers.markdown import MarkdownRenderer
import pypandoc
import os
from dotenv import load_dotenv
import csv
import re

In [3]:
def extract_text_pdf(pdf_path):
    """Return the text content of the PDF located at ``pdf_path``.

    Thin wrapper that hands the path straight to pdfminer's
    ``extract_text`` and returns whatever string it produces.
    """
    return extract_text(pdf_path)

In [4]:
# Smoke test: run the PDF extractor on a sample resume and print the raw text
# so the extraction quality can be checked by eye.
pdf_path = '../data/pdf/college_resume.pdf'
print(extract_text_pdf(pdf_path))

AAYUSH KUMAR 
Address: Rishi comforts PG 578, 
60 Feet Rd, AECS, Layout - C Block, 
Marathahalli, Bengaluru Karnataka - 560037 
Email ID: aaku21ainds@cmrit.ac.in 
Mobile No: +91 895 593 1443 
LinkedIn : https://www.linkedin.com/in/aayush-kumar-6b5191263/ 
GitHub: Aayush-93407 
 CAREER OBJECTIVE 
Dynamic  and  driven  Bachelor  of  Engineering  graduate  from  CMR  Institute  of  Technology,  specializing  in  Artificial 
Intelligence and Data Science, seeking a challenging role where I can leverage my strong analytical skills and passion 
for  innovation.  Committed  to  continuous  learning  and  professional  growth,  drive  efficiency,  and  create  value  in  a 
dynamic and collaborative environment. 
 EDUCATION QUALIFICATION 
•  Bachelors of Engineering – Artificial Intelligence and Data Science Engineering 

CMR Institute of Technology, Bengaluru 
9.03 CGPA, 2025(Pursuing) 

•  12th Grade – Science 

Central Academy, Kota, Rajasthan 
79.6%, 2021 
•  10th Grade 

Excellent Public 

In [5]:
def extract_text_ppt(ppt_path):
    """Collect the text of every text-bearing shape in a presentation.

    Opens ``ppt_path`` with python-pptx, walks the slides in order and, for
    each shape exposing a ``text`` attribute, appends that text followed by a
    blank line. Returns the concatenated string ("" if nothing was found).
    """
    deck = Presentation(ppt_path)
    # Shapes without a ``text`` attribute are skipped.
    chunks = (
        shape.text + "\n\n"
        for slide in deck.slides
        for shape in slide.shapes
        if hasattr(shape, "text")
    )
    return "".join(chunks)

In [None]:
def generate_notes_mistral(text, api_key_mistral):
    """Ask Mistral to pull the contact fields and skills out of resume text.

    Parameters
    ----------
    text : resume text that is appended to the end of the instruction prompt.
    api_key_mistral : API key used to construct the ``Mistral`` client.

    Returns
    -------
    The content of the first choice of the chat completion. The prompt asks
    for ``field_name : value`` lines for Name, Email ID, Contact Number,
    LinkedIn, GitHub and Skills.
    """
    # Adjacent literals are concatenated at compile time, so the prompt sent
    # to the model is exactly one string.
    instructions = (
        "Analyze this text from a resume and return the following information on new lines \n"
        "Name, Email ID, Contact Number, LinkedIn and GitHub page links, and extract all skills from projects and those under the skills section and pack them all under the skills field, seperated by commas \n"
        "Give me these details in the form field_name : values, each on a new line\n"
        "Please make sure the field names are exactly like this: Name, Email ID, Contact Number, LinkedIn, GitHub, Skills\n"
        f"{text}"
    )
    user_message = {"role": "user", "content": instructions}

    mistral_client = Mistral(api_key=api_key_mistral)
    response = mistral_client.chat.complete(
        model="mistral-large-latest",
        messages=[user_message],
    )
    first_choice = response.choices[0]
    return first_choice.message.content

In [13]:
def recommend_projects(skills, api_key_mistral):
    """Ask Mistral for five project ideas matching the given skills.

    Parameters
    ----------
    skills : value appended to the end of the instruction prompt (the caller
        in this notebook passes a comma-separated skills string).
    api_key_mistral : API key used to construct the ``Mistral`` client.

    Returns
    -------
    The content of the first choice of the chat completion.
    """
    # Adjacent literals are concatenated at compile time into a single prompt.
    request_text = (
        "Based on the skills provided below, recommend top 5 projects that the user can work on and successfully complete\n"
        "Generate your response in this format: Project Name, Project Description, Tools and Skills Utilized, and Steps \n"
        f"{skills}"
    )
    user_message = {"role": "user", "content": request_text}

    mistral_client = Mistral(api_key=api_key_mistral)
    response = mistral_client.chat.complete(
        model="mistral-large-latest",
        messages=[user_message],
    )
    first_choice = response.choices[0]
    return first_choice.message.content

**MAIN FOR RESUME FIELD EXTRACTION**

In [None]:
# Column order of the CSV and the only field names accepted from the model.
_RESUME_FIELDS = ("Name", "Email ID", "Contact Number", "LinkedIn", "GitHub", "Skills")


def _parse_user_info(raw_response, fieldnames=_RESUME_FIELDS):
    """Turn the model's ``field_name : value`` lines into a dict.

    Only fields listed in ``fieldnames`` are kept (matched case-insensitively
    and mapped back to their canonical spelling), so a chatty preamble such as
    "Here are the details:" cannot introduce unexpected keys. Lines without a
    colon are ignored. If a field appears more than once, the last one wins.
    """
    canonical = {name.lower(): name for name in fieldnames}
    parsed = {}
    for line in raw_response.splitlines():
        # Split on the first colon only: values such as URLs contain colons.
        key, separator, value = line.partition(":")
        if not separator:
            continue
        # Drop whitespace and common markdown decoration ("**Name**", "- Name").
        field = canonical.get(key.strip(" \t*-#`_").lower())
        if field is None:
            continue
        parsed[field] = value.strip(" \t*`")
    return parsed


def _append_row_to_csv(row, csv_filename, fieldnames=_RESUME_FIELDS):
    """Append ``row`` to ``csv_filename``, writing the header if the file is empty."""
    with open(csv_filename, mode="a", newline="", encoding="utf-8") as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames, extrasaction="ignore")
        # In append mode the position starts at end-of-file, so 0 means the
        # file is new or empty and still needs its header row.
        if file.tell() == 0:
            writer.writeheader()
        # Fields missing from ``row`` are written as empty cells.
        writer.writerow(row)


def main(filepath='../data/pdf/1CR21EC252_ADARSHVINOD_RESUME.pdf',
         csv_filename="user_info.csv",
         api_key_mistral=None):
    """Extract resume fields, append them to a CSV and print project ideas.

    Parameters
    ----------
    filepath : path to the resume; ``.pdf`` and ``.pptx`` are supported.
    csv_filename : CSV file the extracted fields are appended to.
    api_key_mistral : Mistral API key. When omitted, the key is read from the
        ``MISTRALAI_API_KEY`` environment variable (after loading a ``.env``
        file if one is found), falling back to a notebook-level
        ``api_key_mistral`` variable for kernels that already define one.

    Raises
    ------
    RuntimeError : no API key could be found.
    ValueError : unsupported file extension, or none of the expected fields
        could be parsed from the model's response.
    """
    if api_key_mistral is None:
        # Keep the key out of the notebook: store it in a .env file or the
        # process environment as MISTRALAI_API_KEY.
        load_dotenv()
        api_key_mistral = os.getenv("MISTRALAI_API_KEY") or globals().get("api_key_mistral")
    if not api_key_mistral:
        raise RuntimeError(
            "Mistral API key not found: set MISTRALAI_API_KEY in the environment "
            "or a .env file, or pass api_key_mistral to main()."
        )

    extension = os.path.splitext(filepath)[1].lower()
    if extension == '.pdf':
        resume_text = extract_text_pdf(filepath)
    elif extension == '.pptx':
        resume_text = extract_text_ppt(filepath)
    else:
        raise ValueError(f"Unsupported resume file type {extension!r}: {filepath}")

    # The model answers with plain text; see _parse_user_info for the format.
    user_info = generate_notes_mistral(resume_text, api_key_mistral)
    user_info_dict = _parse_user_info(user_info)
    if not user_info_dict:
        # Fail before touching the CSV so no empty row is appended.
        raise ValueError(
            "Could not parse any resume fields from the model response: "
            f"{user_info[:200]!r}"
        )

    print(user_info_dict)

    _append_row_to_csv(user_info_dict, csv_filename)

    # Project recommendations
    skills = user_info_dict.get("Skills")
    if not skills:
        print("No skills were extracted from the resume; skipping project recommendations.")
        return
    projects = recommend_projects(skills, api_key_mistral)
    print(projects)
    

In [16]:
# Run the resume pipeline: extract the fields, append them to the CSV and
# print the recommended projects.
main()

{'Name': 'ADARSH VINOD', 'Email ID': 'advi21ee@cmrit.ac.in', 'Contact Number': '+91 9284597756', 'LinkedIn': 'www.linkedin.com/in/adarshvinod692021', 'GitHub': 'https://github.com/devAdarsh19', 'Skills': 'C, Python, Java, Matlab, Scilab, Cadence Virtuoso, Intel Quartus Prime, VS Code, Unity, Git, SQLite3, MySQL, HTML, CSS, JavaScript, React, Django, Bootstrap, OpenCV, Machine Learning, QGIS, Arduino, ESP32, ReactJS, Django (Python), Circumeo'}
Sure, based on the skills you've mentioned, here are five project ideas that you can work on and successfully complete:

### Project 1: IoT Weather Station
**Project Description:**
Create an IoT-based weather station using Arduino or ESP32 to collect environmental data such as temperature, humidity, and pressure. The data will be sent to a web server for real-time monitoring.

**Tools and Skills Utilized:**
- Arduino/ESP32
- Sensors (e.g., DHT22, BMP280)
- JavaScript, HTML, CSS
- ReactJS
- SQLite3/MySQL
- Git

**Steps:**
1. Set up the Arduino/ESP