In [1]:
import pandas as pd

# File paths
input_file = "job_descriptions.csv"   # Large raw CSV file
output_file = "cleaned_job_descriptions.csv"  # Same rows, restricted to columns_to_keep

# Columns to keep; every other column in the input is dropped by `usecols`.
columns_to_keep = [
    "JobId", "Experience", "SalaryRange", "Qualifications", "location", "WorkType",
    "JobPostingDate", "JobTitle", "Role", "JobDescription", "skills", "Company",
]

# Number of rows read (and held in memory) per iteration; adjust to system memory.
chunk_size = 100000

# Stream the file: each chunk is written out as soon as it is read, so at most
# `chunk_size` rows are in memory at once. Collecting the chunks in a list and
# concatenating them would load the whole dataset and defeat the chunking.
for chunk_index, chunk in enumerate(
    pd.read_csv(input_file, usecols=columns_to_keep, chunksize=chunk_size)
):
    is_first_chunk = chunk_index == 0
    chunk.to_csv(
        output_file,
        # The first chunk truncates any previous output and writes the header;
        # later chunks append rows only.
        mode="w" if is_first_chunk else "a",
        header=is_first_chunk,
        index=False,
    )

print("‚úÖ Cleaned CSV saved as:", output_file)


‚úÖ Cleaned CSV saved as: cleaned_job_descriptions.csv


In [3]:
import pandas as pd

# File path
input_file = "cleaned_job_descriptions.csv"

# Column to extract unique values from
column_name = "JobTitle"

# Rows read per iteration; only `column_name` is loaded, so chunks stay small.
chunk_size = 100000
unique_job_titles = set()

for chunk in pd.read_csv(input_file, usecols=[column_name], chunksize=chunk_size):
    # Missing titles are dropped so NaN never enters the set.
    unique_job_titles.update(chunk[column_name].dropna().unique())

# Convert to a sorted list for better readability
unique_job_titles = sorted(unique_job_titles)

# Display the number of unique job titles, then the titles themselves.
# (The former `print(unique_job_titles.count)` printed the repr of the bound
# `list.count` method rather than a number; the count is already shown here.)
print("‚úÖ Total Unique Job Titles:", len(unique_job_titles))
print(unique_job_titles)


‚úÖ Total Unique Job Titles: 147
['Account Director', 'Account Executive', 'Account Manager', 'Accountant', 'Administrative Assistant', 'Aerospace Engineer', 'Architect', 'Architectural Designer', 'Art Director', 'Art Teacher', 'Back-End Developer', 'Brand Ambassador', 'Brand Manager', 'Business Analyst', 'Business Development Manager', 'Chemical Analyst', 'Chemical Engineer', 'Civil Engineer', 'Content Writer', 'Copywriter', 'Customer Service Manager', 'Customer Service Representative', 'Customer Success Manager', 'Customer Support Specialist', 'Data Analyst', 'Data Engineer', 'Data Entry Clerk', 'Data Scientist', 'Database Administrator', 'Database Developer', 'Dental Hygienist', 'Digital Marketing Specialist', 'Electrical Designer', 'Electrical Engineer', 'Email Marketing Specialist', 'Environmental Consultant', 'Environmental Engineer', 'Event Coordinator', 'Event Manager', 'Event Planner', 'Executive Assistant', 'Family Lawyer', 'Family Nurse Practitioner', 'Finance Manager', 'Fin

In [5]:
import pandas as pd

# Source CSV and the column whose values are tallied.
input_file = "cleaned_job_descriptions.csv"
column_name = "JobTitle"

# Rows read per iteration; adjust based on system memory.
chunk_size = 100000

# Running total of occurrences per job title across all chunks.
running_totals = {}

for chunk in pd.read_csv(input_file, usecols=[column_name], chunksize=chunk_size):
    # Tally this chunk on its own, then fold the tallies into the running totals.
    per_chunk = chunk[column_name].value_counts().to_dict()
    for job_title, occurrences in per_chunk.items():
        running_totals[job_title] = running_totals.get(job_title, 0) + occurrences

# Two-column frame, most frequent title first.
job_title_counts = pd.DataFrame(
    list(running_totals.items()), columns=["JobTitle", "Count"]
).sort_values(by="Count", ascending=False)

# Show the result and persist it next to the notebook.
print(job_title_counts)

job_title_counts.to_csv("job_title_counts.csv", index=False)
print("‚úÖ Job title counts saved to 'job_title_counts.csv'")


                         JobTitle  Count
0                  UX/UI Designer  48551
1    Digital Marketing Specialist  27975
2               Software Engineer  27630
3                Network Engineer  24393
6                 Software Tester  20945
..                            ...    ...
131            Personal Assistant   3441
143                   QA Engineer   3439
133       Procurement Coordinator   3424
146           Key Account Manager   3411
135             Inventory Analyst   3342

[147 rows x 2 columns]
‚úÖ Job title counts saved to 'job_title_counts.csv'


In [6]:
import pandas as pd

# File paths
input_file = "cleaned_job_descriptions.csv"
output_file = "reduced_job_descriptions.csv"

# Maximum number of rows kept for each job title.
entries_per_title = 1000  # Adjust this value as needed

# Rows read per iteration.
chunk_size = 100000

# title -> list of DataFrame slices collected so far for that title
pieces_by_title = {}
# title -> number of rows already collected for that title
rows_kept = {}

for chunk in pd.read_csv(input_file, chunksize=chunk_size):
    for title, group in chunk.groupby("JobTitle"):
        # Keep topping a title up from later chunks until it reaches the cap.
        # Taking rows only from the first chunk a title appears in would leave
        # it short whenever that chunk holds fewer than `entries_per_title` rows.
        remaining = entries_per_title - rows_kept.get(title, 0)
        if remaining <= 0:
            continue
        piece = group.iloc[:remaining]
        pieces_by_title.setdefault(title, []).append(piece)
        rows_kept[title] = rows_kept.get(title, 0) + len(piece)

# title -> DataFrame holding at most `entries_per_title` rows, in file order
job_title_data = {
    title: pd.concat(pieces) for title, pieces in pieces_by_title.items()
}

# Combine all filtered job titles
reduced_data = pd.concat(list(job_title_data.values()), ignore_index=True)

# Save the reduced dataset
reduced_data.to_csv(output_file, index=False)

print("‚úÖ Reduced dataset saved as:", output_file)


‚úÖ Reduced dataset saved as: reduced_job_descriptions.csv


In [3]:
import pandas as pd
import google.generativeai as genai
import time

# Read the Gemini API key from the environment instead of embedding it in the
# notebook. The key that used to be hard-coded here is exposed to anyone who
# can read this file and should be revoked and replaced.
import os

GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
if not GEMINI_API_KEY:
    # Fail early with a clear message rather than on the first API call.
    raise RuntimeError(
        "GEMINI_API_KEY is not set; export it in the environment before running this cell."
    )
genai.configure(api_key=GEMINI_API_KEY)

# Job titles to generate resumes for (replace with the actual titles needed).
# NOTE(review): this list starts at 'Back-End Developer', whereas the list in
# the grouping cell further down starts at 'Account Director' — presumably the
# earlier titles were handled in a previous run; confirm before re-running.
job_titles = ['Back-End Developer', 
              'Brand Ambassador', 'Brand Manager', 'Business Analyst', 'Business Development Manager', 'Chemical Analyst', 
              'Chemical Engineer', 'Civil Engineer', 'Content Writer', 'Copywriter', 'Customer Service Manager', 
              'Customer Service Representative', 'Customer Success Manager', 'Customer Support Specialist', 'Data Analyst', 
              'Data Engineer', 'Data Entry Clerk', 'Data Scientist', 'Database Administrator', 'Database Developer', 'Dental Hygienist', 
              'Digital Marketing Specialist', 'Electrical Designer', 'Electrical Engineer', 'Email Marketing Specialist', 
              'Environmental Consultant', 'Environmental Engineer', 'Event Coordinator', 'Event Manager', 'Event Planner', 
              'Executive Assistant', 'Family Lawyer', 'Family Nurse Practitioner', 'Finance Manager', 'Financial Advisor', 
              'Financial Analyst', 'Financial Controller', 'Financial Planner', 'Front-End Developer', 'Front-End Engineer', 'Graphic Designer', 
              'HR Coordinator', 'HR Generalist', 'HR Manager', 'Human Resources Manager', 'IT Administrator', 'IT Manager', 'IT Support Specialist', 
              'Interior Designer', 'Inventory Analyst', 'Investment Advisor', 'Investment Analyst', 'Investment Banker', 'Java Developer', 
              'Key Account Manager', 'Landscape Architect', 'Landscape Designer', 'Legal Advisor', 'Legal Assistant', 'Legal Counsel', 'Legal Secretary', 
              'Litigation Attorney', 'Market Analyst', 'Market Research Analyst', 'Marketing Analyst', 'Marketing Coordinator', 'Marketing Director', 'Marketing Manager', 
              'Marketing Specialist', 'Mechanical Designer', 'Mechanical Engineer', 'Network Administrator', 'Network Analyst', 'Network Engineer', 'Network Security Specialist', 
              'Network Technician', 'Nurse Manager', 'Nurse Practitioner', 'Occupational Therapist', 'Office Manager', 'Operations Manager', 'Paralegal', 'Pediatrician', 'Personal Assistant', 
              'Pharmaceutical Sales Representative', 'Physical Therapist', 'Physician Assistant', 'Process Engineer', 'Procurement Coordinator', 'Procurement Manager', 'Procurement Specialist',
                'Product Designer', 'Product Manager', 'Project Coordinator', 'Project Manager', 'Psychologist', 'Public Relations Specialist', 
                'Purchasing Agent', 'QA Analyst', 'QA Engineer', 'Quality Assurance Analyst', 'Registered Nurse', 'Research Analyst', 'Research Scientist', 
                'SEM Specialist', 'SEO Analyst', 'SEO Specialist', 'Sales Associate', 'Sales Consultant', 'Sales Manager', 'Sales Representative', 
                'Social Media Coordinator', 'Social Media Manager', 'Social Worker', 'Software Architect', 'Software Developer', 'Software Engineer', 
                'Software Tester', 'Speech Therapist', 'Structural Engineer', 'Substance Abuse Counselor', 'Supply Chain Analyst', 'Supply Chain Manager', 
                'Systems Administrator', 'Systems Analyst', 'Systems Engineer', 'Tax Consultant', 'Teacher', 'Technical Writer', 'UI Developer', 
                'UX Researcher', 'UX/UI Designer', 'Urban Planner', 'Veterinarian', 'Web Designer', 'Web Developer', 'Wedding Planner']

# CSV file the generated resumes are written to (columns come from the dicts
# returned by generate_resumes: "Category" and "Resume").
output_file = "ai_generated_resumes1.csv"

# Function to generate multiple resumes per job title
def generate_resumes(job_title, num_resumes=5, model=None, max_retries=3, delay_seconds=1):
    """Generate up to ``num_resumes`` resume texts for ``job_title``.

    Each resume is requested with one ``generate_content`` call. A failed call
    is logged and retried with exponential backoff; a resume that still fails
    after ``max_retries`` retries is skipped, so the returned list may be
    shorter than ``num_resumes``.

    Args:
        job_title: Job title inserted into the prompt and stored as the
            "Category" of every returned record.
        num_resumes: Number of resumes to request. Values <= 0 return ``[]``
            without touching the API.
        model: Optional object exposing ``generate_content(prompt)`` whose
            result has a ``.text`` attribute. Defaults to
            ``genai.GenerativeModel("gemini-1.5-pro")``.
        max_retries: Extra attempts made for one resume after a failure.
        delay_seconds: Pause after every resume; also the base of the
            exponential backoff between retries.

    Returns:
        A list of ``{"Category": job_title, "Resume": text}`` dicts.
    """
    if num_resumes <= 0:
        return []

    # Build the model and the prompt once; neither changes between iterations.
    if model is None:
        model = genai.GenerativeModel("gemini-1.5-pro")

    prompt = f"""
        Generate a structured resume content for a {job_title}. 
        The resume should contain only:
        - **Skills** (List of relevant technical & soft skills)
        - **Education** (Relevant degrees and certifications)
        - **Experience** (Previous roles and work experience)
        
        Ensure each resume is unique and suitable for {job_title}. 
        Format the response as a structured text.
        """

    resumes = []
    for _ in range(num_resumes):
        for attempt in range(max_retries + 1):
            try:
                response = model.generate_content(prompt)
                resume_text = response.text.strip()
            except Exception as e:
                # Best effort: log the failure and back off before retrying
                # (timeouts and quota errors are typically transient).
                print(f"‚ö†Ô∏è Error generating resume for {job_title}: {e}")
                if attempt < max_retries:
                    time.sleep(delay_seconds * 2 ** (attempt + 1))
            else:
                resumes.append({"Category": job_title, "Resume": resume_text})
                break

        time.sleep(delay_seconds)  # Prevents API rate limit issues

    return resumes

# Number of resumes requested per job title.
resumes_per_title = 3

# Generate resumes for all job titles
all_resumes = []
for title in job_titles:
    print(f"üìù Generating resumes for: {title}...")
    all_resumes.extend(generate_resumes(title, num_resumes=resumes_per_title))

    # Checkpoint after every title so an interrupted run (quota error, manual
    # stop, kernel restart) keeps everything generated so far.
    pd.DataFrame(all_resumes).to_csv(output_file, index=False)

# Save the generated resumes to CSV
resumes_df = pd.DataFrame(all_resumes)
resumes_df.to_csv(output_file, index=False)

print("‚úÖ AI-generated resumes saved as:", output_file)


üìù Generating resumes for: Back-End Developer...
üìù Generating resumes for: Brand Ambassador...
üìù Generating resumes for: Brand Manager...
üìù Generating resumes for: Business Analyst...
‚ö†Ô∏è Error generating resume for Business Analyst: 504 Deadline Exceeded
üìù Generating resumes for: Business Development Manager...
üìù Generating resumes for: Chemical Analyst...
üìù Generating resumes for: Chemical Engineer...
üìù Generating resumes for: Civil Engineer...
‚ö†Ô∏è Error generating resume for Civil Engineer: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 26
}
]
‚ö†Ô∏è Error generating resume for Civil Engineer: 429 You exceeded your current quota, please check your plan and billing

KeyboardInterrupt: 

In [None]:
from collections import defaultdict

# Hard-coded list of job titles to group by first letter (replace with the
# actual titles as needed).
# NOTE(review): presumably pasted from the unique-JobTitle output printed
# earlier in this notebook — confirm, or derive it from the CSV instead.
job_titles = ['Account Director', 'Account Executive', 'Account Manager', 'Accountant', 'Administrative Assistant', 
              'Aerospace Engineer', 'Architect', 'Architectural Designer', 'Art Director', 'Art Teacher', 'Back-End Developer', 
              'Brand Ambassador', 'Brand Manager', 'Business Analyst', 'Business Development Manager', 'Chemical Analyst', 
              'Chemical Engineer', 'Civil Engineer', 'Content Writer', 'Copywriter', 'Customer Service Manager', 
              'Customer Service Representative', 'Customer Success Manager', 'Customer Support Specialist', 'Data Analyst', 
              'Data Engineer', 'Data Entry Clerk', 'Data Scientist', 'Database Administrator', 'Database Developer', 'Dental Hygienist', 
              'Digital Marketing Specialist', 'Electrical Designer', 'Electrical Engineer', 'Email Marketing Specialist', 
              'Environmental Consultant', 'Environmental Engineer', 'Event Coordinator', 'Event Manager', 'Event Planner', 
              'Executive Assistant', 'Family Lawyer', 'Family Nurse Practitioner', 'Finance Manager', 'Financial Advisor', 
              'Financial Analyst', 'Financial Controller', 'Financial Planner', 'Front-End Developer', 'Front-End Engineer', 'Graphic Designer', 
              'HR Coordinator', 'HR Generalist', 'HR Manager', 'Human Resources Manager', 'IT Administrator', 'IT Manager', 'IT Support Specialist', 
              'Interior Designer', 'Inventory Analyst', 'Investment Advisor', 'Investment Analyst', 'Investment Banker', 'Java Developer', 
              'Key Account Manager', 'Landscape Architect', 'Landscape Designer', 'Legal Advisor', 'Legal Assistant', 'Legal Counsel', 'Legal Secretary', 
              'Litigation Attorney', 'Market Analyst', 'Market Research Analyst', 'Marketing Analyst', 'Marketing Coordinator', 'Marketing Director', 'Marketing Manager', 
              'Marketing Specialist', 'Mechanical Designer', 'Mechanical Engineer', 'Network Administrator', 'Network Analyst', 'Network Engineer', 'Network Security Specialist', 
              'Network Technician', 'Nurse Manager', 'Nurse Practitioner', 'Occupational Therapist', 'Office Manager', 'Operations Manager', 'Paralegal', 'Pediatrician', 'Personal Assistant', 
              'Pharmaceutical Sales Representative', 'Physical Therapist', 'Physician Assistant', 'Process Engineer', 'Procurement Coordinator', 'Procurement Manager', 'Procurement Specialist',
                'Product Designer', 'Product Manager', 'Project Coordinator', 'Project Manager', 'Psychologist', 'Public Relations Specialist', 
                'Purchasing Agent', 'QA Analyst', 'QA Engineer', 'Quality Assurance Analyst', 'Registered Nurse', 'Research Analyst', 'Research Scientist', 
                'SEM Specialist', 'SEO Analyst', 'SEO Specialist', 'Sales Associate', 'Sales Consultant', 'Sales Manager', 'Sales Representative', 
                'Social Media Coordinator', 'Social Media Manager', 'Social Worker', 'Software Architect', 'Software Developer', 'Software Engineer', 
                'Software Tester', 'Speech Therapist', 'Structural Engineer', 'Substance Abuse Counselor', 'Supply Chain Analyst', 'Supply Chain Manager', 
                'Systems Administrator', 'Systems Analyst', 'Systems Engineer', 'Tax Consultant', 'Teacher', 'Technical Writer', 'UI Developer', 
                'UX Researcher', 'UX/UI Designer', 'Urban Planner', 'Veterinarian', 'Web Designer', 'Web Developer', 'Wedding Planner']


# Maps an uppercase first letter to the job titles starting with that letter,
# in the order the titles appear in `job_titles`.
job_dict = defaultdict(list)

# Group job titles by their first letter
for title in job_titles:
    if not title:
        # An empty title has no first letter; skip it instead of raising IndexError.
        continue
    first_letter = title[0].upper()  # Get the first letter (uppercase)
    job_dict[first_letter].append(title)

# File to save the grouped job titles
output_file = "grouped_job_titles.txt"

# Write one section per letter, in alphabetical order. The encoding is set
# explicitly so the file is written the same way on every platform.
with open(output_file, "w", encoding="utf-8") as file:
    for letter, titles in sorted(job_dict.items()):
        file.write(f"\n **Job Titles Starting with '{letter}'**\n")
        file.write(f"{letter}: {titles}\n")

print(f"‚úÖ Grouped job titles have been saved to {output_file}")


‚úÖ Grouped job titles have been saved to grouped_job_titles.txt
