# Generate Data for Tech Companies

In [10]:
import pandas as pd
from faker import Faker
import random
from faker.providers import BaseProvider

In [11]:
class NepaliTechCompanyProvider(BaseProvider):
    """Faker provider that returns a company name from a fixed, hand-written list.

    After registering the provider with ``fake.add_provider(...)``, call
    ``fake.tech_company()`` to obtain one name.
    """

    # Defined once at class level instead of being rebuilt on every call.
    # Every name appears exactly once so that each one is equally likely to be
    # drawn (repeated entries would be picked proportionally more often).
    tech_companies = (
        'Leapfrog Technology', 'CloudFactory', 'Fusemachines',
        'LogPoint', 'ESignature', 'Deerwalk', 'Verscend',
        'Braindigit', 'Javra Software', 'Verisk Nepal',
        'Insight Workshop', 'Cotiviti Nepal', 'CloudTech', 'Bits Innovation',
        'EB Pearls', 'Sustainable Solutions', 'Janaki Technology', 'Open Eyes IT Solution',
        'Stupa IT', 'Swift Technology', 'Information Technology Nepal',
        'ITONICS', 'Tech One Global', 'Pycoders', 'Vertex Solutions',
        'Elder IT Solutions', 'Proshore', 'IT Gurus', 'Upaya City Cargo',
        'Tech Sansar', 'Merojob', 'Tiger Palace Resort', 'Jobaxle',
        'Ansu Solutions', 'Sprout Technology', 'Webpage Nepal', 'Innovate Tech',
        'GrowByData', 'ESR Tech', 'BrainStation 23', 'Deerwalk Institute',
        'F1Soft International', 'SunBi Design Studio', 'Verscend Technologies',
        'Yomari', 'Neema Academy', 'Techsanjal', 'NAXA', 'Nepxpress',
        'Green IT Solutions', 'VertexWeb', 'AXIOM Nepal', 'SastoDeal',
        'NepaliPatro', 'SmartMobe Solutions', 'Pumori Software Solutions',
        'Aayulogic', 'Digital Nepal', 'HoneyGuide Apps', 'Real Time Solutions',
        'Cotiviti', 'IntroCept', 'Genese Solution', 'Braindigit IT Solution',
        'Codewing Solutions', 'Himalaya Organization', 'Nepal GIS',
        'Echo Innovators', 'Digital Home International', 'SoftNEP',
        'Eminence Ways', 'Hearts & Tears', 'One Click Innovation',
        'Arhant Solutions', 'Krishi Tech', 'Iron Gate Technology',
        'Mavorion Systems', 'Unelma Platforms', 'DigiHub', 'Nepware',
        'Digi Technolab', 'Tech Central', 'Stupa Tech', 'Innovative Solutions',
        'Vertex Craft', 'Kantipur Digital Corp', 'Softmandu',
        'Optimum Futurist', 'Sagoon', 'Subisu', 'Vianet', 'WorldLink',
        'Nepal Telecom', 'Ncell', 'Mercantile Communications',
        'Namaste Pay', 'F1Soft', 'eSewa', 'Khalti', 'Hamro Patro',
        'Foodmandu', 'Pathao', 'Tootle', 'Urban Girl', 'NepBay',
        'Nepal Health News', 'Sajha Yatayat', 'Yellow Nepal',
        'Tech In Asia Nepal', 'Hamro Doctor', 'Nepal Drives',
        'Nepal B2B', 'Nepal Media', 'Nepal IT', 'Nepal Computer',
        'Nepal Software', 'Nepal App Developers', 'Nepal Internet Exchange',
        'Nepal Web Solutions', 'Nepal Online Services', 'Nepal Wireless',
        'Nepal Telecom Services', 'Nepal Digital Communications',
        'Nepal Network Solutions', 'Nepal Cyber Security',
        'Nepal E-commerce Solutions', 'Nepal Payment Solutions',
        'Nepal Health Tech', 'Nepal Agri Tech', 'Nepal Edu Tech',
        'Nepal Travel Tech', 'Nepal Fin Tech', 'Nepal Mobility Tech',
        'Nepal Logistics Tech', 'Nepal Media Tech', 'Nepal Legal Tech',
        'Nepal Retail Tech', 'Nepal Smart Tech', 'Nepal Green Tech',
        'Nepal Energy Tech', 'Nepal Water Tech', 'Nepal Construction Tech',
        'Nepal Manufacturing Tech', 'Nepal Automotive Tech',
        'Nepal Fashion Tech', 'Nepal Beauty Tech', 'Nepal Wellness Tech',
        'Nepal Hospitality Tech', 'Nepal Real Estate Tech',
        'Nepal Food Tech', 'Nepal Beverage Tech', 'Nepal Service Tech',
        'Nepal Tech Labs', 'Nepal AI Solutions',
        'Nepal Blockchain Solutions', 'Nepal IoT Solutions',
        'Nepal Robotics Solutions', 'Nepal Big Data Solutions',
        'Nepal Cloud Solutions', 'Nepal Cyber Solutions', 'Nepal IT Services',
        'Nepal Digital Solutions', 'Nepal Software Development',
        'Nepal App Development', 'Nepal Web Development',
        'Nepal E-commerce Development',
        'Nepal Digital Marketing', 'Nepal SEO Services',
        'Nepal Content Creation', 'Nepal Graphic Design', 'Nepal UX/UI Design',
    )

    def tech_company(self):
        """Return one company name chosen at random from ``tech_companies``.

        The draw goes through the provider's own ``random_element`` helper
        rather than the global ``random`` module, so it follows whatever seed
        has been set on the Faker instance.
        """
        return self.random_element(self.tech_companies)

In [12]:
# Shared Faker instance used by the data-generation cells below.
fake = Faker()
# Register the custom provider so that `fake.tech_company()` becomes available.
fake.add_provider(NepaliTechCompanyProvider)

In [13]:
# Pool of skill names that both company requirements and personal skill sets
# are sampled from.
skills = [
    'Python',
    'Java',
    'C++',
    'JavaScript',
    'HTML',
    'CSS',
    'SQL',
    'R',
    'Machine Learning',
    'Data Science',
]

In [14]:
# Build 300 synthetic company records. Each record pairs a randomly drawn
# company name with between one and five distinct skills sampled from `skills`.
# Names are drawn independently, so the same name can occur more than once.
companies = [
    {
        'CompanyName': fake.tech_company(),
        'RequiredSkills': random.sample(skills, k=random.randint(1, 5)),
    }
    for _ in range(300)
]

In [15]:
# One row per generated company; columns come from the record keys
# ('CompanyName', 'RequiredSkills').
companies_df = pd.DataFrame(data=companies)

In [16]:
# Persist the company table without the positional index column.
companies_df.to_csv(
    path_or_buf='tech_companies_nepal.csv',
    index=False,
)

# Working with Manpower Skill Sets

In [17]:
# Build 50,000 synthetic people. Each person gets a UUID as identifier and
# between one and five distinct skills sampled from `skills`.
manpower = [
    {
        'PersonID': fake.uuid4(),
        'Skills': random.sample(skills, k=random.randint(1, 5)),
    }
    for _ in range(50000)
]

In [19]:
# One row per generated person ('PersonID', 'Skills'), written to disk without
# the positional index column.
manpower_df = pd.DataFrame(data=manpower)
manpower_df.to_csv(
    path_or_buf='manpower_skills.csv',
    index=False,
)

# Match Manpower to Companies with ML (Cosine Similarity)

In [20]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity


In [21]:
# Data preparation: flatten each list of skills into a single space-separated
# string and store it in a new column next to the original list.
companies_df['RequiredSkillsStr'] = companies_df['RequiredSkills'].map(' '.join)
manpower_df['SkillsStr'] = manpower_df['Skills'].map(' '.join)


In [22]:
# Vectorize the skill sets.
#
# The vectorizer is fed the original skill *lists* with an identity analyzer,
# so every skill name is exactly one feature. The space-joined string columns
# are deliberately not used here: CountVectorizer's default text analyzer only
# keeps tokens of two or more word characters, which silently drops 'R' and
# 'C++' and splits 'Machine Learning' / 'Data Science' into unrelated words.
# People whose only skills were dropped would end up with all-zero vectors.
vectorizer = CountVectorizer(analyzer=lambda skill_list: skill_list)

# Learn the vocabulary from both sides so that a person's vector (and hence its
# norm in the cosine similarity) accounts for every skill they have, even one
# that no company happens to require.
vectorizer.fit(
    pd.concat(
        [companies_df['RequiredSkills'], manpower_df['Skills']],
        ignore_index=True,
    )
)
company_skill_matrix = vectorizer.transform(companies_df['RequiredSkills'])
manpower_skill_matrix = vectorizer.transform(manpower_df['Skills'])

In [23]:
# Pairwise cosine similarity between skill vectors: one row per person, one
# column per company (the order of the two arguments fixes the orientation).
similarity_matrix = cosine_similarity(
    X=manpower_skill_matrix,
    Y=company_skill_matrix,
)

In [24]:
# Assign every person to the company whose required skills are most similar.
#
# The best column and its score are computed for all rows at once instead of
# looking up DataFrame rows one by one inside a Python loop. `argmax` returns
# the first maximum in each row, so ties go to the earliest company, and a
# person whose scores are all 0 falls back to the first company with a
# MatchScore of 0.
best_match_indices = similarity_matrix.argmax(axis=1)
best_match_scores = similarity_matrix.max(axis=1)

person_ids = manpower_df['PersonID'].to_numpy()
matched_company_names = companies_df['CompanyName'].to_numpy()[best_match_indices]

job_assignments = [
    {
        'PersonID': person_id,
        'CompanyName': company_name,
        'MatchScore': match_score,
    }
    for person_id, company_name, match_score in zip(
        person_ids, matched_company_names, best_match_scores
    )
]

In [25]:
# Collect the per-person assignments into a table and write it to disk without
# the positional index column.
assignments_df = pd.DataFrame(data=job_assignments)
assignments_df.to_csv(
    path_or_buf='job_assignments.csv',
    index=False,
)