Suppose that Integrify wants to gain insights into the full-stack job market in order to establish best practices and update its curriculum, so that its students have the best possible chance of receiving job offers.
Your tasks are the following:
Each group member will work on one of the following countries: [Finland, Netherlands, Denmark, Sweden, Germany].
Use the following keyword sets and try to locate 20 companies in each country:
FS= [Front-end development, HTML, CSS, JavaScript, React, Angular, Vue.js, Bootstrap, jQuery, responsive design, Back-end development, Node.js, Python, Ruby, PHP, Java, .NET, SQL, NoSQL, RESTful APIs, web servers, Database management,  MySQL, PostgreSQL, MongoDB, Redis, Cassandra, Oracle, SQL Server, DevOps, AWS, Azure, Google Cloud, Docker, Kubernetes, Git, Jenkins, Travis CI, CircleCI, monitoring and logging tools, Project management, Agile, Scrum, Kanban, JIRA, Trello, Asana, project planning, team collaboration, communication skills]

Collect all job offers of each company for a one-year time frame. 
You will end up with a dictionary in which each key is a company name and each value is a list of dictionaries.
The keys in the sub-dictionaries correspond to keywords, and the values correspond to the company’s posts that include those keywords. 
In total, you will produce five dictionaries, each corresponding to one of the listed countries above. 
Save each dictionary in JSON format under the name of the corresponding country.


In [None]:
import json
import re
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup

class LinkedinScraper:
    """Collect LinkedIn job postings for a list of companies in one country.

    For every company the scraper pages through the LinkedIn job search,
    opens each job posting it finds, and records which of ``keywords``
    occur in the posting's description. ``scrape_jobs`` writes all
    collected records to a JSON file.
    """

    # Skill and tool names looked for in every job description.
    keywords = ['Front-end development', 'HTML', 'CSS', 'JavaScript', 'React', 'Angular', 'Vue.js', 'Bootstrap', 'jQuery', 'responsive design',
                'Back-end development', 'Node.js', 'Python', 'Ruby', 'PHP', 'Java', '.NET', 'SQL', 'NoSQL', 'RESTful APIs', 'web servers',
                'Database management', 'MySQL', 'PostgreSQL', 'MongoDB', 'Redis', 'Cassandra', 'Oracle', 'SQL Server',
                'DevOps', 'AWS', 'Azure', 'Google Cloud', 'Docker', 'Kubernetes', 'Git', 'Jenkins', 'Travis CI', 'CircleCI', 'monitoring and logging tools',
                'Project management', 'Agile', 'Scrum', 'Kanban', 'JIRA', 'Trello', 'Asana', 'project planning', 'team collaboration', 'communication skills']

    # Seconds to wait on a single HTTP request before giving up, so one
    # stalled connection cannot hang the whole run.
    request_timeout = 30

    def __init__(self, country_name, geoId, companies):
        """Store the search configuration.

        Args:
            country_name: Label written into every job record.
            geoId: Value substituted for the ``geoId`` query parameter.
            companies: Iterable of company names; each one is used as the
                search keyword.
        """
        self.country_name = country_name
        self.geoId = geoId
        self.companies = companies
        self.search_url_pattern = 'https://www.linkedin.com/jobs/search/?currentJobId={}&distance=25&geoId={}&keywords={}&refresh=true&start={}'

    def _find_keywords(self, text):
        """Return the entries of ``self.keywords`` that occur in *text*.

        Matching is case-sensitive and respects word boundaries, so
        'Java' is not reported for "JavaScript" and 'Git' is not reported
        for "GitHub". A boundary is only required on a side of the
        keyword that is itself a word character, which lets '.NET' still
        match inside "ASP.NET".
        """
        found = []
        for keyword in self.keywords:
            pattern = re.escape(keyword)
            if re.match(r'\w', keyword[0]):
                pattern = r'(?<!\w)' + pattern
            if re.match(r'\w', keyword[-1]):
                pattern = pattern + r'(?!\w)'
            if re.search(pattern, text):
                found.append(keyword)
        return found

    @staticmethod
    def _dedupe_job_links(hrefs):
        """Map each distinct job posting to the first link that points at it.

        Only links containing '/jobs/view/' are kept. The key is the link
        without its query string, so two links to the same posting that
        differ only in their query parameters count as one job.
        """
        links = {}
        for href in hrefs:
            if '/jobs/view/' in href:
                links.setdefault(href.split('?', 1)[0], href)
        return links

    def _fetch_soup(self, url):
        """Download *url* and return it parsed, or None if the request failed."""
        try:
            response = requests.get(url, timeout=self.request_timeout)
        except requests.RequestException as exc:
            # Best effort: report and let the caller skip this page instead
            # of losing everything collected so far.
            print(f"Request failed for {url}: {exc}")
            return None
        return BeautifulSoup(response.content, 'html.parser')

    def _parse_job(self, soup):
        """Extract one job record from a parsed job posting page.

        Returns:
            A ``(title, job_data)`` tuple, or None when the page lacks
            the title, the company link, or the description block.
        """
        title_tag = soup.find('h1', class_='topcard__title')
        company_tag = soup.find('a', class_='topcard__org-name-link')
        description_div = soup.find('div', class_='description__text')
        if title_tag is None or company_tag is None or description_div is None:
            return None

        job_description = description_div.get_text().strip()
        # Drop the expand/collapse button labels that sit inside the block.
        job_description = job_description.replace('Show more', '').replace('Show less', '')
        # Collapse runs of whitespace into single spaces.
        job_description = ' '.join(job_description.split())

        job_data = {
            'Country_name': self.country_name,
            'company_name': company_tag.text.strip(),
            'keywords': self._find_keywords(job_description),
            'description': job_description
        }
        return title_tag.text.strip(), job_data

    def scrape_jobs(self, output_file_path):
        """Scrape postings for every company and save them as JSON.

        Args:
            output_file_path: Path of the JSON file to write. The file
                holds a list of records with the keys 'Country_name',
                'company_name', 'keywords' and 'description'.
        """
        job_list = []
        for i, company in enumerate(self.companies):
            print(f"Company {i+1}: {company}")
            start_index = 0
            # Postings already handled for this company; used to stop when a
            # result page brings nothing new.
            seen_jobs = set()
            # Escape characters such as '&' that would otherwise cut the
            # company name short inside the query string.
            company_query = quote(company, safe='')

            while True:
                search_url = self.search_url_pattern.format(start_index, self.geoId, company_query, start_index)
                search_soup = self._fetch_soup(search_url)
                if search_soup is None:
                    break

                page_links = self._dedupe_job_links(
                    a['href'] for a in search_soup.find_all('a', href=True)
                )
                new_urls = [href for key, href in page_links.items() if key not in seen_jobs]

                # Stop when the page is empty or only repeats earlier results;
                # without this a repeating page would loop forever.
                if not new_urls:
                    break
                seen_jobs.update(page_links)

                for job_url in new_urls:
                    job_soup = self._fetch_soup(job_url)
                    if job_soup is None:
                        continue
                    parsed = self._parse_job(job_soup)
                    if parsed is None:
                        continue
                    title, job_data = parsed

                    job_list.append(job_data)
                    print("Job Title:", title)
                    print("Company Name:", job_data['company_name'])
                    print("Keywords:", job_data['keywords'])
                    print("Description:", job_data['description'])
                    print()

                # Advance once per result page, whether or not the individual
                # postings could be parsed.
                start_index += len(page_links)

        # Write the job data to the output file
        with open(output_file_path, 'w', encoding='utf-8') as json_file:
            json.dump(job_list, json_file, indent=4)

# Example usage : Finland
# country_name = "Finland"
# geoId = "100456013"
# companies = ['Oliver Parks', 'Trimble Inc.', 'Knowit', 'Eficode', 'Wolt', 'Nigel Frank International', 'Tietoevry', 'Nortal', 'Silo AI', 'Solita', 'CGI', 'Accenture', 'Wärtsilä', 'F-Secure', 'Neste', 'Nordea', 'Aiven', 'Unikie', 'Smartly.io', 'Nokia', 'Vaisala', 'Futurice', 'Elisa', 'Taiste', 'AlphaSense', 'Gofore', 'Canonical', 'Vincit']

# scraper = LinkedinScraper(country_name, geoId, companies)
# scraper.scrape_jobs("jobs_finland.json")

#Example usage : Norway
# country_name = "Norway"
# geoId = "103819153"
# companies = ['Equinor', 'Viddal Automation AS', 'Appen', 'StaffHost Europe', 'Posten Norge AS', 'Accenture Nordics', 'PA Consulting', 'Capgemini', 'Ving Norge AS', 'Nexere Consulting', 'Dun & Bradstreet Europe', 'Yara International', 'Nielsen', 'SpareBank 1 SMN', 'Vegfinans', 'TOMRA', 'Fugro']
# scraper = LinkedinScraper(country_name, geoId, companies)
# scraper.scrape_jobs("jobs_Norway.json")

# Active run: scrape the German companies below and save the result as JSON.
country_name = "Germany"
geoId = "101282230"
companies = [
    'xValue GmbH',
    'kloeckner.i GmbH',
    'Gerhard Schubert GmbH Verpackungsmaschinen',
    'Renesas Electronics',
    'GIANT-HR Mittelstandsberatung GmbH',
    'RITTERWALD Unternehmensberatung GmbH',
    'Axelera AI',
    'AILY LABS',
    'Hypatos',
    'Boehringer Ingelheim',
    'neurocat',
    'ACST GmbH',
    'TES-H2',
    'BwFuhrparkService GmbH',
    'MEIKO Group',
    'Lufthansa Technik',
    'Fraunhofer Karriere',
    'Liebherr Group',
    'Genova.ai',
]
scraper = LinkedinScraper(
    country_name=country_name,
    geoId=geoId,
    companies=companies,
)
scraper.scrape_jobs(output_file_path="jobs_Germany.json")

#Example usage : Denmark
# country_name = "Denmark"
# geoId = "104514075"
# companies = ['Cognizant', 'Vestas', 'EIVEE™', 'Radiobotics', 'HelloFresh', 'Kiloo Games', 'Ipsos', 'TotalEnergies', 'Elos Medtech', 'GEA Group', 'Siemens Gamesa', 'SimCorp']
# scraper = LinkedinScraper(country_name, geoId, companies)
# scraper.scrape_jobs("jobs_Denmark.json")
                    
