In [3]:
// Adding some packages
#r "nuget:HtmlAgilityPack"
#r "nuget:HtmlAgilityPack.CssSelectors"

Installed package HtmlAgilityPack.CssSelectors version 1.0.2

Installed package HtmlAgilityPack version 1.11.42

In [6]:
using System;
using System.Linq;


/// <summary>
/// One job posting: its descriptive fields plus the age of the posting in days.
/// </summary>
public class JobOffer
{
    /// <summary>Title of the posting.</summary>
    public string Title { get; set; }

    /// <summary>Name of the hiring company.</summary>
    public string Company { get; set; }

    /// <summary>Short description / snippet of the posting.</summary>
    public string Description { get; set; }

    /// <summary>Location text of the posting.</summary>
    public string City { get; set; }

    // Backing store for DaysAgo: a string of digits, or null until DaysAgo is first assigned.
    private string _nbDays;

    /// <summary>
    /// Age of the posting in days, stored as a string of digits.
    /// </summary>
    /// <remarks>
    /// Assigning a label that contains "il y a" (matched case-insensitively) keeps only
    /// the digits of that label. Any other label, a null label, or a label without
    /// digits is stored as "0". The getter returns null until the first assignment.
    /// </remarks>
    public string DaysAgo
    {
        get => _nbDays;
        set
        {
            string digits = "";

            // IndexOf with OrdinalIgnoreCase accepts both "Il y a 3 jours" and labels
            // where the phrase appears in lowercase inside a longer text.
            if (value != null && value.IndexOf("il y a", StringComparison.OrdinalIgnoreCase) >= 0)
            {
                digits = string.Join("", value.Where(c => Char.IsDigit(c)));
            }

            // A matching label without any digit must not leave an unparsable empty string.
            _nbDays = digits.Length > 0 ? digits : "0";
        }
    }

    /// <summary>
    /// Approximate publication time: the current local time minus <see cref="DaysAgo"/> days.
    /// Falls back to zero days when <see cref="DaysAgo"/> is unset or cannot be parsed as an int.
    /// </summary>
    public DateTime Published
    {
        get
        {
            // TryParse instead of Parse: DaysAgo may still be null, or hold more digits than an int can represent.
            if (!Int32.TryParse(DaysAgo, out int days))
            {
                days = 0;
            }

            return DateTime.Now.AddDays(-days);
        }
    }
}

In [24]:
using System.Net;
using System.Net.Http;
using System.Threading.Tasks;
using System.Collections.Generic;
using HtmlAgilityPack;
using System.Linq;
using System.Web;

/// <summary>
/// Builds fr.indeed.com search URLs for a query and location, downloads the result
/// pages and extracts the job offers they contain.
/// </summary>
public class Indeed
{
    // Results requested per page; also the step between consecutive "start" offsets.
    private const int PageSize = 50;

    // One client shared by every request: creating an HttpClient per call and never
    // disposing it leaks sockets.
    // NOTE(review): the previous per-call "ServicePointManager.SecurityProtocol = Tls13"
    // assignment was dropped. It mutated process-wide state, and per the .NET docs
    // ServicePointManager is not consulted by HttpClient on .NET Core / .NET 5+.
    // Confirm if this ever has to run on .NET Framework.
    private static readonly HttpClient s_client = new HttpClient();

    /// <summary>Not read or written by the code in this class.</summary>
    public string response { get; set; }

    /// <summary>Not read or written by the code in this class.</summary>
    public HtmlDocument htmlDoc { get; set; }

    /// <summary>Never assigned by the code in this class.</summary>
    public int MaxValue { get; }

    /// <summary>Location filter used in the search URLs.</summary>
    public string Location { get; }

    /// <summary>Search terms used in the search URLs.</summary>
    public string Query { get; }

    /// <summary>Number of result pages to request.</summary>
    public int NbPages { get; set; }

    /// <summary>Creates a scraper for the given search.</summary>
    /// <param name="query">Search terms.</param>
    /// <param name="nbPages">Number of result pages to request.</param>
    /// <param name="location">Location filter; defaults to "France".</param>
    public Indeed(string query, int nbPages, string location = "France")
    {
        NbPages = nbPages;
        Query = query;
        Location = location;
    }

    /// <summary>
    /// Builds the list of result-page URLs: the first page, then one URL per additional
    /// page with the "start" offset advancing by the page size.
    /// </summary>
    /// <returns>
    /// At least one URL (the first page is always included, whatever <see cref="NbPages"/> is),
    /// followed by <c>NbPages - 1</c> further URLs when <see cref="NbPages"/> is greater than 1.
    /// </returns>
    public List<string> MakeUrls()
    {
        string query = HttpUtility.UrlEncode(Query);
        string location = HttpUtility.UrlEncode(Location);

        var urls = new List<string>
        {
            $"http://fr.indeed.com/emplois?q={query}&l={location}&limit={PageSize}"
        };

        // Pages after the first use the "as_and" form with an explicit start offset.
        for (int page = 1; page < NbPages; page++)
        {
            int start = page * PageSize;
            urls.Add($"https://fr.indeed.com/emplois?as_and={query}&limit={PageSize}&start={start}&l={location}");
        }

        return urls;
    }

    /// <summary>
    /// Downloads and parses every URL from <see cref="MakeUrls"/>, in order, and returns
    /// all offers as one flat list.
    /// </summary>
    public List<JobOffer> GetJobs()
    {
        return MakeUrls()
            .SelectMany(url => ParseHtml(url))
            .ToList();
    }

    /// <summary>Downloads <paramref name="url"/> and returns the response body as a string.</summary>
    /// <param name="url">Address to request with HTTP GET.</param>
    /// <returns>The response body.</returns>
    public static string GetUrl(string url)
    {
        // Blocks on the async call; failures surface wrapped in AggregateException, as before.
        return s_client.GetStringAsync(url).Result;
    }

    /// <summary>
    /// Downloads one result page and turns each job card into a <see cref="JobOffer"/>.
    /// </summary>
    /// <param name="url">Result-page URL to download.</param>
    /// <returns>One offer per <c>div</c> whose class attribute contains "job_seen_beacon".</returns>
    private List<JobOffer> ParseHtml(string url)
    {
        var document = new HtmlDocument();
        document.LoadHtml(GetUrl(url));

        var jobs = new List<JobOffer>();

        // Fields are looked up inside each card rather than in five document-wide lists
        // indexed in parallel: a card missing one field (or a matching element outside
        // the cards) would otherwise shift every following row or overrun a list.
        foreach (HtmlNode card in FindByClass(document.DocumentNode, "div", "class", "job_seen_beacon"))
        {
            jobs.Add(new JobOffer
            {
                Title = FirstText(card, "h2", "jobTitle"),
                Company = FirstText(card, "span", "companyName"),
                City = FirstText(card, "div", "companyLocation"),
                Description = FirstText(card, "div", "job-snippet"),
                DaysAgo = FirstText(card, "span", "date"),
            });
        }

        return jobs;
    }

    // Descendants of root with the given tag whose attribute value contains className (substring match).
    private static IEnumerable<HtmlNode> FindByClass(HtmlNode root, string descendant, string attribute, string className)
    {
        return root.Descendants(descendant)
                   .Where(node => node.GetAttributeValue(attribute, "").Contains(className));
    }

    // Inner text of the first matching descendant of root, or "" when there is none.
    private static string FirstText(HtmlNode root, string descendant, string className)
    {
        HtmlNode match = FindByClass(root, descendant, "class", className).FirstOrDefault();
        return match == null ? "" : match.InnerText;
    }
}

In [8]:
/// <summary>
/// Writes a prompt to standard output (without a trailing newline) and reads one line
/// from standard input.
/// </summary>
/// <param name="msg">Prompt text to display.</param>
/// <returns>The line read from standard input.</returns>
public static string Input(string msg)
{
    Console.Out.Write(msg);
    string answer = Console.In.ReadLine();
    return answer;
}

In [25]:
// Search parameters for this run.
string query = "Data science";
int nbPages = 4;
string location = "France";

var scraper = new Indeed(query, nbPages, location);

// Print the result-page URLs the scraper will request, one per line.
var urls = scraper.MakeUrls();
foreach (string pageUrl in urls)
{
    Console.WriteLine(pageUrl);
}

http://fr.indeed.com/emplois?q=Data+science&l=France&limit=50
https://fr.indeed.com/emplois?as_and=Data+science&limit=50&start=50&l=France
https://fr.indeed.com/emplois?as_and=Data+science&limit=50&start=100&l=France
https://fr.indeed.com/emplois?as_and=Data+science&limit=50&start=150&l=France


In [26]:
// Download and parse every result page built by scraper.MakeUrls(); this performs the HTTP requests.
var allJobs = scraper.GetJobs();

In [27]:
// Trailing expression without a semicolon: the notebook renders its value as the cell output.
allJobs

index,Title,Company,Description,City,DaysAgo,Published
0,nouveauData Scientist H/F,Energiency,Réalisation d’études de data science.  Réalisation d’une veille active sur les publications scientifiques et les techniques relatives à la data science.,35000 Rennes,0,2022-03-02 02:22:35Z
1,nouveauDATA ANALYST - NIORT H/F,MAIF,"DATA ANALYST - NIORT H/F.  Au sein de la Tribu Intelligence Artificielle et Data Opérationnelle de la Datafactory MAIF, nous recherchons un Data Analyst afin d…",Niort (79)+ 1 lieu,0,2022-03-02 02:22:35Z
2,Data Scientist Big Data (F/H),AXA,Gestion de projet data et data science.  Animation de formations data d’acculturation et de formations professionnalisantes internes sur les outils et pratiques…,Nanterre (92),0,2022-03-02 02:22:35Z
3,Data Analyst - AXA Climate (F/M) - PARIS,AXA,"The delivery of these missions encompasses a wide range of responsibilities and tasks from data collection, data cleaning, data analysis, data modeling, data…",Paris (75),0,2022-03-02 02:22:35Z
4,nouveauAlternant Data Scientist (H/F),Carrefour,Vous serez rattaché(e) au service Pricing/Data science Front De Vente et vous aurez un rôle de data scientist transverse au sein de la BU Carfuel.,91000 Évry,0,2022-03-02 02:22:35Z
5,Data Scientist H/F,Better and Stronger,La présentation des solutions data aux clients.  La construction d’une roadmap à long terme avec le client en partenariat avec le responsable du compte.,69001 Lyon,0,2022-03-02 02:22:35Z
6,Data Scientist Climat H/F,Generali,"Generali, l’un des leaders mondiaux de l’assurance, accompagne plus de 65 millions de clients à travers le monde.  Type d'emploi : Temps plein, CDI.",Saint-Denis (93),0,2022-03-02 02:22:35Z
7,nouveauStagiaire Data Scientist H/F,HIPPY MEDTEC SYSTEMS,"Formaliser les problèmes et proposer de solutions théoriques basées sur des études statistiques, du Machine Learning et de la data visualisation,.",13011 Marseille 11e,0,2022-03-02 02:22:35Z
8,DATA SCIENTIST H/F,BNP Paribas,Cette expérience vous permettra de mettre en pratique vos compétences en matière d’analyse de données sur des jeux de données complexes et volumineux au côté…,Nanterre (92)+ 1 lieu,0,2022-03-02 02:22:35Z
9,Data Scientist (H/F) en Stage,AXA,"Formalisation des problèmes et proposition de solutions théoriques basées sur des études statistiques, du Machine Learning, de la data visualisation,.",Nanterre (92),0,2022-03-02 02:22:35Z


In [19]:
// Fetch the first result page directly; the returned HTML is discarded (the trailing semicolon suppresses cell output).
Indeed.GetUrl("http://fr.indeed.com/emplois?q=Data+science&l=France&limit=50");