## Packages & Bibliothèque

In [63]:
import os
import pandas as pd
from operator import itemgetter
import numpy as np
import matplotlib.pyplot as plt
import ast
import json

## Parameters

In [None]:
# Définir les intervalles de connexions
intervals = [0, 100, 500, 1000, 5000, 10000, float('inf')]  # ajustez les intervalles selon vos besoins

# Définir les étiquettes pour les intervalles
labels = ['0-100', '101-500', '501-1000', '1001-5000', '5001-10000', '10001+']

In [None]:
%matplotlib inline

In [None]:
data_path = '/Users/jeanmermozeffi/Documents/DDMA/DATA CAPTAIN/Data/DataLeadDirectorProfille.csv'

In [None]:
df_raw = pd.read_csv(data_path)

In [None]:
df_raw.head()

In [None]:
df_raw.shape

In [None]:
df = df_raw.copy()

In [None]:
df.describe()

In [None]:
columns = df.columns.tolist()

In [None]:
df.info()

In [None]:
columns

In [None]:
extract_columns = ['linkedin_profile_id', 'first_name', 'last_name', 'company_name', 'current_company_name', 'headline', 'industry', 'job_title', 'languages', 'linkedin_profile_url', 'profile_image_url', 'location', 'number_connections', 'number_followers', 'profile_language']

In [None]:
df = df[extract_columns]

In [None]:
df.head()

In [None]:
langages = df['languages'].unique()
langages

In [None]:
industries = df['industry'].unique()

In [None]:
output_directory = 'Resultats'

if not os.path.exists(output_directory):
    os.makedirs(output_directory)

# for industry in industries:
#     output_path = os.path.join(output_directory, f'{industry}_segmentation.csv')
#     industry_data.to_csv(output_path, index=False)

In [None]:
# Créer un histogramme pour le nombre d'occurrences par industrie
plt.figure(figsize=(10, 6))
df['industry'].value_counts().plot(kind='bar', color='skyblue')
plt.title('Segmentation par Industrie')
plt.xlabel('Industrie')
plt.ylabel('Nombre d\'occurrences')
plt.xticks(rotation=45)
plt.show()

In [None]:
df['industry']

In [None]:
top_industries = df['industry'].value_counts().head(10)

plt.figure(figsize=(12, 6))
top_industries.plot(kind='bar', color='skyblue')
plt.title('Top 10 des Industries')
plt.xlabel('Industrie')
plt.ylabel('Nombre d\'occurrences')
plt.xticks(rotation=45, ha='right')
plt.show()

## Segmentation par langues

In [None]:
df_language = df[df['languages'].apply(lambda x: len(x) > 0)]
df_language.head()

In [None]:
df_language['current_company_name']

## Trier le DataFrame selon le nombre de followers en ordre décroissant

In [None]:
df_sorted_followers = df.sort_values(by='number_followers', ascending=False)
df_sorted_followers.head()

In [None]:
top_profiles_followers = df_sorted_followers.head(10)

## regrouper les données par intervalles d'abonnées

In [None]:
# Ajouter une nouvelle colonne avec les intervalles de connexions
df.loc[:, 'follower_interval'] = pd.cut(df['number_followers'], bins=intervals, labels=labels, right=False)

# Afficher le nombre d'occurrences par intervalle de connexions
connection_counts = df['follower_interval'].value_counts()

# Créer un diagramme à barres pour les intervalles de connexions
plt.figure(figsize=(12, 6))
connection_counts.sort_index().plot(kind='bar', color='blue')
plt.title('Répartition par Intervalles de abonnés')
plt.xlabel('Intervalles de abonnées')
plt.ylabel('Nombre d\'occurrences')
plt.xticks(rotation=0)
plt.show()

## regrouper les données par intervalles de connexions 

In [None]:
# Ajouter une nouvelle colonne avec les intervalles de connexions
df.loc[:, 'connection_interval'] = pd.cut(df['number_connections'], bins=intervals, labels=labels, right=False)

# Afficher le nombre d'occurrences par intervalle de connexions
connection_counts = df['connection_interval'].value_counts()

# Créer un diagramme à barres pour les intervalles de connexions
plt.figure(figsize=(12, 6))
connection_counts.sort_index().plot(kind='bar', color='blue')
plt.title('Répartition par Intervalles de Connexions')
plt.xlabel('Intervalles de Connexions')
plt.ylabel('Nombre d\'occurrences')
plt.xticks(rotation=0)
plt.show()

In [None]:
df.head()

In [None]:
df.columns

## Extraction 

In [47]:
data_path = '/Users/jeanmermozeffi/Documents/DDMA/DATA CAPTAIN/Data/DataLeadDirectorProfille.csv'

In [48]:
df = pd.read_csv(data_path)

In [49]:
df.head()

Unnamed: 0,birth_date,company_employees_range,company_industry,company_name,company_url,connection_degree,current_company_name,current_company_url,current_job_title,description,...,profile_url,sales_navigator_profile_id,school_name,school_url,skills,start_url,summary,title,tracking_id,volunteer_experiences
0,,11 - 50,Information Technology & Services,Mindz Group - Côte D'Ivoire,https://www.linkedin.com/company/mindzgroup-c%...,,Mindz Group - Côte D'Ivoire,https://www.linkedin.com/company/mindzgroup-c%...,Directeur général,Présent sur des marchés au potentiel de croiss...,...,https://www.linkedin.com/in/gillian-yannick-m-...,ACoAAAgqc8wBUwP7qqfPTtAR-ic7w0oCRxTUaqo,Institut Supérieur des Technologies de la Comm...,,"[""Communication stratégique"", ""Marque et ident...",https://www.linkedin.com/in/gillian-yannick-m-...,Présent sur des marchés au potentiel de croiss...,Directeur Général,794Sxtw1T/yzQmcsDHmlpQ==,[]
1,,,,Alizé Club,,,alizé club,,directeur general,,...,https://www.linkedin.com/in/jean-yves-2aa5b519,ACoAAAPTamIBwZGI_dPmQb0kv8ROACbgp7bU4jw,,,[],https://www.linkedin.com/in/jean-yves-2aa5b519...,,Directeur General,VePR/Cy9QGyn2GYn6oLGGA==,[]
2,,11 - 50,Real Estate,D.A.N Immobilia Corporartion,https://www.linkedin.com/company/d-a-n-immobilia/,,D.A.N IMMOBILIA CORPORARTION,https://www.linkedin.com/company/d-a-n-immobilia/,Président-directeur général,"INGENIEUR GENIE CIVIL, Diplômé de l'Ecole Maro...",...,https://www.linkedin.com/in/epiphane-maldini-b...,ACoAAAXPCKkBku5YjSIrG9ccvNzRs6EoOxp9dEU,EARIST/ Maroc,,"[""Business"", ""Génie civil"", ""Ingénieurs"", ""Amé...",https://www.linkedin.com/in/epiphane-maldini-b...,"INGENIEUR GENIE CIVIL, Diplômé de l'Ecole Maro...",Président-Directeur Général,sk7GN1uASbW/jFfIKttmGA==,"[{""role"": ""Président Commission Jeune Générati..."
3,,,,Pia Benin Sarl,,,pia benin sarl,,Directeur Général,,...,https://www.linkedin.com/in/ibrahima-sow-53668391,ACoAABN_tzoB9E8HYXYpieivKadUE8qpvSZJivs,,,[],https://www.linkedin.com/in/ibrahima-sow-53668...,,Directeur Général,tF2S5wAFRDm3SgM0UQQEgQ==,[]
4,,,,Hirotec Construction,,,HIROTEC Construction,,Directeur général,"Fondations spéciales, Travaux publics, Genie ...",...,https://www.linkedin.com/in/guillaume-nguyen-8...,ACoAABGC8MMBNYkmEOKRUkU6i6Jc5cfo_1IMmZU,,,"[""Développement de nouvelles affaires"", ""Lean ...",https://www.linkedin.com/in/guillaume-nguyen-8...,"Fondations spéciales, Travaux publics, Genie ...",Directeur Général,WjitPwSkQOujtbKBRbyZ4w==,[]


In [50]:
columns_experience = ['id', 'full_name', 'handle', 'linkedin_profile_id', 'experiences']

In [101]:
df_experience = df[columns_experience]

In [98]:
df_experience.head()

Unnamed: 0,id,full_name,handle,linkedin_profile_id,experiences
0,ACoAAAgqc8wBUwP7qqfPTtAR-ic7w0oCRxTUaqo,Gillian Yannick M Zouzouko,gillian-yannick-m-zouzouko-410a6a39,136999884,"[{""title"": ""Directeur général"", ""company_name""..."
1,ACoAAAPTamIBwZGI_dPmQb0kv8ROACbgp7bU4jw,Jean Yves,jean-yves-2aa5b519,64186978,"[{""title"": ""directeur general"", ""company_name""..."
2,ACoAAAXPCKkBku5YjSIrG9ccvNzRs6EoOxp9dEU,Epiphane Maldini Bonou,epiphane-maldini-bonou-17577928,97454249,"[{""title"": ""Président-directeur général"", ""com..."
3,ACoAABN_tzoB9E8HYXYpieivKadUE8qpvSZJivs,Ibrahima Sow,ibrahima-sow-53668391,327137082,"[{""title"": ""Directeur Général"", ""company_name""..."
4,ACoAABGC8MMBNYkmEOKRUkU6i6Jc5cfo_1IMmZU,Guillaume Nguyen,guillaume-nguyen-80348382,293793987,"[{""title"": ""Directeur général"", ""company_name""..."


In [104]:
df.loc[:, 'experiences'] = df['experiences'].apply(json.loads)

In [113]:
df_experience

Unnamed: 0,id,full_name,handle,linkedin_profile_id,experiences
0,ACoAAAgqc8wBUwP7qqfPTtAR-ic7w0oCRxTUaqo,Gillian Yannick M Zouzouko,gillian-yannick-m-zouzouko-410a6a39,136999884,"[{'title': 'Directeur général', 'company_name'..."
1,ACoAAAPTamIBwZGI_dPmQb0kv8ROACbgp7bU4jw,Jean Yves,jean-yves-2aa5b519,64186978,"[{'title': 'directeur general', 'company_name'..."
2,ACoAAAXPCKkBku5YjSIrG9ccvNzRs6EoOxp9dEU,Epiphane Maldini Bonou,epiphane-maldini-bonou-17577928,97454249,"[{'title': 'Président-directeur général', 'com..."
3,ACoAABN_tzoB9E8HYXYpieivKadUE8qpvSZJivs,Ibrahima Sow,ibrahima-sow-53668391,327137082,"[{'title': 'Directeur Général', 'company_name'..."
4,ACoAABGC8MMBNYkmEOKRUkU6i6Jc5cfo_1IMmZU,Guillaume Nguyen,guillaume-nguyen-80348382,293793987,"[{'title': 'Directeur général', 'company_name'..."
...,...,...,...,...,...
495,ACoAACYRl5cB9bDs39fJNjb6nCuc8oJKAa6fooI,Hazoumé Aime,hazoumé-aime-733a8a159,638687127,"[{'title': 'Directeur Général', 'company_name'..."
496,ACoAABU1ZpwBUfWZV5SfO3dh7nlRQ6h7nAfvfI4,Djibril Michael Secong,djibril-michael-secong-0781b79b,355821212,"[{'title': 'Directeur Général', 'company_name'..."
497,ACoAABMmROwB1yVCj-caPNU23gU1QnjkYKfuoJk,N'Dri Kouame Pierre,n-dri-kouame-pierre-1247178b,321275116,"[{'title': 'Directeur Général', 'company_name'..."
498,ACoAAAObIyUBH5YIm6Hz3GC02R-uPdV_pPsIkEw,Nicolas Decrock,nicolas-decrock-a0531618,60498725,"[{'title': 'Directeur General', 'company_name'..."


In [106]:
df_expanded = df_experience.explode('experiences')

In [107]:
df_expanded

Unnamed: 0,id,full_name,handle,linkedin_profile_id,experiences
0,ACoAAAgqc8wBUwP7qqfPTtAR-ic7w0oCRxTUaqo,Gillian Yannick M Zouzouko,gillian-yannick-m-zouzouko-410a6a39,136999884,"{'title': 'Directeur général', 'company_name':..."
0,ACoAAAgqc8wBUwP7qqfPTtAR-ic7w0oCRxTUaqo,Gillian Yannick M Zouzouko,gillian-yannick-m-zouzouko-410a6a39,136999884,"{'title': 'Fondateur', 'company_name': 'Carnet..."
0,ACoAAAgqc8wBUwP7qqfPTtAR-ic7w0oCRxTUaqo,Gillian Yannick M Zouzouko,gillian-yannick-m-zouzouko-410a6a39,136999884,"{'title': 'Brand and Communication Manager', '..."
0,ACoAAAgqc8wBUwP7qqfPTtAR-ic7w0oCRxTUaqo,Gillian Yannick M Zouzouko,gillian-yannick-m-zouzouko-410a6a39,136999884,"{'title': 'User Interface Designer', 'company_..."
0,ACoAAAgqc8wBUwP7qqfPTtAR-ic7w0oCRxTUaqo,Gillian Yannick M Zouzouko,gillian-yannick-m-zouzouko-410a6a39,136999884,"{'title': 'Spécialiste en design', 'company_na..."
...,...,...,...,...,...
498,ACoAAAObIyUBH5YIm6Hz3GC02R-uPdV_pPsIkEw,Nicolas Decrock,nicolas-decrock-a0531618,60498725,"{'title': 'Superviseur', 'company_name': 'SNT'..."
499,ACoAAA0cvugBOSvfcLkjvsrYJhaFPuwgDdXu7Fc,Regis Ouedraogo,regis-ouedraogo-83480b61,219987688,"{'title': 'Directeur General', 'company_name':..."
499,ACoAAA0cvugBOSvfcLkjvsrYJhaFPuwgDdXu7Fc,Regis Ouedraogo,regis-ouedraogo-83480b61,219987688,"{'title': 'Directeur d'agence', 'company_name'..."
499,ACoAAA0cvugBOSvfcLkjvsrYJhaFPuwgDdXu7Fc,Regis Ouedraogo,regis-ouedraogo-83480b61,219987688,"{'title': 'DAF', 'company_name': 'Congo termin..."


In [108]:
df_expanded.reset_index(drop=True, inplace=True)

In [109]:
df_expanded

Unnamed: 0,id,full_name,handle,linkedin_profile_id,experiences
0,ACoAAAgqc8wBUwP7qqfPTtAR-ic7w0oCRxTUaqo,Gillian Yannick M Zouzouko,gillian-yannick-m-zouzouko-410a6a39,136999884,"{'title': 'Directeur général', 'company_name':..."
1,ACoAAAgqc8wBUwP7qqfPTtAR-ic7w0oCRxTUaqo,Gillian Yannick M Zouzouko,gillian-yannick-m-zouzouko-410a6a39,136999884,"{'title': 'Fondateur', 'company_name': 'Carnet..."
2,ACoAAAgqc8wBUwP7qqfPTtAR-ic7w0oCRxTUaqo,Gillian Yannick M Zouzouko,gillian-yannick-m-zouzouko-410a6a39,136999884,"{'title': 'Brand and Communication Manager', '..."
3,ACoAAAgqc8wBUwP7qqfPTtAR-ic7w0oCRxTUaqo,Gillian Yannick M Zouzouko,gillian-yannick-m-zouzouko-410a6a39,136999884,"{'title': 'User Interface Designer', 'company_..."
4,ACoAAAgqc8wBUwP7qqfPTtAR-ic7w0oCRxTUaqo,Gillian Yannick M Zouzouko,gillian-yannick-m-zouzouko-410a6a39,136999884,"{'title': 'Spécialiste en design', 'company_na..."
...,...,...,...,...,...
1812,ACoAAAObIyUBH5YIm6Hz3GC02R-uPdV_pPsIkEw,Nicolas Decrock,nicolas-decrock-a0531618,60498725,"{'title': 'Superviseur', 'company_name': 'SNT'..."
1813,ACoAAA0cvugBOSvfcLkjvsrYJhaFPuwgDdXu7Fc,Regis Ouedraogo,regis-ouedraogo-83480b61,219987688,"{'title': 'Directeur General', 'company_name':..."
1814,ACoAAA0cvugBOSvfcLkjvsrYJhaFPuwgDdXu7Fc,Regis Ouedraogo,regis-ouedraogo-83480b61,219987688,"{'title': 'Directeur d'agence', 'company_name'..."
1815,ACoAAA0cvugBOSvfcLkjvsrYJhaFPuwgDdXu7Fc,Regis Ouedraogo,regis-ouedraogo-83480b61,219987688,"{'title': 'DAF', 'company_name': 'Congo termin..."


In [110]:
df_expanded = pd.concat([df_expanded.drop(['experiences'], axis=1), df_expanded['experiences'].apply(pd.Series)], axis=1)

In [111]:
df_expanded

Unnamed: 0,id,full_name,handle,linkedin_profile_id,title,company_name,company_description,company_employees_range,linkedin_company_url,location,company_industry,linkedin_company_id,date,job_time_period,job_contract_type,0
0,ACoAAAgqc8wBUwP7qqfPTtAR-ic7w0oCRxTUaqo,Gillian Yannick M Zouzouko,gillian-yannick-m-zouzouko-410a6a39,136999884,Directeur général,Mindz Group - Côte D'Ivoire,,11 - 50,https://www.linkedin.com/company/mindzgroup-c%...,Abidjan,Information Technology & Services,11130520,1/2016 - current,8 years,,
1,ACoAAAgqc8wBUwP7qqfPTtAR-ic7w0oCRxTUaqo,Gillian Yannick M Zouzouko,gillian-yannick-m-zouzouko-410a6a39,136999884,Fondateur,Carnet du Voyageur (CDV),Start Up opérant dans le domaine de la promoti...,,,Abidjan,,,3/2012 - current,11 years 10 months,,
2,ACoAAAgqc8wBUwP7qqfPTtAR-ic7w0oCRxTUaqo,Gillian Yannick M Zouzouko,gillian-yannick-m-zouzouko-410a6a39,136999884,Brand and Communication Manager,FENNINVEST GROUP,En charge du branding audiovisuel des filiales...,,,"Abidjan, Côte d’Ivoire",,89668702,11/2022 - 2/2023,3 months,Full-time,
3,ACoAAAgqc8wBUwP7qqfPTtAR-ic7w0oCRxTUaqo,Gillian Yannick M Zouzouko,gillian-yannick-m-zouzouko-410a6a39,136999884,User Interface Designer,B2I Group Africa,,11 - 50,https://www.linkedin.com/company/b2i-groupe-af...,"Abidjan, Côte d’Ivoire",Computer Software,70904903,12/2020 - 12/2021,1 year,Full-time,
4,ACoAAAgqc8wBUwP7qqfPTtAR-ic7w0oCRxTUaqo,Gillian Yannick M Zouzouko,gillian-yannick-m-zouzouko-410a6a39,136999884,Spécialiste en design,B2I Groupe Africa,,11 - 50,https://www.linkedin.com/company/b2i-groupe-af...,"Abidjan, Côte d’Ivoire",Computer Software,70904903,12/2020 - 10/2021,10 months,Full-time,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1812,ACoAAAObIyUBH5YIm6Hz3GC02R-uPdV_pPsIkEw,Nicolas Decrock,nicolas-decrock-a0531618,60498725,Superviseur,SNT,,10001 -,https://www.linkedin.com/company/webhelp/,,Outsourcing/Offshoring,15667,10/1999 - 12/2000,1 year 2 months,,
1813,ACoAAA0cvugBOSvfcLkjvsrYJhaFPuwgDdXu7Fc,Regis Ouedraogo,regis-ouedraogo-83480b61,219987688,Directeur General,Societe Ivoirienne d'Entreposage et de Logisti...,,,,abidjan,,,11/2014 - current,9 years 2 months,,
1814,ACoAAA0cvugBOSvfcLkjvsrYJhaFPuwgDdXu7Fc,Regis Ouedraogo,regis-ouedraogo-83480b61,219987688,Directeur d'agence,Bollore Africa Logistics Brazzaville,,,,,,,9/2012 - 9/2014,2 years,,
1815,ACoAAA0cvugBOSvfcLkjvsrYJhaFPuwgDdXu7Fc,Regis Ouedraogo,regis-ouedraogo-83480b61,219987688,DAF,Congo terminal,,,,,,,2009 - 2012,,,


In [112]:
df_expanded.to_csv("Resultats/liste_experiences.csv", sep=',')