In [1]:
import requests
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm

In [2]:
# Load the input listing of large companies.
# The CSV is semicolon-separated; the path is relative to this notebook's folder.
df = pd.read_csv(
    "../../data/processed/Entreprises/insee_entreprises_effectifs_sup_500_post_2017.csv",
    sep=";",
)
df.head()

Unnamed: 0,siren,denominationUsuelleEtablissement,anneeEffectifsEtablissement,trancheEffectifsEtablissement_texte,trancheEffectifsEtablissement
0,7080021,,2017.0,500 à 999 salariés,41.0
1,16950842,,2017.0,500 à 999 salariés,41.0
2,16980062,,2017.0,1 000 à 1 999 salariés,42.0
3,17251067,,2017.0,500 à 999 salariés,41.0
4,56501711,,2017.0,1 000 à 1 999 salariés,42.0


In [3]:
# references = {"FJ": "Chiffres d’affaires nets"}

def get_consolidated_CA(siren, year=2018, session=None, timeout=30):
    """
    Fetch the consolidated net revenue ("FJ") of a company from the enthic API.

    Only the value filed under 'Compte annuel consolidé' for the requested year
    is kept, when available. Values filed under 'Compte annuel complet' are
    ignored (cf. https://github.com/phe-sto/enthic/issues/1).

    Parameters
    ----------
    siren : int or str
        SIREN identifier. It is zero-padded to 9 characters to build the URL.
    year : int, default 2018
        Year of the accounts to request.
    session : object, optional
        Anything exposing a requests-compatible ``get(url, timeout=...)``
        method, e.g. a ``requests.Session`` to reuse connections across calls.
        Defaults to the ``requests`` module itself.
    timeout : float, default 30
        Seconds to wait for the server, so that one stalled connection cannot
        hang a long batch of calls.

    Returns
    -------
    tuple
        ``(siren, denomination, FJ)``, with ``siren`` returned as passed in.
        ``FJ`` is NaN when no consolidated account is present. Both
        ``denomination`` and ``FJ`` are NaN when the API does not answer
        with HTTP 200.

    Raises
    ------
    ValueError
        If the currency reported by the API is not 'Euro'.
    """
    http = requests if session is None else session
    url = "http://api.enthic.fr/company/siren/{}/{}".format(str(siren).zfill(9), year)
    response = http.get(url, timeout=timeout)

    if response.status_code != 200:
        # Keep "not found" for genuine 404s only, so that server errors are
        # not mistaken for companies missing from the API.
        if response.status_code == 404:
            print("Siren {} not found".format(siren))
        else:
            print("Siren {} request failed (HTTP {})".format(siren, response.status_code))
        return (siren, np.nan, np.nan)

    data = response.json()
    denomination = data["denomination"]["value"]
    if data["devise"]["value"] != "Euro":
        raise ValueError("Devise is not 'Euro' for siren {}!".format(siren))

    FJ = np.nan
    # Each item of financial_data is a one-entry dict keyed by an account code.
    # If several consolidated FJ entries exist, the last one wins.
    for entry in data["financial_data"]:
        revenue = entry.get("FJ")
        if revenue is not None and revenue["account"] == 'Compte annuel consolidé':
            FJ = int(revenue["value"])
    return (siren, denomination, FJ)
    

# One HTTP request per siren, so this cell is slow to run.
results = [get_consolidated_CA(siren) for siren in tqdm(df.siren.values)]

# Build the frame directly from the records. Going through np.array() would
# coerce every value to a string: NaN would become the text "nan" (which
# .notna() does not detect) and FJ would be stored as text.
df_CA = pd.DataFrame([tuple(record) for record in results], columns=["siren", "denomination", "FJ"])

# Use zero-padded 9-character strings as the key so it lines up with the
# padded siren used when merging back onto the INSEE listing.
df_CA["siren"] = df_CA["siren"].astype(str).str.zfill(9)

HBox(children=(FloatProgress(value=0.0, max=1711.0), HTML(value='')))

Siren 59804062 not found
Siren 60200128 not found
Siren 66305277 not found
Siren 85980357 not found
Siren 86380730 not found
Siren 130009186 not found
Siren 130012024 not found
Siren 179101316 not found
Siren 180006025 not found
Siren 180020026 not found
Siren 180036014 not found
Siren 180036048 not found
Siren 180044174 not found
Siren 180046021 not found
Siren 180046237 not found
Siren 180046252 not found
Siren 180046260 not found
Siren 180060030 not found
Siren 180070039 not found
Siren 180089013 not found
Siren 180092447 not found
Siren 183500016 not found
Siren 187509013 not found
Siren 200002970 not found
Siren 200011203 not found
Siren 200011385 not found
Siren 200011898 not found
Siren 200017986 not found
Siren 200018034 not found
Siren 200018703 not found
Siren 200023059 not found
Siren 200023091 not found
Siren 200026250 not found
Siren 200026433 not found
Siren 200026888 not found
Siren 200027092 not found
Siren 200027233 not found
Siren 200029320 not found
Siren 200029445 n

Siren 267100766 not found
Siren 267100790 not found
Siren 267200160 not found
Siren 267201069 not found
Siren 267205482 not found
Siren 267300044 not found
Siren 267311090 not found
Siren 267400026 not found
Siren 267400844 not found
Siren 267411031 not found
Siren 267411080 not found
Siren 267500049 not found
Siren 267500452 not found
Siren 267500643 not found
Siren 267601615 not found
Siren 267601680 not found
Siren 267601714 not found
Siren 267601722 not found
Siren 267601763 not found
Siren 267602175 not found
Siren 267700052 not found
Siren 267700086 not found
Siren 267800076 not found
Siren 267800092 not found
Siren 267800100 not found
Siren 267802387 not found
Siren 267802403 not found
Siren 267802445 not found
Siren 267802718 not found
Siren 267805778 not found
Siren 267805802 not found
Siren 267900017 not found
Siren 268000015 not found
Siren 268000148 not found
Siren 268000296 not found
Siren 268100013 not found
Siren 268100054 not found
Siren 268100088 not found
Siren 268200

Siren 440536555 not found
Siren 440546018 not found
Siren 440658102 not found
Siren 440676559 not found
Siren 441921913 not found
Siren 441921962 not found
Siren 443146873 not found
Siren 443577739 not found
Siren 444182687 not found
Siren 444718563 not found
Siren 444953830 not found
Siren 445043326 not found
Siren 445174675 not found
Siren 445200488 not found
Siren 445331192 not found
Siren 447982125 not found
Siren 450314802 not found
Siren 451221295 not found
Siren 452807100 not found
Siren 453207243 not found
Siren 453664393 not found
Siren 456504851 not found
Siren 477180186 not found
Siren 478834930 not found
Siren 478951080 not found
Siren 479766842 not found
Siren 479927915 not found
Siren 479942583 not found
Siren 480061928 not found
Siren 480266014 not found
Siren 483018370 not found
Siren 484592522 not found
Siren 487625436 not found
Siren 488404823 not found
Siren 491167839 not found
Siren 491668893 not found
Siren 492787957 not found
Siren 492826417 not found
Siren 493373

In [4]:
# Reformat siren on BOTH sides of the join as zero-padded 9-character strings.
# Padding only `df` would leave sirens with leading zeros unmatched in `df_CA`.
df["siren"] = df["siren"].astype(str).str.zfill(9)
df_CA = df_CA.assign(
    siren=df_CA["siren"].astype(str).str.zfill(9),
    # FJ may arrive as text if the results went through a NumPy string array
    FJ=pd.to_numeric(df_CA["FJ"], errors="coerce"),
)

# Drop sirens for which the API returned nothing.
# Author's note: not found siren are public services like public transports.
# The explicit "nan" comparison covers results built through a NumPy string
# array, where NaN becomes the literal text "nan" and .notna() misses it.
was_found = df_CA["denomination"].notna() & (df_CA["denomination"].astype(str) != "nan")
df_CA = df_CA[was_found]

# merge the information to insee listing
df = df.merge(df_CA, on="siren", how="left")
# save
df.to_csv("../../data/processed/Entreprises/entreprises_effectifs_sup_500_post_2017_with_CA.csv", sep=";")

# GitHub issue about retrieving more CA (revenue) figures

Minimal example reproducing the problem: the revenue (CA) returned by the API for ORANO looks wrong - https://github.com/phe-sto/enthic/issues/1

Reference for the expected CA (ORANO consolidated accounts as of 31/12/2018): https://www.orano.group/docs/default-source/orano-doc/finance/publications-financieres-et-reglementees/2018/resultats-annuels-2018/orano_comptes-consolides_31122018.pdf?sfvrsn=e8b5c416_6


In [86]:
import requests
from pprint import pprint

# Minimal reproduction: ORANO (siren 330956871), accounts for 2018.
response = requests.get("http://api.enthic.fr/company/siren/330956871/2018/", timeout=30).json()

# Look the revenue entry up by its "FJ" key rather than by list position,
# and print the value actually returned instead of a hand-copied number.
fj_entry = next(item for item in response["financial_data"] if "FJ" in item)
pprint(fj_entry)
fj_value = fj_entry["FJ"]["value"]
print("{} is {} billions € instead of 3.623 \n ".format(fj_value, fj_value/(10**9)))
pprint(response)

{'FJ': {'account': 'Compte annuel complet',
        'description': 'Chiffres d’affaires nets',
        'value': 121086000.0}}
121086000.0 is 0.121086 billions € instead of 3.623 
 
{'ape': {'description': 'Code Activité Principale Exercée (NAF)',
         'value': 'Fonds de placement et entités financières similaires'},
 'denomination': {'description': 'Dénomination', 'value': 'ORANO'},
 'devise': {'description': 'Devise', 'value': 'Euro'},
 'financial_data': [{'FR': {'account': 'Compte annuel complet',
                            'description': 'Total des produits d’exploitation',
                            'value': 124966000.0}},
                    {'FW': {'account': 'Compte annuel complet',
                            'description': 'Autres achats et charges externes',
                            'value': 148141000.0}},
                    {'FX': {'account': 'Compte annuel complet',
                            'description': 'Impôts, taxes et versements '
                         

In [70]:
# Scratch check: 7848000000.0 expressed in billions, then halved (evaluates to 3.924).
# NOTE(review): where 7848000000.0 comes from is not shown in this part of the notebook - confirm.
(7848000000.0/(10**9))/2

3.924

In [65]:
# Scratch check: the FJ value returned by the API for ORANO (121086000.0), in billions, times 30.
# The result (3.63258) is close to the expected 3.623 billions.
# NOTE(review): presumably probing for a scale factor between the two figures - confirm.
(121086000.0/(1000000000))*30

3.63258