In [43]:
import numpy as np
import os
import pandas as pd
import shutil

In [44]:
# List of ROR to exclude from results
excluded_rors = [
    "02n6c9837", # Sanofi
    "01mqmer16", # Hoffmann-La Roche
    "02f9zrr09", # Novartis
    "04r9x1a08", # AstraZeneca
    "00gtmwv55", # Bristol-Myers Squibb
    "03g03ge92"  # Amgen
]

cle_false = "SANS communication\nidentifiée"
cle_true = "AVEC communication\nidentifiée"

LAST_YEAR = 2023

In [45]:
# Clear previous results
os.system(f"rm -Rf ./../publipostage2")
os.system(f"mkdir -p ./../publipostage2")

0

In [46]:
# Load all national data about clinical trials
url = "https://storage.gra.cloud.ovh.net/v1/AUTH_32c5d10cb0fe4519b957064a111717e3/bso_dump/bso-clinical-trials.jsonl.gz"
df = pd.read_json(url, lines=True)

In [47]:
# Mapping to normalize sponsor name and match to ROR
mapping = pd.read_csv("https://raw.githubusercontent.com/dataesr/bso-clinical-trials/main/bsoclinicaltrials/server/main/bso-lead-sponsors-mapping.csv")

In [48]:
columns = ["lead_sponsor", "study_completion_year", "NCTId", "eudraCT", "CTIS", "study_type", "status_simplified",
          "acronym", "title", "has_results_or_publications", "has_results_or_publications_within_1y",
          "has_results_or_publications_within_3y", "intervention_type"]
df_interventional = df[df.study_type=="Interventional"]
df_interventional = df_interventional[columns]
df_interventional_completed = df[(df.study_type=="Interventional") & (df.status_simplified=="Completed")]
df_interventional_completed = df_interventional_completed[columns]
df_interventional_academic = df[(df.study_type=="Interventional") & (df.lead_sponsor_type=="academique")]
df_interventional_academic = df_interventional_academic[columns]
df_interventional_completed_academic = df[(df.study_type=="Interventional") & (df.status_simplified=="Completed") & (df.lead_sponsor_type=="academique")]
df_interventional_completed_academic = df_interventional_completed_academic[columns]
dd = pd.merge(df_interventional_completed, mapping, left_on="lead_sponsor", right_on="sponsor", indicator=True, how="inner")
dd_ror = dd[dd.ror.apply(lambda x:isinstance(x, str) and "ror.org/" in x)]
dd_ror.loc[:, "ror_simple"] = dd_ror.ror.apply(lambda x:x.split("/")[-1])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dd_ror.loc[:, "ror_simple"] = dd_ror.ror.apply(lambda x:x.split("/")[-1])


In [49]:
def get_percent(df_data):
    stats = {"results_last_10_years": {}, "results_last_10_years_drug": {}, "results_within_1_year_last_10_years": {}, "results_within_1_year_last_10_years_drug": {}}
    number_of_trials = len(df_data.index)
    data_results_last_10_years = pd.DataFrame(df_data[(df_data.study_completion_year > LAST_YEAR - 10) & (df_data.study_completion_year <= LAST_YEAR)].has_results_or_publications.value_counts(dropna=False))
    data_results_last_10_years_drug = pd.DataFrame(df_data[(df_data.study_completion_year > LAST_YEAR - 10) & (df_data.study_completion_year <= LAST_YEAR) & (df_data.intervention_type == "DRUG")].has_results_or_publications.value_counts(dropna=False))
    data_results_within_1_year_last_10_years = pd.DataFrame(df_data[(df_data.study_completion_year > LAST_YEAR - 10) & (df_data.study_completion_year <= LAST_YEAR)].has_results_or_publications_within_1y.value_counts(dropna=False))
    data_results_within_1_year_last_10_years_drug = pd.DataFrame(df_data[(df_data.study_completion_year > LAST_YEAR - 10) & (df_data.study_completion_year <= LAST_YEAR) & (df_data.intervention_type == "DRUG")].has_results_or_publications_within_1y.value_counts(dropna=False))
    try:
        stats["results_last_10_years"][cle_true] = data_results_last_10_years[data_results_last_10_years.index==True].values[0][0]
    except Exception as e:
        print(e)
        stats["results_last_10_years"][cle_true] = 0
    try:
        stats["results_last_10_years"][cle_false] = data_results_last_10_years[data_results_last_10_years.index==False].values[0][0]
    except Exception as e:
        print(e)
        stats["results_last_10_years"][cle_false] = 0
    try:
        stats["results_last_10_years_drug"][cle_true] = data_results_last_10_years_drug[data_results_last_10_years_drug.index==True].values[0][0]
    except Exception as e:
        print(e)
        stats["results_last_10_years_drug"][cle_true] = 0
    try:
        stats["results_last_10_years_drug"][cle_false] = data_results_last_10_years_drug[data_results_last_10_years_drug.index==False].values[0][0]
    except Exception as e:
        print(e)
        stats["results_last_10_years_drug"][cle_false] = 0
    try:
        stats["results_within_1_year_last_10_years"][cle_true] = data_results_within_1_year_last_10_years[data_results_within_1_year_last_10_years.index==True].values[0][0]
    except Exception as e:
        print(e)
        stats["results_within_1_year_last_10_years"][cle_true] = 0
    try:
        stats["results_within_1_year_last_10_years"][cle_false] = data_results_within_1_year_last_10_years[data_results_within_1_year_last_10_years.index==False].values[0][0]
    except Exception as e:
        print(e)
        stats["results_within_1_year_last_10_years"][cle_false] = 0
    try:
        stats["results_within_1_year_last_10_years_drug"][cle_true] = data_results_within_1_year_last_10_years_drug[data_results_within_1_year_last_10_years_drug.index==True].values[0][0]
    except Exception as e:
        print(e)
        stats["results_within_1_year_last_10_years_drug"][cle_true] = 0
    try:
        stats["results_within_1_year_last_10_years_drug"][cle_false] = data_results_within_1_year_last_10_years_drug[data_results_within_1_year_last_10_years_drug.index==False].values[0][0]
    except Exception as e:
        print(e)
        stats["results_within_1_year_last_10_years_drug"][cle_false] = 0
    try:
        number_of_trials_last_10_years = stats["results_last_10_years"][cle_false] + stats["results_last_10_years"][cle_true]
        results_last_10_years = int(round(100 * stats["results_last_10_years"][cle_true] / (stats["results_last_10_years"][cle_false] + stats["results_last_10_years"][cle_true]), 0))
        results_last_10_years = f"{results_last_10_years} %"
    except ZeroDivisionError:
        results_last_10_years = np.nan
    try:
        number_of_trials_last_10_years_drug = stats["results_last_10_years_drug"][cle_false] + stats["results_last_10_years_drug"][cle_true]
        results_last_10_years_drug = int(round(100 * stats["results_last_10_years_drug"][cle_true] / (stats["results_last_10_years_drug"][cle_false] + stats["results_last_10_years_drug"][cle_true]), 0))
        results_last_10_years_drug = f"{results_last_10_years_drug} %"
    except ZeroDivisionError:
        results_last_10_years_drug = np.nan
    try:
        number_of_trials_within_1_year_last_10_years = stats["results_within_1_year_last_10_years"][cle_false] + stats["results_within_1_year_last_10_years"][cle_true]
        results_within_1_year_last_10_years = int(round(100 * stats["results_within_1_year_last_10_years"][cle_true] / (stats["results_within_1_year_last_10_years"][cle_false] + stats["results_within_1_year_last_10_years"][cle_true]), 0))
        results_within_1_year_last_10_years = f"{results_within_1_year_last_10_years} %"
    except ZeroDivisionError:
        results_within_1_year_last_10_years = np.nan
    try:
        number_of_trials_within_1_year_last_10_years_drug = stats["results_within_1_year_last_10_years_drug"][cle_false] + stats["results_within_1_year_last_10_years_drug"][cle_true]
        results_within_1_year_last_10_years_drug = int(round(100 * stats["results_within_1_year_last_10_years_drug"][cle_true] / (stats["results_within_1_year_last_10_years_drug"][cle_false] + stats["results_within_1_year_last_10_years_drug"][cle_true]), 0))
        results_within_1_year_last_10_years_drug = f"{results_within_1_year_last_10_years_drug} %"
    except ZeroDivisionError:
        results_within_1_year_last_10_years_drug = np.nan
    return f"{number_of_trials}", f"{number_of_trials_last_10_years}", f"{results_last_10_years}", f"{number_of_trials_last_10_years_drug}", f"{results_last_10_years_drug}", f"{number_of_trials_within_1_year_last_10_years}", f"{results_within_1_year_last_10_years}", f"{number_of_trials_within_1_year_last_10_years_drug}", f"{results_within_1_year_last_10_years_drug}"

In [50]:
print("france")
indicators_france = get_percent(df_interventional_completed)
print(indicators_france)
print("academique")
indicators_academic = get_percent(df_interventional_completed_academic)
print(indicators_academic)

france
('16414', '9608', '51 %', '3629', '77 %', '9608', '33 %', '3629', '55 %')
academique
('6823', '4757', '29 %', '873', '46 %', '4757', '13 %', '873', '20 %')


In [51]:
dd_perim = pd.merge(df_interventional, mapping, left_on="lead_sponsor", right_on="sponsor", indicator=True, how="inner")
dd_perim_ror = dd_perim[dd_perim.ror.apply(lambda x:isinstance(x, str) and "ror.org/" in x)]
dd_perim_ror["ror_simple"] = dd_perim_ror.ror.apply(lambda x:x.split("/")[-1])
df_perimetre = pd.DataFrame(dd_perim_ror.ror_simple.value_counts()).reset_index()
df_perimetre.columns = ["ror", "nb_interventional_academic"]
df_perimetre = df_perimetre[df_perimetre.ror.apply(lambda x:x not in excluded_rors)]
df_name = dd_perim_ror[["ror_simple", "sponsor_normalized"]].drop_duplicates()
df_perimetre2 = pd.merge(df_perimetre, df_name, left_on="ror", right_on="ror_simple", how="left")
for index, row in df_perimetre2.iterrows():
    df_perimetre2.loc[index, "nb_interventional_academic_completed"] = len(dd_ror[dd_ror["ror_simple"] == row["ror"]])
df_perimetre2["nb_interventional_academic_completed"] = df_perimetre2["nb_interventional_academic_completed"].apply(lambda x:int(x))
df_perimetre2[["ror", "sponsor_normalized", "nb_interventional_academic", "nb_interventional_academic_completed"]].to_csv("../publipostage2/perimetre.csv", index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dd_perim_ror["ror_simple"] = dd_perim_ror.ror.apply(lambda x:x.split("/")[-1])


In [52]:
def get_status_color(row):
    if row.has_results_or_publications is False:
        return "⬛"
    if row.has_results_or_publications_within_1y is False and row.has_results_or_publications_within_3y is False:
        return "🟥"
    if row.has_results_or_publications_within_1y is False and row.has_results_or_publications_within_3y is True:
        return "🟧"
    if row.has_results_or_publications_within_1y is True and row.has_results_or_publications_within_3y is True:
        return "🟩"

def get_status(row):
    if row.has_results_or_publications is False:
        return "4"
    if row.has_results_or_publications_within_1y is False and row.has_results_or_publications_within_3y is False:
        return "3"
    if row.has_results_or_publications_within_1y is False and row.has_results_or_publications_within_3y is True:
        return "2"
    if row.has_results_or_publications_within_1y is True and row.has_results_or_publications_within_3y is True:
        return "1"

def get_status_name(row):
    if row.has_results_or_publications is False:
        return "4 : pas de résultats postés ni publiés"
    if row.has_results_or_publications_within_1y is False and row.has_results_or_publications_within_3y is False:
        return "3 : résultats postés ou publiés après les 36 mois"
    if row.has_results_or_publications_within_1y is False and row.has_results_or_publications_within_3y is True:
        return "2 : résultats postés ou publiés entre 12 et 36 mois"
    if row.has_results_or_publications_within_1y is True and row.has_results_or_publications_within_3y is True:
        return "1 : résultats postés ou publiés dans les 12 mois"

In [53]:
def clean_year(y):
    try:
        return str(int(y))
    except:
        return None

In [54]:
def make_data(ror):
    os.system(f"mkdir -p ./../publipostage2/{ror}")
    df_tmp = dd_ror[dd_ror.ror_simple == ror].sort_values(by="study_completion_year")
    sponsor_names = pd.DataFrame(df_tmp.lead_sponsor.value_counts()).reset_index()
    sponsor_names.columns = ["variant", "number_of_trials"]
    sponsor_names[["variant"]].to_csv(f"./../publipostage2/{ror}/liste_variantes_noms_{ror}.csv", index=False, header=False)
    df_tmp["status_color"] = df_tmp.apply(lambda row: get_status_color(row), axis=1)
    df_tmp["status"] = df_tmp.apply(lambda row: get_status(row), axis=1)
    df_tmp["status_name"] = df_tmp.apply(lambda row: get_status_name(row), axis=1)
    global_stat = {"ror": ror}
    if len(df_tmp) == 0:
        print(f"No selected clinical trials for {ror}")
        return
    try:
        global_stat["sponsor_name"] = df_tmp.sponsor_normalized.values[0]
    except:
        print("skip " + ror)
        return
    global_stat["sponsor_article"] = "du"
    if global_stat["sponsor_name"][0] in ["A", "E", "I", "O", "U", "Y"]:
        global_stat["sponsor_article"] = "de l'"
    if global_stat["sponsor_name"].split(" ")[0].lower() in ["hospices", "hopitaux", "hôpitaux"]:
        global_stat["sponsor_article"] = "des"
    if global_stat["sponsor_name"].split(" ")[0].lower() in ["hopital", "hôpital"]:
        global_stat["sponsor_article"] = "de l'"
    if global_stat["sponsor_name"].split(" ")[0].lower() in ["clinique"]:
        global_stat["sponsor_article"] = "de la"
    global_stat["number_of_trials"], global_stat["number_of_trials_10_years"], global_stat["indicator_10_years"], global_stat["number_of_trials_10_years_drug"], global_stat["indicator_10_years_drug"], global_stat["number_of_trials_10_years_within_12_months"], global_stat["indicator_10_years_within_12_months"], global_stat["number_of_trials_10_years_within_12_months_drug"], global_stat["indicator_10_years_within_12_months_drug"] = get_percent(df_tmp)
    _, global_stat["number_of_trials_10_years_france"], global_stat["indicator_10_years_france"], global_stat["number_of_trials_10_years_drug_france"], global_stat["indicator_10_years_drug_france"], global_stat["number_of_trials_10_years_within_12_months_france"], global_stat["indicator_10_years_within_12_months_france"], global_stat["number_of_trials_10_years_within_12_months_drug_france"], global_stat["indicator_10_years_within_12_months_drug_france"] = indicators_france
    pd.DataFrame([global_stat]).to_csv(f"./../publipostage2/{ror}/indicators_{ror}.csv", index=False)
    df_tmp["study_completion_year"] = df_tmp["study_completion_year"].apply(lambda x:clean_year(x))
    df_liste_essais_csv = df_tmp[["status_color", "status", "status_name", "NCTId", "eudraCT", "CTIS", "study_completion_year", "title", "acronym",
                                  "intervention_type"]]
    df_liste_essais_csv.columns = ["statut_color", "statut", "statut_name", "NCTId", "eudraCT", "CTIS", "completion_year", "clinical_trial_title",
                                   "acronym", "intervention_type"]
    df_liste_essais_csv.to_csv(f"./../publipostage2/{ror}/liste_essais_cliniques_identifies_{ror}.csv", index=False)
    df_liste_essais_excel = df_tmp[["status", "status_name", "NCTId", "eudraCT", "CTIS", "study_completion_year", "title", "acronym",
                                    "intervention_type"]]
    df_liste_essais_excel.columns = ["statut", "statut_name", "NCTId", "eudraCT", "CTIS", "completion_year", "clinical_trial_title",
                                     "acronym", "intervention_type"]
    df_liste_essais_excel.to_excel(f"./../publipostage2/{ror}/liste_essais_cliniques_identifies_{ror}.xlsx", index=False)
    # excel
    writer = pd.ExcelWriter(f"./../publipostage2/{ror}/liste_essais_cliniques_identifies_{ror}.xlsx") 
    df_liste_essais_excel.to_excel(writer, sheet_name='liste_essais', index=False, na_rep='NaN')
    for column in df_liste_essais_excel:
        column_length = min(max(df_liste_essais_excel[column].astype(str).map(len).max(), len(column)), 40)
        col_idx = df_liste_essais_excel.columns.get_loc(column)
        #print(ror, col_idx, column, column_length)
        writer.sheets['liste_essais'].set_column(col_idx, col_idx, column_length)
    writer.close()  # (writer.save() was deprecated and removed as of 2023/2024)
    
    return global_stat

In [55]:
global_data = []
rors_to_compute = df_perimetre.ror.to_list()
number_of_trials, number_of_trials_10_years, indicator_10_years, number_of_trials_10_years_drug, indicator_10_years_drug, number_of_trials_10_years_within_12_months, indicator_10_years_within_12_months, number_of_trials_10_years_within_12_months_drug, indicator_10_years_within_12_months_drug = indicators_france
global_data.append({
    "ror": "",
    "sponsor_name": "France",
    "sponsor_article": "",
    "number_of_trials": number_of_trials,
    "number_of_trials_10_years": number_of_trials_10_years,
    "indicator_10_years": indicator_10_years,
    "number_of_trials_10_years_drug": number_of_trials_10_years_drug,
    "indicator_10_years_drug": indicator_10_years_drug,
    "number_of_trials_10_years_within_12_months": number_of_trials_10_years_within_12_months,
    "indicator_10_years_within_12_months": indicator_10_years_within_12_months,
    "number_of_trials_10_years_within_12_months_drug": number_of_trials_10_years_within_12_months_drug,
    "indicator_10_years_within_12_months_drug": indicator_10_years_within_12_months_drug
})
number_of_trials, number_of_trials_10_years, indicator_10_years, number_of_trials_10_years_drug, indicator_10_years_drug, number_of_trials_10_years_within_12_months, indicator_10_years_within_12_months, number_of_trials_10_years_within_12_months_drug, indicator_10_years_within_12_months_drug = indicators_academic
global_data.append({
    "ror": "",
    "sponsor_name": "Promoteurs académiques",
    "sponsor_article": "",
    "number_of_trials": number_of_trials,
    "number_of_trials_10_years": number_of_trials_10_years,
    "indicator_10_years": indicator_10_years,
    "number_of_trials_10_years_drug": number_of_trials_10_years_drug,
    "indicator_10_years_drug": indicator_10_years_drug,
    "number_of_trials_10_years_within_12_months": number_of_trials_10_years_within_12_months,
    "indicator_10_years_within_12_months": indicator_10_years_within_12_months,
    "number_of_trials_10_years_within_12_months_drug": number_of_trials_10_years_within_12_months_drug,
    "indicator_10_years_within_12_months_drug": indicator_10_years_within_12_months_drug
})
for ix, current_ror in enumerate(rors_to_compute):
    if current_ror in excluded_rors:
        continue
    m = make_data(current_ror)
    if m:
        global_data.append({
            "ror": m["ror"],
            "sponsor_name": m["sponsor_name"],
            "sponsor_article": m["sponsor_article"],
            "number_of_trials": m["number_of_trials"],
            "number_of_trials_10_years": m["number_of_trials_10_years"],
            "indicator_10_years": m["indicator_10_years"],
            "number_of_trials_10_years_drug": m["number_of_trials_10_years_drug"],
            "indicator_10_years_drug": m["indicator_10_years_drug"],
            "number_of_trials_10_years_within_12_months": m["number_of_trials_10_years_within_12_months"],
            "indicator_10_years_within_12_months": m["indicator_10_years_within_12_months"],
            "number_of_trials_10_years_within_12_months_drug": m["number_of_trials_10_years_within_12_months_drug"],
            "indicator_10_years_within_12_months_drug": m["indicator_10_years_within_12_months_drug"]
        })
# Write a file to expose the stats for each academic sponsor
pd.DataFrame(global_data).to_csv(f"./../publipostage2/indicators.csv", index=False)

00pg5jh14 0 statut 6
00pg5jh14 1 statut_name 40
00pg5jh14 2 NCTId 11
00pg5jh14 3 eudraCT 14
00pg5jh14 4 CTIS 17
00pg5jh14 5 completion_year 15
00pg5jh14 6 clinical_trial_title 40
00pg5jh14 7 acronym 27
00pg5jh14 8 intervention_type 19
01502ca60 0 statut 6
01502ca60 1 statut_name 40
01502ca60 2 NCTId 11
01502ca60 3 eudraCT 14
01502ca60 4 CTIS 4
01502ca60 5 completion_year 15
01502ca60 6 clinical_trial_title 40
01502ca60 7 acronym 40
01502ca60 8 intervention_type 18
index 0 is out of bounds for axis 0 with size 0
00mthsf17 0 statut 6
00mthsf17 1 statut_name 40
00mthsf17 2 NCTId 11
00mthsf17 3 eudraCT 14
00mthsf17 4 CTIS 4
00mthsf17 5 completion_year 15
00mthsf17 6 clinical_trial_title 40
00mthsf17 7 acronym 27
00mthsf17 8 intervention_type 18
017h5q109 0 statut 6
017h5q109 1 statut_name 40
017h5q109 2 NCTId 11
017h5q109 3 eudraCT 14
017h5q109 4 CTIS 4
017h5q109 5 completion_year 15
017h5q109 6 clinical_trial_title 40
017h5q109 7 acronym 40
017h5q109 8 intervention_type 19
002cp4060 0 sta

04t0gwh46 6 clinical_trial_title 40
04t0gwh46 7 acronym 14
04t0gwh46 8 intervention_type 17
03xfq7a50 0 statut 6
03xfq7a50 1 statut_name 40
03xfq7a50 2 NCTId 11
03xfq7a50 3 eudraCT 14
03xfq7a50 4 CTIS 4
03xfq7a50 5 completion_year 15
03xfq7a50 6 clinical_trial_title 40
03xfq7a50 7 acronym 12
03xfq7a50 8 intervention_type 17
index 0 is out of bounds for axis 0 with size 0
index 0 is out of bounds for axis 0 with size 0
index 0 is out of bounds for axis 0 with size 0
04s3t1g37 0 statut 6
04s3t1g37 1 statut_name 40
04s3t1g37 2 NCTId 11
04s3t1g37 3 eudraCT 14
04s3t1g37 4 CTIS 4
04s3t1g37 5 completion_year 15
04s3t1g37 6 clinical_trial_title 40
04s3t1g37 7 acronym 17
04s3t1g37 8 intervention_type 17
index 0 is out of bounds for axis 0 with size 0
index 0 is out of bounds for axis 0 with size 0
046bx1082 0 statut 6
046bx1082 1 statut_name 40
046bx1082 2 NCTId 11
046bx1082 3 eudraCT 7
046bx1082 4 CTIS 4
046bx1082 5 completion_year 15
046bx1082 6 clinical_trial_title 40
046bx1082 7 acronym 12


03ytpa045 7 acronym 10
03ytpa045 8 intervention_type 17
index 0 is out of bounds for axis 0 with size 0
index 0 is out of bounds for axis 0 with size 0
index 0 is out of bounds for axis 0 with size 0
index 0 is out of bounds for axis 0 with size 0
0219xsk19 0 statut 6
0219xsk19 1 statut_name 38
0219xsk19 2 NCTId 11
0219xsk19 3 eudraCT 14
0219xsk19 4 CTIS 4
0219xsk19 5 completion_year 15
0219xsk19 6 clinical_trial_title 40
0219xsk19 7 acronym 21
0219xsk19 8 intervention_type 17
index 0 is out of bounds for axis 0 with size 0
index 0 is out of bounds for axis 0 with size 0
index 0 is out of bounds for axis 0 with size 0
index 0 is out of bounds for axis 0 with size 0
029brtt94 0 statut 6
029brtt94 1 statut_name 40
029brtt94 2 NCTId 11
029brtt94 3 eudraCT 7
029brtt94 4 CTIS 4
029brtt94 5 completion_year 15
029brtt94 6 clinical_trial_title 40
029brtt94 7 acronym 12
029brtt94 8 intervention_type 17
index 0 is out of bounds for axis 0 with size 0
index 0 is out of bounds for axis 0 with size

03er61e50 0 statut 6
03er61e50 1 statut_name 40
03er61e50 2 NCTId 11
03er61e50 3 eudraCT 7
03er61e50 4 CTIS 4
03er61e50 5 completion_year 15
03er61e50 6 clinical_trial_title 40
03er61e50 7 acronym 7
03er61e50 8 intervention_type 17
index 0 is out of bounds for axis 0 with size 0
index 0 is out of bounds for axis 0 with size 0
index 0 is out of bounds for axis 0 with size 0
index 0 is out of bounds for axis 0 with size 0
index 0 is out of bounds for axis 0 with size 0
index 0 is out of bounds for axis 0 with size 0
03hypw319 0 statut 6
03hypw319 1 statut_name 38
03hypw319 2 NCTId 11
03hypw319 3 eudraCT 7
03hypw319 4 CTIS 4
03hypw319 5 completion_year 15
03hypw319 6 clinical_trial_title 40
03hypw319 7 acronym 7
03hypw319 8 intervention_type 17
index 0 is out of bounds for axis 0 with size 0
index 0 is out of bounds for axis 0 with size 0
index 0 is out of bounds for axis 0 with size 0
index 0 is out of bounds for axis 0 with size 0
index 0 is out of bounds for axis 0 with size 0
index 0 

In [56]:
shutil.make_archive('./../publipostage2', 'zip', './../publipostage2')

'/Users/eric/MESRI/bso-clinical-trials/publipostage2.zip'