In [None]:
from collections import Counter
import os

import pandas as pd
import nest_asyncio
import seaborn as sns
import twint

# Notebook-wide pandas display settings: render up to 50 columns and up to
# 150 characters per cell.
for _option, _value in (("display.max_columns", 50), ("display.max_colwidth", 150)):
    pd.set_option(_option, _value)

# Presumably needed so twint's asyncio code can run inside the notebook's
# already-running event loop -- TODO confirm.
nest_asyncio.apply()

In [None]:
def get_timeline(username, search=None, since=None, verbose=False, fname=None):
    """Run a twint search for one account and store the results as CSV.

    Parameters
    ----------
    username : str
        Account name assigned to the twint configuration's ``Username``.
    search : str, optional
        Search query forwarded to twint; left unset when falsy.
    since : str, optional
        Value for twint's ``Since`` setting; falls back to "2020-01-01".
    verbose : bool, default False
        When False, twint's ``Hide_output`` flag is switched on.
    fname : str, optional
        CSV destination; falls back to "./timelines/<username>.csv".

    Returns
    -------
    None
        The function only triggers the twint run.
    """
    config = twint.Config()
    config.Username = username
    config.Hide_output = not verbose
    config.Store_csv = True
    config.Since = since or "2020-01-01"
    config.Output = fname or f"./timelines/{username}.csv"
    if search:
        config.Search = search

    twint.run.Search(config)


def download_all_users(users, base_path="/home/cash/output/prise_de_parole", **kwargs):
    """Download the timeline of every account in ``users``, one CSV per account.

    Parameters
    ----------
    users : dict
        Maps a group name to the list of usernames belonging to that group.
    base_path : str
        Root directory; files are written to
        ``<base_path>/timelines/<group>/<username>.csv``.
    **kwargs
        Forwarded unchanged to ``get_timeline``.

    Notes
    -----
    Progress is printed as the loop advances. An account whose CSV already
    exists is skipped. A ``ValueError`` raised by ``get_timeline`` is reported
    as "NOT FOUND!" and the loop continues with the next account.
    """
    for group, usernames in users.items():
        print(group)
        group_dir = f"{base_path}/timelines/{group}"
        os.makedirs(group_dir, exist_ok=True)

        for username in usernames:
            print("\t", username)
            fname = f"{group_dir}/{username}.csv"

            # Only fetch accounts that have no CSV on disk yet.
            if not os.path.exists(fname):
                try:
                    get_timeline(username=username, fname=fname, **kwargs)
                except ValueError:
                    outcome = "\t\tNOT FOUND!"
                else:
                    outcome = "\t\tFINISH!"
                print(outcome)


def import_group_df(users, group, base_path="/home/cash/output/prise_de_parole", verbose=False, pattern=None):
    """Load the CSV timelines of one group into a single DataFrame.

    Parameters
    ----------
    users : dict
        Maps a group name to the list of usernames belonging to that group.
    group : str
        Key of ``users`` whose timelines are loaded.
    base_path : str
        Root directory; files are read from
        ``<base_path>/timelines/<group>/<username>.csv``.
    verbose : bool, default False
        Print each username as it is read and report missing files.
    pattern : str, optional
        Regular expression matched case-insensitively against the ``tweet``
        column. Defaults to the notebook-level ``COVID_PATTERN``.

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of the CSVs that were found, with an extra
        boolean ``has_covid`` column. Rows whose tweet is missing are flagged
        False, so the column can be used directly as a boolean mask.

    Raises
    ------
    ValueError
        If none of the group's CSV files exists.
    """
    if pattern is None:
        # Looked up at call time, so the constant may be defined in a later
        # cell than this function.
        pattern = COVID_PATTERN

    frames = []
    for username in users[group]:
        if verbose:
            print("\t", username)
        fname = f"{base_path}/timelines/{group}/{username}.csv"
        try:
            frames.append(pd.read_csv(fname))
        except FileNotFoundError:
            # Accounts without a CSV on disk are simply left out.
            if verbose:
                print(f"{username} NOT FOUND!")

    if not frames:
        # pd.concat would raise an opaque "No objects to concatenate" here;
        # keep the exception type but say which group is empty.
        raise ValueError(
            f"No timeline CSV found for group {group!r} in {base_path}/timelines/{group}"
        )

    df = pd.concat(frames, axis=0)
    # na=False: a missing tweet would otherwise yield NaN, and a mask
    # containing NaN cannot be used for boolean indexing.
    df["has_covid"] = df["tweet"].str.contains(pat=pattern, case=False, na=False)

    return df

fédérations 

Mercatel, Perifem, FCD (Fédération du Commerce et de la Distribution)

In [None]:
# Regular expression used to flag tweets; import_group_df matches it
# case-insensitively against the tweet text.
COVID_PATTERN = "covid|coronavirus|virus"

# Accounts to scrape, keyed by group name. Each handle must appear only once
# per group: import_group_df reads one CSV per entry, so a repeated handle
# would load the same file twice and double-count that account's tweets.
USERS = {
    "banque": [
        "societegenerale", "SG_etVous",  # societe generale
        "cmarkea", "CreditMutuel", "CreditMutuelAF",  # credit mutuel
        "CreditAgricole", "ca_ndf", "CAPEcouteClient", "CA_info",  # credit agricole
        # CA region
        "CA_IDF", "ca_centrest", "CA_FranceComte", "CA_BriePicardie",
        "CA_Toulouse31", "Credit_Agri_TP", "CANSeine",
        "CAAquitaine", "CAdelacorse", "credit_agri_PCA", "credit_agri_PG",
        "CASudRhoneAlpes", "CA_SudMed",
        "cafproaix", "CA_CentreLoire", "C_A_Lorraine", "ca_nordest",
        "CA_alp_prov", "CA_LHL", "CAIlleEtVilaine",
        "CAChampBourg", "ca_finistere", "CA_cotesdarmor", "CAAnjouMaine",
        "CALanguedoc", "CA_LCL", "CA_Des_Savoie",
        "Caisse_Epargne", "FNCE",  # caisse epargne (CE)
        # CE region
        "CaissEpargneBPL", "CaissEpargneGEE", "CaissEpargneMP",
        "CaissEpargneLDA", "CaissepargneLR",
        "CE_LoireCentre", "CE_RhoneAlpes", "CE_Normandie", "CE_CEPAC",
        "CEHDF", "CE_iledefrance",
        # other banks
        "cic", "BNPParibas", "LaBanquePostale", "GroupeBPCE", "LCL",
        "FBFFrance", "Boursorama", "INGFrance", "BPEbanqueprivee",
        "creditdunord", "BanquePopulaire",
    ],

    "gouvernement": [
        "gouvernementFR", "Sante_Gouv", "olivierveran", "HAS_sante",
        "SantePubliqueFr", "Economie_Gouv", "Elysee", "DGEntreprises",
        "servicepublicfr", "dgfip_officiel",
        "DocCedef", "dgccrf", "Interieur_Gouv",
    ],

    "federations": [
        "FCDfrance", "assoperifem",
    ],
}

# 1. Query

In [None]:
# Fetch the timeline of every account listed in USERS; accounts whose CSV is
# already on disk are skipped by download_all_users.
download_all_users(USERS)

# 2. Analysis

In [None]:
# Load every group's timelines and tag each row with the group it came from.
# Iterating over USERS itself (rather than a hand-written list of its keys)
# keeps this cell in sync when a group is added to or removed from USERS.
df = pd.concat(
    [
        import_group_df(users=USERS, group=group).assign(group=group)
        for group in USERS
    ]
)

In [None]:
# Number of tweets flagged by has_covid, per group.
df.loc[df["has_covid"]].groupby(by=["group"]).size()

In [None]:
# The 20 (group, username) pairs with the most tweets flagged by has_covid.
most_prolific = (
    df.loc[df["has_covid"]]
    .groupby(by=["group", "username"], as_index=False)
    .size()
    .sort_values("size", ascending=False)
    .head(20)
)

# One point per account, coloured by group; x labels are rotated for legibility.
ax = sns.catplot(
    data=most_prolific,
    x="username",
    y="size",
    hue="group",
    height=7,
    aspect=1.6,
)
ax.set_xticklabels(rotation=45)

In [None]:
# Print, group by group, the author and text of every tweet flagged by has_covid.
for group, dfgroup in df.groupby("group"):
    print(group)
    print(dfgroup.loc[dfgroup["has_covid"], ["username", "tweet"]])