In [1]:
import os
import pdfplumber
import pandas as pd
import re

# Function to extract specific text from PDF
def extract_info_from_pdf(pdf_file):
    """Extract the declaration fields from one PDF.

    The text of every page is concatenated (without any separator) and each
    field is located with a regular expression run over the whole text.

    Parameters
    ----------
    pdf_file : path or file-like object
        Passed unchanged to ``pdfplumber.open``.

    Returns
    -------
    dict
        Keys ``name``, ``position``, ``company_name``, ``date_transaction``,
        ``date_notification``, ``nature``, ``price`` and ``volume``. Every
        value is the stripped captured text, or ``""`` when the corresponding
        pattern does not match.
    """
    with pdfplumber.open(pdf_file) as pdf:
        # extract_text() can yield None for a page without extractable text
        # (e.g. a scanned page); treat it as empty instead of crashing on
        # `str + None`. Joining once also avoids repeated string reallocation.
        text = "".join(page.extract_text() or "" for page in pdf.pages)

    # Regex patterns for information extraction, listed in the key order of the
    # returned dict. Each pattern captures the text after a label up to the
    # next known label (or the end of the text).
    patterns = {
        'name': r"PERSONNE ETROITEMENT LIEE :(.*?)(?=,|$)",
        # NOTE(review): this starts at the first comma of the whole text, which
        # is assumed to be the one that follows the name - confirm on new layouts.
        'position': r",(.*?)(?=NOTIFICATION|$)",
        'company_name': r"NOM :(.*?)(?=LEI|DETAIL|$)",  # LEI or DETAIL to stop
        'date_transaction': r"DATE DE LA TRANSACTION :(.*?)(?=LIEU|$)",
        'date_notification': r"DATE DE RECEPTION DE LA NOTIFICATION :(.*?)(?=COMMENTAIRES|$)",
        'nature': r"NATURE DE LA TRANSACTION :(.*?)(?=DESCRIPTION|$)",
        'price': r"PRIX :(.*?)(?=Euro|Dollar|Livre|Franc|$)",
        # last occurrence of "VOLUME :" before the next "TRANSACTION"
        'volume': r"VOLUME :((?:(?!VOLUME :|TRANSACTION).)*)\s*TRANSACTION",
    }

    text_dict = {}
    for field, pattern in patterns.items():
        # DOTALL lets a captured value span several lines of the PDF text
        match = re.search(pattern, text, re.DOTALL)
        text_dict[field] = match.group(1).strip() if match else ""

    return text_dict

folder_path = "/Users/alexis/Downloads"

# Keep only the directory entries whose name ends in ".pdf" and turn each one
# into a full path
pdf_names = filter(lambda entry: entry.endswith(".pdf"), os.listdir(folder_path))
pdf_files = [os.path.join(folder_path, entry) for entry in pdf_names]
print(f'list of PDF files: {pdf_files}')
print(f'number of PDF files: {len(pdf_files)}')

# One record (dict of extracted fields) per PDF; the dataframe is built once
# from the complete list of records
data = []
for file in pdf_files:
    data.append(extract_info_from_pdf(file))
df = pd.DataFrame(data)

df.head(10)

list of PDF files: ['/Users/alexis/Downloads/DD_22_873197_9545767.pdf', '/Users/alexis/Downloads/DD_22_873757_9553231.pdf', '/Users/alexis/Downloads/DD_22_877789_9603925.pdf', '/Users/alexis/Downloads/DD_22_876817_9591571.pdf', '/Users/alexis/Downloads/DD_22_865073_9445323.pdf', '/Users/alexis/Downloads/DD_22_867041_9468981.pdf', '/Users/alexis/Downloads/DD_22_862191_9410349.pdf', '/Users/alexis/Downloads/DD_22_877384_9598691.pdf', '/Users/alexis/Downloads/DD_22_864573_9439220.pdf', '/Users/alexis/Downloads/DD_22_860657_9391726.pdf', '/Users/alexis/Downloads/DD_22_866275_9459891.pdf', '/Users/alexis/Downloads/DD_22_872763_9540376.pdf', '/Users/alexis/Downloads/DD_22_867721_9478005.pdf', '/Users/alexis/Downloads/DD_22_859322_9374858.pdf', '/Users/alexis/Downloads/DD_22_869222_9496603.pdf', '/Users/alexis/Downloads/DD_22_866933_9467798.pdf', '/Users/alexis/Downloads/DD_22_877831_9604342.pdf', '/Users/alexis/Downloads/DD_22_860823_9393666.pdf', '/Users/alexis/Downloads/DD_22_866069_945749

Unnamed: 0,name,position,company_name,date_transaction,date_notification,nature,price,volume
0,ARJIL COMMANDITEE-ARCO SOCIETE ANONYME personn...,PRESIDENT DIRECTEUR\nGENERAL,LAGARDERE SA,22 novembre 2022,23 novembre 2022,Acquisition,18.9857,1 860.0000
1,The Home Bar Bevtech LTD personne morale liée ...,Président Directeur Général,SMART GOOD THINGS HOLDING S.A.,28 novembre 2022,28 novembre 2022,Cession,158.0,265.0000
2,Paul DU SAILLANT,Directeur Général Délégué,ESSILORLUXOTTICA,23 décembre 2022,29 décembre 2022,Cession,169.853,8 831.7470
3,Nicolas HIERONIMUS,Directeur Général et Administrateur,L'OREAL,16 décembre 2022,19 décembre 2022,DONATION DE LA NUE PROPRIETE EFFECTUEE,0.0,350 994.0000
4,paul lorne,Directeur Général Délégué,SPARTOO,04 octobre 2022,05 octobre 2022,Acquisition,1.23,4 470.0000
5,Guillaume Demulier,President du Directoire,ROCHE BOBOIS SA,01 juillet 2022,18 octobre 2022,Acquisition definitive d'actions gratuites,0.0,22 200.0000
6,MAKEMO CAPITAL SOCIETE PAR ACTIONS SIMPLIFIEE ...,"Président de AF&Co\nManagement, elle-même Géra...",TIKEHAU CAPITAL,20 septembre 2022,21 septembre 2022,Acquisition,25.1958,1 435.0000
7,Aymerick PENICAUT,PDG,ASHLER & MANSON,20 décembre 2022,22 décembre 2022,Cession,4.5,32.0000
8,SOBREDA SAS personne morale liée à JOEL SAUVAGET,ADMINISTRATEUR,ECOMIAM,30 septembre 2022,04 octobre 2022,Cession,5.72,138 800.0000
9,COMPAGNIE DE L’ODET Société Européenne personn...,Administrateur de Bolloré SE,BOLLORE SE,07 septembre 2022,12 septembre 2022,Acquisition,4.7696,1 052 150.0000


In [2]:
from datetime import datetime
import locale

# Convert the French-formatted date columns (e.g. '22 novembre 2022') to
# datetime. '%B' is matched against the month names of the active LC_TIME
# locale, so the locale is switched to French for the conversion only.
# The previous LC_TIME value is remembered and restored in `finally`, so the
# kernel gets its original locale back even if the conversion raises.
previous_lc_time = locale.setlocale(locale.LC_TIME)
try:
    # Raises locale.Error when the fr_FR.UTF-8 locale is not installed
    locale.setlocale(locale.LC_TIME, 'fr_FR.UTF-8')
    for date_column in ('date_transaction', 'date_notification'):
        # errors='coerce': values that do not match the format become NaT
        df[date_column] = pd.to_datetime(df[date_column], format='%d %B %Y', errors='coerce')
finally:
    locale.setlocale(locale.LC_TIME, previous_lc_time)

df.head(10)


Unnamed: 0,name,position,company_name,date_transaction,date_notification,nature,price,volume
0,ARJIL COMMANDITEE-ARCO SOCIETE ANONYME personn...,PRESIDENT DIRECTEUR\nGENERAL,LAGARDERE SA,2022-11-22,2022-11-23,Acquisition,18.9857,1 860.0000
1,The Home Bar Bevtech LTD personne morale liée ...,Président Directeur Général,SMART GOOD THINGS HOLDING S.A.,2022-11-28,2022-11-28,Cession,158.0,265.0000
2,Paul DU SAILLANT,Directeur Général Délégué,ESSILORLUXOTTICA,2022-12-23,2022-12-29,Cession,169.853,8 831.7470
3,Nicolas HIERONIMUS,Directeur Général et Administrateur,L'OREAL,2022-12-16,2022-12-19,DONATION DE LA NUE PROPRIETE EFFECTUEE,0.0,350 994.0000
4,paul lorne,Directeur Général Délégué,SPARTOO,2022-10-04,2022-10-05,Acquisition,1.23,4 470.0000
5,Guillaume Demulier,President du Directoire,ROCHE BOBOIS SA,2022-07-01,2022-10-18,Acquisition definitive d'actions gratuites,0.0,22 200.0000
6,MAKEMO CAPITAL SOCIETE PAR ACTIONS SIMPLIFIEE ...,"Président de AF&Co\nManagement, elle-même Géra...",TIKEHAU CAPITAL,2022-09-20,2022-09-21,Acquisition,25.1958,1 435.0000
7,Aymerick PENICAUT,PDG,ASHLER & MANSON,2022-12-20,2022-12-22,Cession,4.5,32.0000
8,SOBREDA SAS personne morale liée à JOEL SAUVAGET,ADMINISTRATEUR,ECOMIAM,2022-09-30,2022-10-04,Cession,5.72,138 800.0000
9,COMPAGNIE DE L’ODET Société Européenne personn...,Administrateur de Bolloré SE,BOLLORE SE,2022-09-07,2022-09-12,Acquisition,4.7696,1 052 150.0000


In [3]:
# Ensure Price and Volume are Float values
# The extracted strings use whitespace as thousands separator ('1 052 150.0000').
# r'\s+' also removes non-breaking / narrow spaces, which a plain ' ' misses.
# Fields the extraction left empty ('') or that are otherwise not numeric become
# NaN instead of aborting the whole conversion with a ValueError.
for numeric_column in ('price', 'volume'):
    compact_text = df[numeric_column].astype(str).str.replace(r'\s+', '', regex=True)
    # astype(float) keeps the dtype float64 even when every value is integral
    df[numeric_column] = pd.to_numeric(compact_text, errors='coerce').astype(float)
df.tail(10)

Unnamed: 0,name,position,company_name,date_transaction,date_notification,nature,price,volume
1585,Patricia Barbizet,administrateur,TOTALENERGIES SE,2022-12-12,2022-12-14,Acquisition,56.43,1000.0
1586,Isabelle Andres,Membre du Comité exécutif,BELIEVE,2022-11-17,2022-11-18,Exercice,7.375,1500.0
1587,DIDIER BREDY,PDG,EKINOPS,2022-10-28,2022-10-28,Cession,6.8915,1276.0
1588,CFD CAPITAL SARL personne morale liée à Franço...,Président,LES CONSTRUCTEURS DU BOIS,2022-09-12,2022-09-13,Acquisition,4.82,90.0
1589,Francesco MILLERI,CHAIRMAN AND CEO,ESSILORLUXOTTICA,2022-10-03,2022-10-03,Acquisition,139.0,26600.0
1590,Gilles Auffret,Membre du conseil d'administration,ELIOR GROUP,2022-12-29,2022-12-29,Acquisition,3.2985,5000.0
1591,PARFININCO SA personne morale liée à Jacques M...,Président-Directeur Général,VICAT S.A.,2022-09-06,2022-09-07,Acquisition,24.5898,10000.0
1592,STEPHANE GERART,CIO - Directeur BU IA - Membre du comex,ONCODESIGN,2022-10-18,2022-10-20,Cession,14.42,17640.0
1593,ISABELLE SIMON,Secrétaire général,THALES,2022-09-28,2022-09-28,Acquisition gratuite d'actions,0.0,2493.0
1594,Eric BAISSUS,President du Directoire de Kalray,KALRAY,2022-08-10,2022-10-18,Cession,23.1546,2500.0


In [4]:
# Disabled scaffold: builds a sorted dict that maps every distinct company name
# in df to the dummy value 'x' and prints it. Presumably it was used once to
# generate the skeleton of the hand-filled mapping below - confirm before deleting.
# company_names = df['company_name'].unique()

# ticker_mapping = {}

# for company_name in company_names:
#     ticker_mapping[company_name] = 'x'

# ticker_mapping = dict(sorted(ticker_mapping.items()))

# print(ticker_mapping)

# Hand-maintained lookup: company name as it appears in the 'company_name'
# column -> ticker symbol passed to yfinance. Keys are matched exactly.
# '???.PA' is a placeholder for companies whose symbol has not been filled in.
ticker_mapping = {
    '1000MERCIS': 'ALMIL.PA',
    '2CRSI': 'AL2SI.PA',
    'ABC ARBITRAGE': 'ABCA.PA',
    'ABIONYX PHARMA': 'ABNX.PA',
    'ABIVAX': 'ABVX.PA',
    'ACCOR': 'AC.PA',
    'AEROPORTS DE PARIS': 'ADP.PA',
    'AFFLUENT MEDICAL': 'AFME.PA',
    'AFYREN': 'ALAFY.PA',
    'AGRIPOWER FRANCE SA': 'ALAGP.PA',
    'AIR FRANCE-KLM': 'AF.PA',
    'ALBIOMA': 'SECH.PA',  # recorded download log: no data found for this symbol
    'ALD': 'ALD.PA',
    'ALSTOM': 'ALO.PA',
    'ALTAREA': 'ALTA.PA',
    'ALTAREIT': 'AREIT.PA',
    'AMOEBA': 'ALMIB.PA',
    'ARAMIS GROUP': 'ARAMI.PA',
    'ARGAN': 'ARG.PA',
    'ARKEMA': 'AKE.PA',
    'ARTEA': 'ARTE.PA',
    'ASHLER & MANSON': 'MLAEM.PA',
    'ATARI': 'ALATA.PA',
    'ATLAND': 'ATLD.PA',
    'AUDACIA': 'ALAUD.PA',
    'AUREA': 'AURE.PA',
    'AXA': 'CS.PA',
    'BEACONSMIND AG': 'MLBMD.PA',
    'BELIEVE': 'BLV.PA',
    'BERNARD LOISEAU SA': 'ALDBL.PA',
    'BIGBEN INTERACTIVE': 'BIG.PA',
    'BILENDI': 'ALBLD.PA',
    'BNP PARIBAS': 'BNP.PA',
    'BOA CONCEPT': 'ALBOA.PA',
    'BOLLORE SE': 'BOL.PA',
    'BONDUELLE': 'BON.PA',
    'BOUYGUES': 'EN.PA',
    'BUREAU VERITAS': 'BVI.PA',
    'CAPGEMINI': 'CAP.PA',
    'CARBIOS': 'ALCRB.PA',
    'CARMILA SA': 'CARM.PA',
    'CARREFOUR': 'CA.PA',
    'CATERING INTERNATIONAL ET SERVICES': 'ALCIS.PA',
    'CATERPILLAR INC': 'CATR.PA',
    'CBO TERRITORIA': 'CBOT.PA',
    'CEGEDIM': 'CGM.PA',
    'CHARGEURS': 'CRI.PA',
    'CLARANOVA': 'CLA.PA',
    'CLASQUIN': 'ALCLA.PA',
    "COMPAGNIE DE L'ODET": 'ODET.PA',
    'COMPAGNIE DES ALPES': 'CDA.PA',
    'COURTOIS S.A.': 'COUR.PA',
    'COVIVIO': 'COV.PA',
    'COVIVIO HOTELS': 'COVH.PA',
    'CREDIT AGRICOLE S.A.': 'ACA.PA',
    'CROSSJECT': 'ALCJ.PA',
    'CRYPTO BLOCKCHAIN INDUSTRIES': 'ALCBI.PA',
    'DANONE': 'BN.PA',
    'DASSAULT SYSTEMES': 'DSY.PA',
    'DEKUPLE': 'DLU.BE',
    'DERICHEBOURG': 'DBG.PA',
    'DNXCORP': 'ALDNX.PA',
    'ECOMIAM': 'ALECO.PA',
    'EDENRED': 'EDEN.PA',
    'EKINOPS': 'EKI.PA',
    'ELIOR GROUP': 'ELIOR.PA',
    'ELIS': 'ELIS.PA',
    'ENERTIME': 'ALENE.PA',
    'ENGIE': 'ENGI.PA',
    'ENOGIA': 'ALENO.PA',
    'ENTECH': 'ALESE.PA',
    'ERAMET': 'ERA.PA',
    'EROLD': 'ALPLA.PA',
    'ESKER': 'ALESK.PA',
    'ESSILORLUXOTTICA': 'EL.PA',
    'EURAZEO': 'RF.PA',
    'EUROBIO-SCIENTIFIC': 'ALERS.PA',
    'EUTELSAT COMMUNICATIONS': 'ETL.PA',
    'FOCUS ENTERTAINMENT': 'ALFOC.PA',
    'FONCIERE INEA': 'INEA.PA',
    'FORSEE POWER': 'FORSE.PA',
    'FREELANCE.COM': 'ALFRE.PA',
    'FREY': 'FREY.PA',
    'GAUSSIN S.A.': 'ALGAU.PA',
    'GAZTRANSPORT ET TECHNIGAZ': 'GTT.PA',
    'GECINA': 'GFC.PA',
    'GENFIT': 'GNFT.PA',
    'GETLINK SE': 'GET.PA',
    'GEVELOT S A': 'ALGEV.PA',
    'GL EVENTS': 'GLO.PA',
    'GRAINES VOLTZ': 'GRVO.PA',
    'GROUPE AIRWELL': 'ALAIR.PA',
    'GROUPE CRIT': 'CEN.PA',
    'GROUPE FLO': '???.PA',  # TODO: placeholder, real symbol still to be filled in
    'GROUPE LDLC': 'ALLDL.PA',
    'GROUPE TERA': 'ALGTR.PA',
    'HAULOTTE GROUP': 'PIG.PA',
    'HERIGE': 'ALHRG.PA',
    'HERMES INTERNATIONAL': 'RMS.PA',
    'HOLCIM LTD': 'HOLN.SW',
    'HYDROGEN-REFUELING-SOLUTIONS SA': 'ALHRS.PA',
    'ICADE': 'ICAD.PA',
    'ID LOGISTICS GROUP': 'IDL.PA',
    'INFOTEL': 'INF.PA',
    'INNATE PHARMA': 'IPH.PA',
    'IPSOS': 'IPS.PA',
    'JCDECAUX SA': 'DEC.PA',
    'JCDECAUX SE': 'DEC.PA',
    'KALRAY': 'ALKAL.PA',
    'KERING': 'KER.PA',
    "L'AIR LIQUIDE": 'AI.PA',
    "L'OREAL": 'OR.PA',
    "LA FRANCAISE DE L'ENERGIE": 'FDE.PA',
    'LA FRANCAISE DES JEUX': 'FDJ.PA',
    'LAGARDERE SA': 'MMB.PA',
    'LANSON-BCC': 'ALLAN.PA',
    'LAURENT-PERRIER': 'LPE.PA',
    'LDC': 'LOUP.PA',
    'LE TANNEUR ET CIE': 'LTAN.PA',  # recorded download log: no data found for this symbol
    'LES AGENCES DE PAPA': 'MLPAP.PA',  # recorded download log: no data found for this symbol
    'LES CONSTRUCTEURS DU BOIS': 'MLLCB.PA',
    'LEXIBOOK': 'ALLEX.PA',
    'LNA SANTE': 'LNA.PA',
    'LUCIBEL': 'ALUCI.PA',
    'LVMH MOET HENNESSY-LOUIS VUITTON': 'MC.PA',
    'MAISONS DU MONDE': 'MDM.PA',
    'MEDESIS PHARMA S.A.': 'ALMDP.PA',
    'MEDIAN TECHNOLOGIES': 'ALMDT.PA',
    'MERSEN': 'MRN.PA',
    'METAVISIO': 'ALTHO.PA',
    'METROPOLE TELEVISION': 'MMT.PA',
    'MOULINVEST': 'ALMOU.PA',
    'MRM': 'MRM.PA',
    'NACON': 'NACON.PA',
    'NEOEN': 'NEOEN.PA',
    'NETGEM': 'NGP.PA',  # recorded download log: no data found for this symbol
    'NEXITY': 'NXI.PA',
    'NEXTEDIA': 'ALNXT.PA',
    'OBIZ': 'ALBIZ.PA',
    'OLYMPIQUE LYONNAIS GROUPE': 'OLG.PA',
    'ONCODESIGN': 'ALOPM.PA',
    'ONCODESIGN PRECISION MEDICINE S.A.': 'ALOPM.PA',
    'OREGE': 'OREGE.PA',
    'OSMOZIS': 'ALOSM.PA',
    'OVH GROUPE': 'OVH.PA',
    'PATRIMOINE ET COMMERCE': 'PAT.PA',
    'PERNOD RICARD': 'RI.PA',
    'PIERRE ET VACANCES': 'VAC.PA',
    'PLASTIQUES DU VAL DE LOIRE': 'PVL.PA',
    'PREDILIFE': 'ALPRE.PA',
    'PUBLICIS GROUPE SA': 'PUB.PA',
    'QUADIENT S.A.': 'QDT.PA',
    'QUANTUM GENOMICS': 'ALQGC.PA',
    'QWAMPLIFY': 'ALQWA.PA',
    'REALITES': 'ALREA.PA',
    'REMY COINTREAU': 'RCO.PA',
    'RENAULT': 'RNO.PA',
    'RIBER': 'ALRIB.PA',
    'ROBERTET SA': 'RBT.PA',
    'ROCHE BOBOIS SA': 'RBO.PA',
    'ROCTOOL': 'ALROC.PA',
    'ROTHSCHILD & CO': 'ROTH.PA',  # recorded download log: no data found for this symbol
    'SAFRAN': 'SAF.PA',
    'SAINT GOBAIN NEDERLAND BV': 'SGO.PA',
    'SAINT JEAN GROUPE': 'SABE.PA',
    'SANOFI': 'SAN.PA',
    'SCHNEIDER ELECTRIC SE': 'SU.PA',
    'SCOR SE': 'SCR.PA',
    'SEB S.A.': 'SK.PA',
    'SEGRO PUBLIC LIMITED COMPANY': 'SGRO.PA',
    'SERGE FERRARI GROUP': 'SEFER.PA',
    'SES IMAGOTAG': 'SESL.PA',
    'SIDETRADE': 'ALBFR.PA',
    'SII': 'SII.PA',
    'SMART GOOD THINGS HOLDING S.A.': 'MLSGT.PA',
    'SOCIETE BIC': 'BB.PA',
    'SOCIETE DE LA TOUR EIFFEL': 'EIFF.PA',
    'SOCIETE FONCIERE LYONNAISE': 'FLY.PA',
    'SOITEC': 'SOI.PA',
    'SOMFY SA': '???.PA',  # TODO: placeholder, real symbol still to be filled in
    'SPARTOO': 'ALSPT.PA',
    'SPIE SA': 'SPIE.PA',
    'SQLI': 'SQI.PA',
    'SRP GROUPE': 'SRP.PA',
    'STEF': 'STF.PA',
    'STREAMWIDE': 'ALSTW.PA',
    'TECHNICOLOR': 'TCHCS.PA',
    'TECHNICOLOR CREATIVE STUDIOS': 'TCHCS.PA',
    'TELEPERFORMANCE': 'TEP.PA',
    'TELEVERBIER SA': 'TVRB.PA',
    'TELEVISION FRANCAISE 1': 'TFI.PA',
    'THALES': 'HO.PA',
    'THE BLOCKCHAIN GROUP': 'ALTBG.PA',
    'TIKEHAU CAPITAL': 'TKO.PA',
    'TOOSLA': 'ALTOO.PA',
    'TOTALENERGIES SE': 'TTE.PA',
    'TRANSITION EVERGREEN': 'EGR.PA',
    'TRAVEL TECHNOLOGY INTERACTIVE': 'ALTTI.PA',
    'TRIGANO': 'TRI.PA',
    'U.M.H.S. – UNION METALLURGIQUE DE LA HAUTE SEINE': 'MLUMH.PA',
    'UBISOFT ENTERTAINMENT': 'UBI.PA',
    'UMALIS GROUP': 'MLUMG.PA',
    'UNIBAIL-RODAMCO-WESTFIELD SE': 'URW.PA',
    'UNION FINANCIERE DE FRANCE BANQUE': '???.PA',  # TODO: placeholder, real symbol still to be filled in
    'VALLOUREC': 'VK.PA',
    'VALNEVA SE': 'VLA.PA',
    'VANTIVA': 'VANTI.PA',
    'VENTE-UNIQUE.COM': 'ALVU.PA',
    'VEOLIA ENVIRONNEMENT': 'VIE.PA',
    'VERALLIA': 'VRLA.PA',
    'VICAT S.A.': 'VCT.PA',
    'VINCI': 'DG.PA',
    'VIVENDI SE': 'VIV.PA',
    'VOGO': 'ALVGO.PA',
    'VOLTALIA': 'VLTSA.PA',
    'VOYAGEURS DU MONDE': 'ALVDM.PA',
    'WAVESTONE': 'WAVE.PA',
    'WENDEL': 'MF.PA',
    'WITBE': 'ALWIT.PA'
    }

In [5]:
# Look up the ticker symbol of every row's company; names that are absent from
# ticker_mapping end up as NaN
company_tickers = df['company_name'].map(ticker_mapping)
df['ticker'] = company_tickers

df.head(15)

Unnamed: 0,name,position,company_name,date_transaction,date_notification,nature,price,volume,ticker
0,ARJIL COMMANDITEE-ARCO SOCIETE ANONYME personn...,PRESIDENT DIRECTEUR\nGENERAL,LAGARDERE SA,2022-11-22,2022-11-23,Acquisition,18.9857,1860.0,MMB.PA
1,The Home Bar Bevtech LTD personne morale liée ...,Président Directeur Général,SMART GOOD THINGS HOLDING S.A.,2022-11-28,2022-11-28,Cession,158.0,265.0,MLSGT.PA
2,Paul DU SAILLANT,Directeur Général Délégué,ESSILORLUXOTTICA,2022-12-23,2022-12-29,Cession,169.853,8831.747,EL.PA
3,Nicolas HIERONIMUS,Directeur Général et Administrateur,L'OREAL,2022-12-16,2022-12-19,DONATION DE LA NUE PROPRIETE EFFECTUEE,0.0,350994.0,OR.PA
4,paul lorne,Directeur Général Délégué,SPARTOO,2022-10-04,2022-10-05,Acquisition,1.23,4470.0,ALSPT.PA
5,Guillaume Demulier,President du Directoire,ROCHE BOBOIS SA,2022-07-01,2022-10-18,Acquisition definitive d'actions gratuites,0.0,22200.0,RBO.PA
6,MAKEMO CAPITAL SOCIETE PAR ACTIONS SIMPLIFIEE ...,"Président de AF&Co\nManagement, elle-même Géra...",TIKEHAU CAPITAL,2022-09-20,2022-09-21,Acquisition,25.1958,1435.0,TKO.PA
7,Aymerick PENICAUT,PDG,ASHLER & MANSON,2022-12-20,2022-12-22,Cession,4.5,32.0,MLAEM.PA
8,SOBREDA SAS personne morale liée à JOEL SAUVAGET,ADMINISTRATEUR,ECOMIAM,2022-09-30,2022-10-04,Cession,5.72,138800.0,ALECO.PA
9,COMPAGNIE DE L’ODET Société Européenne personn...,Administrateur de Bolloré SE,BOLLORE SE,2022-09-07,2022-09-12,Acquisition,4.7696,1052150.0,BOL.PA


In [6]:
import yfinance as yf
from datetime import timedelta

# Get current and future stock prices and variations
def get_stock_prices(row, horizons=(1, 3, 5, 10, 20, 30, 60), window_days=90):
    """Fetch opening prices after a transaction and express them as ratios.

    The price history of ``row['ticker']`` is requested from yfinance for the
    ``window_days`` calendar days starting at ``row['date_transaction']``.
    Row 0 of the returned history is used as the reference price.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide ``'ticker'`` (str) and ``'date_transaction'`` (datetime-like).
    horizons : iterable of int, optional
        Row offsets into the returned history; one ``var_d<N>`` entry is
        produced per offset.
    window_days : int, optional
        Length of the requested history window in calendar days.

    Returns
    -------
    pandas.Series
        ``stock_price_open_d0`` (open price of the first returned row) and, for
        each horizon N, ``var_dN`` = open price of row N divided by the open
        price of row 0. Always a Series with the same index, so that
        ``DataFrame.apply(..., axis=1)`` expands it into columns consistently.
        Entries are NaN when the ticker or date is missing, when the download
        fails, or when the history holds fewer than N + 1 rows.
    """
    nan = float('nan')
    result = {'stock_price_open_d0': nan}
    for horizon in horizons:
        result[f'var_d{horizon}'] = nan

    try:
        ticker = row['ticker']
        start_day = row['date_transaction']

        # Unmapped companies (NaN ticker) and unparsed dates (NaT) cannot be
        # looked up: skip the network round trip altogether.
        if not isinstance(ticker, str) or not ticker or pd.isna(start_day):
            return pd.Series(result, dtype='float64')

        end_day = start_day + timedelta(days=window_days)
        stock_data = yf.Ticker(ticker).history(
            start=start_day.strftime('%Y-%m-%d'),
            end=end_day.strftime('%Y-%m-%d'),
        )
        prices = stock_data['Open'].values

        if len(prices) > 0:
            first_open = prices[0]
            result['stock_price_open_d0'] = first_open
            # Fill each horizon independently: a window too short for the
            # farthest horizon must not discard the nearer ones.
            for horizon in horizons:
                if horizon < len(prices):
                    result[f'var_d{horizon}'] = prices[horizon] / first_open
    except Exception:
        # Best effort: a failed lookup leaves the remaining entries as NaN.
        # `Exception` (not a bare except) lets KeyboardInterrupt stop the run.
        pass

    return pd.Series(result, dtype='float64')

# Compute the price columns row by row, then attach them to the right of the
# existing columns
stock_price_columns = df.apply(get_stock_prices, axis=1)
df = pd.concat([df, stock_price_columns], axis=1)

df.head(15)

# Persist the enriched frame so the analysis can reload it without re-downloading
df.to_pickle('amf-data.pkl')


- ???.PA: No data found for this date range, symbol may be delisted
- SECH.PA: No data found for this date range, symbol may be delisted
- SECH.PA: No data found for this date range, symbol may be delisted
- LTAN.PA: No data found, symbol may be delisted
- NGP.PA: No data found, symbol may be delisted
- MLPAP.PA: No data found, symbol may be delisted
- MLPAP.PA: No data found, symbol may be delisted
- LTAN.PA: No data found, symbol may be delisted
- SECH.PA: No data found for this date range, symbol may be delisted
- ???.PA: No data found for this date range, symbol may be delisted
- ???.PA: No data found for this date range, symbol may be delisted
- SECH.PA: No data found for this date range, symbol may be delisted
- ROTH.PA: No data found, symbol may be delisted
- ???.PA: No data found for this date range, symbol may be delisted
- SECH.PA: No data found for this date range, symbol may be delisted
- ???.PA: No data found for this date range, symbol may be delisted
- MLPAP.PA: No data 

In [8]:
# Compare how prices evolve after sales ('Cession') and purchases
# ('Acquisition'): mean and median of every var_d* ratio, rounded to 2 decimals
amf_data = pd.read_pickle('amf-data.pkl')
amf_df = pd.DataFrame(amf_data)

df_cession = amf_df[amf_df['nature'] == 'Cession']
df_acquisition = amf_df[amf_df['nature'] == 'Acquisition']

# Horizon labels double as result keys and as suffixes of the var_* columns
horizon_labels = ['d1', 'd3', 'd5', 'd10', 'd20', 'd30', 'd60']
frames_by_nature = {'cession': df_cession, 'acquisition': df_acquisition}

var_means = {
    nature_key: {
        label: round(frame[f'var_{label}'].mean(), 2) for label in horizon_labels
    }
    for nature_key, frame in frames_by_nature.items()
}

var_medians = {
    nature_key: {
        label: round(frame[f'var_{label}'].median(), 2) for label in horizon_labels
    }
    for nature_key, frame in frames_by_nature.items()
}

print(var_means)
print(var_medians)

{'cession': {'d1': 1.0, 'd3': 1.01, 'd5': 1.01, 'd10': 1.04, 'd20': 1.06, 'd30': 1.08, 'd60': 1.19}, 'acquisition': {'d1': 1.0, 'd3': 1.0, 'd5': 1.0, 'd10': 1.0, 'd20': 1.01, 'd30': 1.02, 'd60': 1.07}}
{'cession': {'d1': 1.0, 'd3': 1.01, 'd5': 1.01, 'd10': 1.04, 'd20': 1.06, 'd30': 1.08, 'd60': 1.19}, 'acquisition': {'d1': 1.0, 'd3': 1.0, 'd5': 1.0, 'd10': 1.0, 'd20': 1.01, 'd30': 1.02, 'd60': 1.07}}
