# In [236]:
import pandas as pd
from sqlalchemy import create_engine
import requests

# In [None]:
# Read the geocoded CSV export of the new source into a DataFrame.
file_path = 'new_source.geocoded.csv'
df_new_source = pd.read_csv(file_path)

# Notebook preview: the `result_*` columns of the first rows.
geocoding_columns = [
    'result_label', 'result_score', 'result_score_next', 'result_type',
    'result_id', 'result_housenumber', 'result_name', 'result_street',
    'result_postcode', 'result_city', 'result_context', 'result_citycode',
    'result_oldcitycode', 'result_oldcity', 'result_district',
    'result_status',
]
df_new_source.head()[geocoding_columns]



# Connection settings for the PostgreSQL database; replace them with your own
# credentials and database name.
# NOTE(review): credentials are hard-coded in the source — consider reading
# them from the environment if this ever targets a non-local database.
db_name = 'qfdmo'
user, password = 'qfdmo', 'qfdmo'
host, port = 'localhost', '6543'

# SQLAlchemy URL of the form postgresql://user:password@host:port/database.
connection_string = f'postgresql://{user}:{password}@{host}:{port}/{db_name}'
engine = create_engine(connection_string)

# Load the whole table into a DataFrame.
table_name = 'qfdmo_finalacteur'
df = pd.read_sql_table(table_name, con=engine)


def split_char_from_number(s):
    """Insert a space between a house number and its letter suffix.

    A single space is added at each place where a digit is directly
    followed by a letter, so ``"12bis"`` becomes ``"12 bis"`` and ``"3B"``
    becomes ``"3 B"``. Text that is already separated is left untouched.

    Args:
        s: The raw house number. Non-string values are converted with
            ``str``; floats holding a whole number (what pandas produces
            for a numeric column containing gaps) are rendered without
            the trailing ``.0``.

    Returns:
        The reformatted string, or ``s`` unchanged when it is missing
        (``None``, ``NaN``, ``pd.NA``) so that gaps stay gaps instead of
        becoming the literal text ``"nan"``.
    """
    if pd.isna(s):
        return s
    if isinstance(s, float) and s.is_integer():
        s = int(s)
    text = str(s)
    # Pair every character with its predecessor (a blank for the first one)
    # so the digit -> letter boundary can be detected in a single pass.
    return ''.join(
        ' ' + current if current.isalpha() and previous.isdigit() else current
        for previous, current in zip(' ' + text, text)
    )

# Reformat every house number with split_char_from_number.
df_new_source['result_housenumber'] = df_new_source['result_housenumber'].apply(split_char_from_number)

# Normalize postcodes to zero-padded 5-character strings.
# Going through a nullable integer first removes float artefacts such as
# "75001.0" and turns unparseable values into <NA>; the padding afterwards
# restores the leading zero that a purely numeric representation drops
# (1000 -> "01000"), so the postcode stays usable as search text.
# NOTE(review): assumes French 5-digit postcodes — confirm for this source.
postcode_numbers = pd.to_numeric(df_new_source['result_postcode'], errors='coerce').astype('Int64')
df_new_source['result_postcode'] = postcode_numbers.astype('string').str.zfill(5)

# Notebook preview: name plus the `result_*` columns of the first rows.
df_new_source.head()[[ 'NOM','result_label', 'result_score', 'result_score_next', 'result_type',
       'result_id', 'result_housenumber', 'result_name', 'result_street',
       'result_postcode', 'result_city', 'result_context', 'result_citycode',
       'result_oldcitycode', 'result_oldcity', 'result_district',
       'result_status']]

# Root URL of the company search API queried by call_api.
base_url = 'https://recherche-entreprises.api.gouv.fr'


def _build_query(row):
    """Return the search text for *row*, or '' when it has no usable name."""
    name = row["NOM"]
    if pd.isna(name):
        return ''
    # Only the part of the name before the first "/" is searched.
    name = str(name).split("/")[0].strip()
    if not name:
        return ''
    # Skip missing postcode / city so "<NA>" or "nan" never reach the query.
    extras = [
        str(row[column])
        for column in ("result_postcode", "result_city")
        if not pd.isna(row[column])
    ]
    return ' '.join([name, *extras])


def call_api(row, timeout=10):
    """Look up a row on the search API and return its head-office SIRET.

    The query is built from the row's ``NOM`` (text before the first
    ``/``), ``result_postcode`` and ``result_city``. Only the first search
    result is considered.

    Args:
        row: Mapping or ``pd.Series`` with the keys ``NOM``,
            ``result_postcode`` and ``result_city``.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A two-element ``pd.Series``: the ``siret`` and
        ``etat_administratif`` values of the first result's ``siege``
        object. Both elements are ``None`` when the row has no name, the
        request fails or times out, the status is not 200, the body is not
        valid JSON, or the search returns nothing. Always returning the
        same shape is what lets ``DataFrame.apply(..., axis=1)`` expand
        the result into two columns.
    """
    no_match = pd.Series([None, None])

    query = _build_query(row)
    if not query:
        # Without a name the search would match arbitrary companies.
        return no_match
    print(query)

    endpoint = '/search'
    try:
        response = requests.get(
            url=f'{base_url}{endpoint}',
            params={'q': query},
            timeout=timeout,
        )
    except requests.RequestException as error:
        # Best effort: one failing lookup must not abort the whole run.
        print(f'Request failed for {query!r}: {error}')
        return no_match
    print(response.request.url)

    if response.status_code != 200:
        return no_match

    try:
        data = response.json()
    except ValueError:
        return no_match

    results = data.get('results') if isinstance(data, dict) else None
    if not results:
        return no_match

    # 'siege' may be absent or null in the payload; treat both as empty.
    siege_info = results[0].get('siege') or {}
    return pd.Series([siege_info.get('siret'), siege_info.get('etat_administratif')])



# Run call_api on every row and store its two returned values as new columns.
api_lookup = df_new_source.apply(call_api, axis=1)
df_new_source[['siret', 'etat_admin']] = api_lookup

# Notebook preview: only the rows for which a SIRET came back.
rows_with_siret = df_new_source[df_new_source['siret'].notnull()]
rows_with_siret[[
    'siret', 'NOM', 'etat_admin', 'result_label', 'result_score', 'result_score_next', 'result_type',
    'result_id', 'result_housenumber', 'result_name', 'result_street',
    'result_postcode', 'result_city', 'result_context', 'result_citycode',
    'result_oldcitycode', 'result_oldcity', 'result_district',
    'result_status',
]]



# Keep the rows that received a SIRET, restricted to the columns of interest.
enriched_columns = [
    'siret', 'NOM', 'etat_admin', 'result_label', 'result_score', 'result_score_next', 'result_type',
    'result_id', 'result_housenumber', 'result_name', 'result_street',
    'result_postcode', 'result_city', 'result_context', 'result_citycode',
    'result_oldcitycode', 'result_oldcity', 'result_district',
    'result_status',
]
has_siret = df_new_source['siret'].notnull()
df_new_source_filtered = df_new_source.loc[has_siret, enriched_columns]

# Left join against the database table on 'siret': every filtered row is
# kept, with the table's columns attached where the SIRET also exists there.
merged_df = df_new_source_filtered.merge(df, on='siret', how='left')



# Write the enriched rows to CSV (the DataFrame index is written as the
# first column). The file name previously contained mojibake
# ("AdheÃÅrents"), the result of a mis-decoded "é"; the accent is restored.
merged_df.to_csv('Adhérents_Ressourceries_et_Recycleries_enriched_by_qfdmo.csv')

