# Transfer scraped LinkedIn data to Airtable

Takes a JSON file of data scraped from LinkedIn and prepares it for import into Airtable.

In [47]:
import os
import pandas
import airtable
from marketing_data_cleaning import DATA_FOLDER_PATH

In [48]:
# Load the scraped records (JSON in "records" orientation, UTF-8 encoded)
# into a DataFrame.
scrap_path = DATA_FOLDER_PATH / 'output.json'
df = pandas.read_json(scrap_path, orient='records', encoding='utf-8')


In [49]:
# Give every row the same constant value in the 'enriched' column.
df = df.assign(enriched='Enrichi')

In [50]:
# Show summary statistics for the company column as the cell output.
company_names = df['company']
company_names.describe()

count     2184
unique      15
top       IKEA
freq       505
Name: company, dtype: object

## Clean main data
Clean and normalize the data before sending it to Airtable.

In [51]:
def normalize_names(value):
    """Return *value* as a title-cased string, leaving missing values alone.

    Args:
        value: A cell value from a name column. Non-string values are
            converted with ``str`` before being title-cased.

    Returns:
        The title-cased string, or *value* itself when it is ``None`` or a
        float NaN.
    """
    # A missing cell must stay missing: str(None) / str(nan) would otherwise
    # be exported as the literal names "None" / "Nan".
    if value is None or (isinstance(value, float) and value != value):
        return value
    return str(value).lower().title()


columns_to_normalize = ['last_name', 'first_name', 'full_name']

# Run normalize_names over each listed column and store the result back
# under the same column name.
for name_column in columns_to_normalize:
    normalized = df[name_column].apply(normalize_names)
    df[name_column] = normalized


In [52]:
# Keep only the first occurrence of each fully identical row.
is_repeated_row = df.duplicated()
df = df[~is_repeated_row]

## Preview CSV

In [53]:
# Write the cleaned rows to a preview CSV, omitting the DataFrame index.
preview_path = DATA_FOLDER_PATH / 'linkedin_profiles.csv'
df.to_csv(preview_path, index=False)


## Airtable
Rename the columns of the initial JSON data to the Airtable field names.

In [30]:
# Build the Airtable client from three environment variables, passed
# positionally in the order listed; an unset variable is passed as None.
instance = airtable.Airtable(
    *map(
        os.getenv,
        ('AIRTABLE_BASE_ID', 'SCRAPING_TABLE_NAME', 'AIRTABLE_API_KEY'),
    )
)

In [31]:
# Maps each column name of the scraped data to the Airtable field name it is
# renamed to.
CONVERSION_COLUMNS = dict(
    first_name='Prénom',
    last_name='Nom',
    company='Entreprise',
    linkedin='LinkedIn',
    full_name='Nom complet',
    company_linkedin='Company LinkedIn',
    enriched='Statut enrichissement',
    website='Site entreprise',
    position='Poste',
)
# Copy of df whose columns carry the Airtable field names; columns absent
# from the mapping keep their original name.
airtable_df = df.rename(CONVERSION_COLUMNS, axis='columns')

In [32]:
# Serialise the renamed rows as a single JSON string in "records"
# orientation; force_ascii=False leaves non-ASCII characters unescaped.
data = airtable_df.to_json(force_ascii=False, orient='records')
# NOTE(review): `data` is a JSON string at this point. Before re-enabling the
# upload, confirm what batch_insert accepts (presumably a list of record
# dicts rather than a string).
# instance.batch_insert(data)
