In [None]:
 # Imports and tokens

import os
from airtable import Airtable
import pandas as pd
from datetime import datetime

# Never paste the token into the notebook. Export AIRTABLE_AUTH_TOKEN in the
# environment that launches the kernel instead.
# setdefault() keeps a token that is already exported and only falls back to ''
# when the variable is missing, so a real token is never overwritten.
airtable_token = os.environ.setdefault('AIRTABLE_AUTH_TOKEN', '')

# Log in to your Airtable account
# Open your database
# Search for base_id. It starts with app...
# Search for table_name. It starts with tbl...
base_id = 'app*****'
table_name = 'tbl*****'

## Load from Airtable

In [11]:
# Note the wall-clock time (HH:MM) right before the download begins
current_time = f'{datetime.now():%H:%M}'

print(f'Loading table {table_name}. Start time: {current_time}')

# Connect to the configured base/table and fetch its records with get_all()
airtable = Airtable(base_id, table_name, airtable_token)
airtable_data = airtable.get_all()

In [12]:
# Take a fresh timestamp: `current_time` was captured before the load started,
# so printing it here would report the start time as the finish time.
finish_time = datetime.now().strftime('%H:%M')
print(f'Finishing loading {table_name}. Finish time: {finish_time}')

In [None]:
import pandas as pd

def airtable_to_dataframe(airtable_data, fields=None):
    """
    Convert data from Airtable (list of records) to a Pandas DataFrame.

    Parameters:
    - airtable_data (list): List of records where each record is a dictionary with a 'fields' key.
      Records without a 'fields' key contribute a row of missing values.
    - fields (list, optional): List of field names to include in the DataFrame, in this order.
      If None (or empty), include all fields found in the records.

    Returns:
    - pd.DataFrame: Pandas DataFrame containing the data. When `fields` is given, the
      result always has exactly those columns: a field that is absent from every
      record becomes an all-NaN column instead of raising KeyError, and an empty
      input yields an empty DataFrame that still carries the requested columns.
    """
    if not airtable_data:
        # Keep the requested schema even when there are no records
        return pd.DataFrame(columns=fields) if fields else pd.DataFrame()

    # Extract the 'fields' information from each record
    fields_data = [record.get('fields', {}) for record in airtable_data]

    # Convert the list of dictionaries to a DataFrame
    dataframe = pd.DataFrame(fields_data)

    # Select the requested fields. reindex (unlike dataframe[fields]) tolerates
    # fields that no record contains and fills them with NaN.
    if fields:
        dataframe = dataframe.reindex(columns=fields)

    return dataframe

### Column trimmer

In [None]:
# Fields to keep from the Airtable table.
# Edit this list to match the structure you need.
columns = [
    'Id',
    'nAmE',
    'website',
    'type 1',
    'country',
]

# Build the DataFrame restricted to the fields listed above
df = airtable_to_dataframe(airtable_data, fields=columns)

In [15]:
# Preview only the first rows instead of rendering the whole table
df.head()

In [None]:
# Column names are not optimal for an SQL database. Normalize them.
def normalize_column_names(df):
    """
    Normalize the column names of a DataFrame so they are SQL-friendly.

    Each name is lower-cased; spaces and '/' become '_'; ',', '(' and ')' are removed.
    Non-string labels are converted with str() first.

    Parameters:
    - df (pd.DataFrame): DataFrame whose columns should be renamed.

    Returns:
    - pd.DataFrame: The same DataFrame object. Its columns are renamed in place,
      so the caller's frame is modified as well.

    The names are normalized even when the DataFrame has no rows; only a frame
    without any columns is returned untouched.
    """
    # Guard on columns, not on df.empty: a zero-row frame still has names to fix
    if len(df.columns) == 0:
        return df

    # A single translation table does all the literal character substitutions,
    # with no dependence on how pandas interprets '(' / ')' as a pattern.
    translation = str.maketrans({' ': '_', '/': '_', ',': None, '(': None, ')': None})
    df.columns = [str(name).lower().translate(translation) for name in df.columns]
    return df

In [17]:
# normalize_column_names renames the columns of `df` in place and returns it
df_normalized = normalize_column_names(df)
# Preview only the first rows instead of rendering the whole table
df_normalized.head()

## Create table, BigQuery

In [None]:
def _bigquery_type(dtype):
    """Return the BigQuery column type for a pandas dtype, defaulting to STRING."""
    dtype_api = pd.api.types
    # Check by dtype family rather than by exact name, so that int32/uint8,
    # float32, nullable Int64/boolean, tz-aware and non-nanosecond datetimes
    # are mapped as well.
    if dtype_api.is_bool_dtype(dtype):
        return 'BOOL'
    if dtype_api.is_integer_dtype(dtype):
        return 'INT64'
    if dtype_api.is_float_dtype(dtype):
        return 'FLOAT64'
    if dtype_api.is_datetime64_any_dtype(dtype):
        return 'TIMESTAMP'
    if dtype_api.is_timedelta64_dtype(dtype):
        # NOTE(review): kept from the original mapping for compatibility; a
        # duration is not a point in time, so confirm TIMESTAMP is really wanted.
        return 'TIMESTAMP'
    return 'STRING'


def generate_create_table_statement(df, table_name='your_table_name'):
    """
    Generate a BigQuery CREATE TABLE statement based on the columns and their types in a Pandas DataFrame.

    Parameters:
    - df (pd.DataFrame): Pandas DataFrame.
    - table_name (str): Name of the BigQuery table.

    Returns:
    - str: BigQuery CREATE OR REPLACE TABLE statement with one backtick-quoted
      column per DataFrame column. Boolean, integer, float and datetime dtypes
      map to BOOL, INT64, FLOAT64 and TIMESTAMP; everything else maps to STRING.

    Raises:
    - ValueError: If the DataFrame is empty.

    ! Does not provide clustering and partitioning !
    """
    if df.empty:
        raise ValueError("DataFrame is empty. Cannot generate CREATE TABLE statement.")

    # df.dtypes.items() yields (column, dtype) pairs and, unlike df[col].dtype,
    # also works when column labels are duplicated.
    column_definitions = [
        f"  `{col}` {_bigquery_type(dtype)}"
        for col, dtype in df.dtypes.items()
    ]

    return (
        f"CREATE OR REPLACE TABLE `{table_name}` (\n"
        + ",\n".join(column_definitions)
        + "\n)"
    )

In [None]:
dataset = 'indrive-inlocal.new_ventures.'

# insert name for a table
desired_table_name = 'full_db'

# Generate create table statement
generate_create_table_statement = generate_create_table_statement(df_normalized, table_name=dataset + desired_table_name)

In [23]:
print(generate_create_table_statement)

### ELT below is only for a small tables (<0,5 mln rows) with frequent column changes

In [30]:
# Create or replace a table
pd.read_gbq(generate_create_table_statement)

In [31]:
# Load new data into the table created above.
# `dataset` has the form 'project.dataset.', so split it into the two pieces
# that to_gbq expects instead of pointing at placeholder names.
gbq_project, _, gbq_dataset = dataset.rstrip('.').partition('.')
df_normalized.to_gbq(
    destination_table=f'{gbq_dataset}.{desired_table_name}',
    project_id=gbq_project,
    if_exists='replace',
)