In [1]:
import os.path
import json
import pandas as pd
import io
import pytz
from datetime import datetime

# sharepoint connection
from office365.runtime.auth.authentication_context import AuthenticationContext
from office365.sharepoint.client_context import ClientContext
from office365.sharepoint.files.file import File

#Element password storage
try:
    from mlutils import storage
    PASSWORD = storage.get_secret("password_for_sharepoint_bq_transfer")
except ImportError:
    # read from local .env file
    from dotenv import load_dotenv
    import os
    load_dotenv()
    PASSWORD = os.getenv('PASSWORD')

# Bigquery
from google.cloud import bigquery

try:
    from mlutils import dataset, connector # Element environment connectors
    APP_PATH = '/home/jupyter/automation/inhome/'
except ImportError:
    APP_PATH = ""

In [2]:
def get_config(APP_PATH, config_file):
    '''Return config file'''
    config_path = os.path.join(APP_PATH, config_file)
    with open(config_path, 'r') as file:
        config = json.load(file)
    return config

In [3]:
CONFIG = get_config(APP_PATH, "config.json")

url = CONFIG['url']
username = CONFIG['username']
folder_path = CONFIG['folder_path']
file_name = CONFIG['file_name']
file_path = os.path.join(folder_path, file_name).replace("\\", "/")
#sheet_name = CONFIG['sheet_name']

In [4]:
# context setup
ctx_auth = AuthenticationContext(url)
ctx_auth.acquire_token_for_user(username, PASSWORD)
ctx = ClientContext(url, ctx_auth)

### Spark W+ Redemption Files

In [5]:
def get_data_redemption(folder_path):
    # get folder
    folder = ctx.web.get_folder_by_server_relative_url(folder_path)
    ctx.load(folder)
    ctx.execute_query()

    # get files
    files = folder.files
    ctx.load(files)
    ctx.execute_query()

    # initialize an empty dataframe
    all_data = pd.DataFrame()

    # read all files in the folder
    for file in files:
        print(file.properties["Name"])
        if file.properties["Name"].endswith('.csv'):
            # get file
            file = File.open_binary(ctx, folder_path + file.properties["Name"])

            # read file content into a pandas dataframe
            data = io.StringIO(file.content.decode())
            df = pd.read_csv(data)

            # append data to all_data dataframe
            all_data = pd.concat([all_data, df])

        elif file.properties["Name"].endswith('.xlsx'):
            # get file
            file = ctx.web.get_file_by_server_relative_url(folder_path + file.properties["Name"]).get().execute_query()
            content = file.read()
            df = pd.read_excel(content)

            # append data to all_data dataframe
            all_data = pd.concat([all_data, df])

    print("Done")
    return all_data

In [6]:
all_data_redemption = get_data_redemption(folder_path)

Aug 2024.csv
May 2024.csv
Spark - Oct 2024.xlsx
April 2024.csv
January 2024.xlsx
Dec 2023.xlsx
March 2024.csv
July 2024.csv
Sept 2024.csv
Nov 2024.csv
Feb 2024.xlsx
June 2024.csv
Done


In [7]:
all_data_redemption.info()

<class 'pandas.core.frame.DataFrame'>
Index: 184165 entries, 0 to 14751
Data columns (total 19 columns):
 #   Column               Non-Null Count   Dtype  
---  ------               --------------   -----  
 0   pg_cust_id           131650 non-null  object 
 1   PRTNR_PGM_NM         184165 non-null  object 
 2   PROMO_CD             184165 non-null  object 
 3   redem_month          184165 non-null  int64  
 4   order_plcd_mnth      110589 non-null  float64
 5   total_orders_amount  110589 non-null  float64
 6   EMAIL_ID             153364 non-null  object 
 7   PG_CUST_ID           21736 non-null   object 
 8   Unnamed: 7           0 non-null       float64
 9   Unnamed: 8           0 non-null       float64
 10  Unnamed: 9           0 non-null       float64
 11  Unnamed: 10          2 non-null       float64
 12  Unnamed: 11          0 non-null       float64
 13  Unnamed: 12          1 non-null       float64
 14  Unnamed: 5           0 non-null       float64
 15  Unnamed: 6           0 

#### Clean and upload

In [8]:
all_data_redemption.columns

Index([         'pg_cust_id',        'PRTNR_PGM_NM',            'PROMO_CD',
               'redem_month',     'order_plcd_mnth', 'total_orders_amount',
                  'EMAIL_ID',          'PG_CUST_ID',          'Unnamed: 7',
                'Unnamed: 8',          'Unnamed: 9',         'Unnamed: 10',
               'Unnamed: 11',         'Unnamed: 12',          'Unnamed: 5',
                'Unnamed: 6',                 13930,                  8013,
          0.5752333094041637],
      dtype='object')

In [9]:
all_data_redemption_clean = all_data_redemption[['PRTNR_PGM_NM', 'PROMO_CD', 'redem_month',
                                                 'order_plcd_mnth', 'total_orders_amount', 'EMAIL_ID']].copy()

In [13]:
# trim email id
all_data_redemption_clean['EMAIL_ID'] = all_data_redemption_clean['EMAIL_ID'].str.strip()

# generate year
def assign_year(month):
    if month == 12:
        return 2023
    else:
        return 2024

all_data_redemption_clean['redem_year'] = all_data_redemption_clean['redem_month'].apply(assign_year)

# generate month string
month_map = {
    1: "Jan",
    2: "Feb",
    3: "Mar",
    4: "Apr",
    5: "May",
    6: "Jun",
    7: "Jul",
    8: "Aug",
    9: "Sep",
    10: "Oct",
    11: "Nov",
    12: "Dec"
}

all_data_redemption_clean['redem_month_str'] = all_data_redemption_clean['redem_month'].map(month_map)
all_data_redemption_clean['order_plcd_month_str'] = all_data_redemption_clean['order_plcd_mnth'].map(month_map)

In [14]:
all_data_redemption_clean.head()

Unnamed: 0,PRTNR_PGM_NM,PROMO_CD,redem_month,order_plcd_mnth,total_orders_amount,EMAIL_ID,redem_year,redem_month_str,order_plcd_month_str
0,sparkpromo21,SPAE25Q4RHZ,8,8.0,1200.18,yadiraanibal@yahoo.com,2024,Aug,Aug
1,sparkpromo21,SPA6NBKYJ26,8,8.0,486.12,toddcurriejr@yahoo.com,2024,Aug,Aug
2,sparkpromo21,SPA6GPN9NM7,8,8.0,277.91,jagyomama@gmail.com,2024,Aug,Aug
3,sparkpromo21,SPACSD4X3Y6,8,8.0,48.0,joshgregory573@gmail.com,2024,Aug,Aug
4,sparkpromo21,SPAVK3QYS3G,8,,,wahabahmedkolawole@gmail.com,2024,Aug,


In [15]:
CONFIG_R = get_config(APP_PATH, "config.json")

In [16]:
# load to a BQ table
table_id = CONFIG_R['bq_table_nm']
job_config = bigquery.LoadJobConfig(write_disposition="WRITE_TRUNCATE") # overwrite existing data

client = bigquery.Client()

load_job = client.load_table_from_dataframe(all_data_redemption_clean, table_id, job_config=job_config)
# Wait for load job to complete.
load_job.result()



<google.cloud.bigquery.job.load.LoadJob at 0x13f60b68760>

In [18]:
all_data_redemption_clean[all_data_redemption_clean.redem_month_str == 'Feb'].head()

Unnamed: 0,PRTNR_PGM_NM,PROMO_CD,redem_month,order_plcd_mnth,total_orders_amount,EMAIL_ID,redem_year,redem_month_str,order_plcd_month_str
0,sparkpromo21,SPABYQJG3QS,2,2.0,9031.96,,2024,Feb,Feb
1,sparkpromo21,SPANBYAHCZ7,2,2.0,4736.04,,2024,Feb,Feb
2,sparkpromo21,SPABR58ZQJ5,2,2.0,4570.55,,2024,Feb,Feb
3,sparkpromo21,SPAV926Y87U,2,2.0,4101.25,,2024,Feb,Feb
4,sparkpromo21,SPAQP6P4B4J,2,2.0,3394.35,,2024,Feb,Feb


### Driver Tier Data

In [121]:
CONFIG = get_config(APP_PATH, "config_driver.json")

In [45]:
url = CONFIG['url']
username = CONFIG['username']
folder_path = CONFIG['folder_path']
file_name = CONFIG['file_name']
file_path = os.path.join(folder_path, file_name).replace("\\", "/")
sheet_nm = CONFIG['sheet_name']

# context setup
ctx_auth = AuthenticationContext(url)
ctx_auth.acquire_token_for_user(username, PASSWORD)
ctx = ClientContext(url, ctx_auth)

In [34]:
# get folder
folder = ctx.web.get_folder_by_server_relative_url(folder_path)
ctx.load(folder)
ctx.execute_query()

# get files
files = folder.files
ctx.load(files)
ctx.execute_query()

# initialize an empty dataframe
all_data = pd.DataFrame()

# read all files in the folder
for file in files:
    print(file.properties["Name"])
    # get file
    file = ctx.web.get_file_by_server_relative_url(folder_path + file.properties["Name"]).get().execute_query()
    content = file.read()
    # Read excel file without specifying sheet_name
    xls = pd.ExcelFile(content)

    # Convert all sheet names to lowercase and remove spaces
    all_sheets = [s.lower().replace(' ', '') for s in xls.sheet_names]

    # Now you can match 'tier1' to any of the formatted sheet names
    if sheet_nm in all_sheets:
        df = pd.read_excel(xls, sheet_name=xls.sheet_names[all_sheets.index('tier1')])
        df['file_name'] = file.properties["Name"]
        # append data to all_data dataframe
        all_data = pd.concat([all_data, df])
    else:
        print("tier1 sheet not found, skip this file")

print("Done")

2024MarPerf_RewardApril2024__Driverloyalty_Program_Masterfile.xlsx
2024SeptPerf_RewardOct2024__Driverloyalty_Program_Masterfile.xlsx
2024OctPerf_RewardNov2024__Driverloyalty_Program_Masterfile.xlsx
2024JanPerf_RewardFeb2024__Driverloyalty_Program_Masterfile.xlsx
tier1 sheet not found, skip this file
2024FebPerf_RewardMar2024__Driverloyalty_Program_Masterfile.xlsx
2024AugPerf_RewardSept2024__Driverloyalty_Program_Masterfile2.xlsx
2024AprilPerf_RewardMay2024__Driverloyalty_Program_Masterfile.xlsx
2024JulyPerf_RewardAug2024__Driverloyalty_Program_Masterfilet.xlsx
2024MayPerf_RewardJune2024_Driverloyalty_Program_Masterfile.xlsx
Done


In [36]:
all_data.info

<class 'pandas.core.frame.DataFrame'>
Index: 604596 entries, 0 to 76103
Data columns (total 23 columns):
 #   Column                      Non-Null Count   Dtype  
---  ------                      --------------   -----  
 0   Drvr User Id                604596 non-null  object 
 1   Drvr First Nm               604595 non-null  object 
 2   Drvr Last Nm                604594 non-null  object 
 3   Market Nm                   604596 non-null  object 
 4   Drvr Actv Status            604596 non-null  object 
 5   Drvr Loyalty Tier           604596 non-null  object 
 6   Drvr Loyalty Tier Segment   17610 non-null   object 
 7   Past Month Completed Trips  604596 non-null  int64  
 8   Customer Ratings            604596 non-null  float64
 9   Customer Rating Metric      539262 non-null  object 
 10  file_name                   604596 non-null  object 
 11  DRVR_UUID                   388531 non-null  object 
 12  Codes                       65334 non-null   object 
 13  ATT Codes           

In [43]:
all_data.file_name.value_counts()

file_name
2024OctPerf_RewardNov2024__Driverloyalty_Program_Masterfile.xlsx      95416
2024SeptPerf_RewardOct2024__Driverloyalty_Program_Masterfile.xlsx     85539
2024MarPerf_RewardApril2024__Driverloyalty_Program_Masterfile.xlsx    81436
2024MayPerf_RewardJune2024_Driverloyalty_Program_Masterfile.xlsx      76104
2024AprilPerf_RewardMay2024__Driverloyalty_Program_Masterfile.xlsx    69295
2024JulyPerf_RewardAug2024__Driverloyalty_Program_Masterfilet.xlsx    66919
2024FebPerf_RewardMar2024__Driverloyalty_Program_Masterfile.xlsx      65334
2024AugPerf_RewardSept2024__Driverloyalty_Program_Masterfile2.xlsx    64553
Name: count, dtype: int64

In [46]:
# Add Jan to all_data

file_name = "2024JanPerf_RewardFeb2024__Driverloyalty_Program_Masterfile.xlsx"
file = ctx.web.get_file_by_server_relative_url(folder_path + file_name).get().execute_query()
content = file.read()
df = pd.read_excel(content)
df['file_name'] = file_name

In [48]:
all_data = pd.concat([all_data, df])

In [52]:
all_data["Drvr Loyalty Tier"].value_counts()

Drvr Loyalty Tier
Tier 1    655313
Tier 0     17610
Name: count, dtype: int64

#### Remove uneccesary columns

In [65]:
all_data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 672923 entries, 0 to 68326
Data columns (total 25 columns):
 #   Column                      Non-Null Count   Dtype  
---  ------                      --------------   -----  
 0   Drvr User Id                672923 non-null  object 
 1   Drvr First Nm               672922 non-null  object 
 2   Drvr Last Nm                672921 non-null  object 
 3   Market Nm                   672923 non-null  object 
 4   Drvr Actv Status            672923 non-null  object 
 5   Drvr Loyalty Tier           672923 non-null  object 
 6   Drvr Loyalty Tier Segment   17610 non-null   object 
 7   Past Month Completed Trips  672923 non-null  int64  
 8   Customer Ratings            672923 non-null  float64
 9   Customer Rating Metric      607589 non-null  object 
 10  file_name                   672923 non-null  object 
 11  DRVR_UUID                   388531 non-null  object 
 12  Codes                       65334 non-null   object 
 13  ATT Codes           

In [63]:
all_data.Year.value_counts()

Year
2024.0    68327
Name: count, dtype: int64

In [62]:
all_data.Month.value_counts()

Month
May        76104
January    68327
Name: count, dtype: int64

In [61]:
all_data[all_data.Month.notnull()]['file_name'].value_counts()

file_name
2024MayPerf_RewardJune2024_Driverloyalty_Program_Masterfile.xlsx    76104
2024JanPerf_RewardFeb2024__Driverloyalty_Program_Masterfile.xlsx    68327
Name: count, dtype: int64

In [68]:
all_data['Unnamed: 11'].value_counts()

Unnamed: 11
SPADP86J6MZ    1
SPAW8NZP7JT    1
SPA3YDUWCMQ    1
SPAGH9E6FRS    1
SPA5RJ3VYPF    1
              ..
SPAQXUYZE38    1
SPAS3W5TXFF    1
SPAUMFH3MY6    1
SPAS5Y3YG8A    1
SPA74QESAY8    1
Name: count, Length: 64553, dtype: int64

In [69]:
all_data['Unnamed: 19'].value_counts()

Unnamed: 19
     68327
Name: count, dtype: int64

In [70]:
all_data['Codes'].value_counts()

Codes
SPAV7TKP87E    1
SPAEPPMDCCX    1
SPABX3XC6EH    1
SPATZ4R2Y6A    1
SPA7C9S5G2J    1
              ..
SPA3N6J3TQ4    1
SPA9GHYUQZ7    1
SPA7JKR3D4E    1
SPA48HUG2TR    1
SPAZGPF2NA7    1
Name: count, Length: 65334, dtype: int64

In [73]:
all_data['DRVR_UUID'].value_counts()

DRVR_UUID
A790F8D8-5EEC-4211-8178-5A0A37DA4A42    5
6bb2c5c4-0c46-416c-b62a-9d7da841e651    5
e0f0e133-f098-44d6-b940-1b6c71135957    5
EB0E1910-6A58-4DB4-9D73-61E135155FDB    5
671da506-8aa8-4cd9-8d7f-0b33499e79de    5
                                       ..
d1328db0-f233-41c4-92b2-a5bf2b04eece    1
20f9c115-b694-4082-b13b-2332023fcf39    1
77051883-EE7D-4A7C-9CB6-CDE6E57D36E2    1
665076e5-f2c9-4433-9f33-ccc78283579b    1
a6c50dd1-40bb-4494-896b-842298443bd3    1
Name: count, Length: 196247, dtype: int64

In [75]:
all_data['Drvr User Id'].head()

0       0.999coins@gmail.com
1        0springer3@live.com
2    1alexisrochoa@gmail.com
3        1b1s4lyfe@gmail.com
4       1belladog2@gmail.com
Name: Drvr User Id, dtype: object

In [83]:
all_data['Drvr Loyalty Tier Segment'].value_counts()

Drvr Loyalty Tier Segment
Segment 1    17610
Name: count, dtype: int64

In [80]:
all_data[all_data['Drvr Loyalty Tier Segment'].notnull()]['file_name'].value_counts()

file_name
2024MarPerf_RewardApril2024__Driverloyalty_Program_Masterfile.xlsx    9840
2024AprilPerf_RewardMay2024__Driverloyalty_Program_Masterfile.xlsx    7770
Name: count, dtype: int64

In [82]:
all_data[all_data['Drvr Loyalty Tier Segment'].notnull()]['Drvr Loyalty Tier'].value_counts()

Drvr Loyalty Tier
Tier 0    17610
Name: count, dtype: int64

In [84]:
remove_columns = ['Drvr Loyalty Tier Segment',
                 'Unnamed: 19',
                 'Unnamed: 11',
                 'Codes',
                 'ATT Codes',
                 'DRVR_UUID',
                 'Month',
                 'Year']

In [85]:
all_data_clean = all_data.drop(columns=remove_columns)

In [86]:
all_data_clean.info()

<class 'pandas.core.frame.DataFrame'>
Index: 672923 entries, 0 to 68326
Data columns (total 17 columns):
 #   Column                      Non-Null Count   Dtype  
---  ------                      --------------   -----  
 0   Drvr User Id                672923 non-null  object 
 1   Drvr First Nm               672922 non-null  object 
 2   Drvr Last Nm                672921 non-null  object 
 3   Market Nm                   672923 non-null  object 
 4   Drvr Actv Status            672923 non-null  object 
 5   Drvr Loyalty Tier           672923 non-null  object 
 6   Past Month Completed Trips  672923 non-null  int64  
 7   Customer Ratings            672923 non-null  float64
 8   Customer Rating Metric      607589 non-null  object 
 9   file_name                   672923 non-null  object 
 10  AcceptanceRate Ratings      144431 non-null  float64
 11  AcceptanceRate Metrics      144431 non-null  object 
 12  OTA Ratings                 144431 non-null  float64
 13  OTA Metrics         

#### Create Month and Year use `file_name`

In [94]:
all_data_clean.file_name.value_counts()

file_name
2024OctPerf_RewardNov2024__Driverloyalty_Program_Masterfile.xlsx      95416
2024SeptPerf_RewardOct2024__Driverloyalty_Program_Masterfile.xlsx     85539
2024MarPerf_RewardApril2024__Driverloyalty_Program_Masterfile.xlsx    81436
2024MayPerf_RewardJune2024_Driverloyalty_Program_Masterfile.xlsx      76104
2024AprilPerf_RewardMay2024__Driverloyalty_Program_Masterfile.xlsx    69295
2024JanPerf_RewardFeb2024__Driverloyalty_Program_Masterfile.xlsx      68327
2024JulyPerf_RewardAug2024__Driverloyalty_Program_Masterfilet.xlsx    66919
2024FebPerf_RewardMar2024__Driverloyalty_Program_Masterfile.xlsx      65334
2024AugPerf_RewardSept2024__Driverloyalty_Program_Masterfile2.xlsx    64553
Name: count, dtype: int64

In [99]:
all_data_clean['Year_Perf'] = all_data_clean.file_name.str[:4]
all_data_clean['Month_Perf'] = all_data_clean.file_name.str[4:7]
all_data_clean['Month_Reward'] = all_data_clean['file_name'].str.extract('Reward(\w{3})') # 3 characters after 'Reward'
all_data_clean['Year_Reward'] = all_data_clean['file_name'].str.extract('(\d{4})__') # 4 digits before '__'

In [102]:
all_data_clean[['file_name', 
               'Year_Perf',
               'Month_Perf',
               'Year_Reward',
               'Month_Reward']].sample(10)

Unnamed: 0,file_name,Year_Perf,Month_Perf,Year_Reward,Month_Reward
25433,2024FebPerf_RewardMar2024__Driverloyalty_Progr...,2024,Feb,2024,Mar
4359,2024MarPerf_RewardApril2024__Driverloyalty_Pro...,2024,Mar,2024,Apr
62045,2024JulyPerf_RewardAug2024__Driverloyalty_Prog...,2024,Jul,2024,Aug
21155,2024JanPerf_RewardFeb2024__Driverloyalty_Progr...,2024,Jan,2024,Feb
48498,2024MarPerf_RewardApril2024__Driverloyalty_Pro...,2024,Mar,2024,Apr
80130,2024OctPerf_RewardNov2024__Driverloyalty_Progr...,2024,Oct,2024,Nov
22455,2024FebPerf_RewardMar2024__Driverloyalty_Progr...,2024,Feb,2024,Mar
18938,2024SeptPerf_RewardOct2024__Driverloyalty_Prog...,2024,Sep,2024,Oct
70,2024MarPerf_RewardApril2024__Driverloyalty_Pro...,2024,Mar,2024,Apr
45140,2024MarPerf_RewardApril2024__Driverloyalty_Pro...,2024,Mar,2024,Apr


In [113]:
all_data_clean[['file_name', 
               'Year_Perf',
               'Month_Perf',
               'Year_Reward',
               'Month_Reward']].drop_duplicates()

Unnamed: 0,file_name,Year_Perf,Month_Perf,Year_Reward,Month_Reward
0,2024MarPerf_RewardApril2024__Driverloyalty_Pro...,2024,Mar,2024.0,Apr
0,2024SeptPerf_RewardOct2024__Driverloyalty_Prog...,2024,Sep,2024.0,Oct
0,2024OctPerf_RewardNov2024__Driverloyalty_Progr...,2024,Oct,2024.0,Nov
0,2024FebPerf_RewardMar2024__Driverloyalty_Progr...,2024,Feb,2024.0,Mar
0,2024AugPerf_RewardSept2024__Driverloyalty_Prog...,2024,Aug,2024.0,Sep
0,2024AprilPerf_RewardMay2024__Driverloyalty_Pro...,2024,Apr,2024.0,May
0,2024JulyPerf_RewardAug2024__Driverloyalty_Prog...,2024,Jul,2024.0,Aug
0,2024MayPerf_RewardJune2024_Driverloyalty_Progr...,2024,May,,Jun
0,2024JanPerf_RewardFeb2024__Driverloyalty_Progr...,2024,Jan,2024.0,Feb


#### Final touches

In [115]:
# trim driver user id
all_data_clean['Drvr User Id'] = all_data_clean['Drvr User Id'].str.strip()

In [117]:
# replace space with '_' for all columns
all_data_clean.columns = all_data_clean.columns.str.replace(' ', '_')

In [118]:
all_data_clean.head()

Unnamed: 0,Drvr_User_Id,Drvr_First_Nm,Drvr_Last_Nm,Market_Nm,Drvr_Actv_Status,Drvr_Loyalty_Tier,Past_Month_Completed_Trips,Customer_Ratings,Customer_Rating_Metric,file_name,...,AcceptanceRate_Metrics,OTA_Ratings,OTA_Metrics,DropRate_Ratings,DropRate_Metric,Last_trip,Year_Perf,Month_Perf,Month_Reward,Year_Reward
0,0.999coins@gmail.com,LaKisha,Tisdale,Walmart Huntsville,ACTIVE,Tier 1,80,4.79,GREEN,2024MarPerf_RewardApril2024__Driverloyalty_Pro...,...,,,,,,,2024,Mar,Apr,2024
1,0springer3@live.com,Daniel,Wiggins,Walmart Aberdeen NC,ACTIVE,Tier 1,79,4.7,GREEN,2024MarPerf_RewardApril2024__Driverloyalty_Pro...,...,,,,,,,2024,Mar,Apr,2024
2,1alexisrochoa@gmail.com,Alexis,Ochoa,Walmart Kenosha,ACTIVE,Tier 1,20,5.0,GREEN,2024MarPerf_RewardApril2024__Driverloyalty_Pro...,...,,,,,,,2024,Mar,Apr,2024
3,1b1s4lyfe@gmail.com,Latoya,Ritter,Walmart Columbia SC,ACTIVE,Tier 1,30,4.7,GREEN,2024MarPerf_RewardApril2024__Driverloyalty_Pro...,...,,,,,,,2024,Mar,Apr,2024
4,1belladog2@gmail.com,Tome,Pereira,Walmart Springfield MA,ACTIVE,Tier 1,53,5.0,GREEN,2024MarPerf_RewardApril2024__Driverloyalty_Pro...,...,,,,,,,2024,Mar,Apr,2024


In [122]:
# load to a BQ table
table_id = CONFIG['bq_table_nm']
job_config = bigquery.LoadJobConfig(write_disposition="WRITE_TRUNCATE") # overwrite existing data

client = bigquery.Client()

load_job = client.load_table_from_dataframe(all_data_clean, table_id, job_config=job_config)
# Wait for load job to complete.
load_job.result()



<google.cloud.bigquery.job.load.LoadJob at 0x21e3f6bc970>