In [None]:

# -------------------------
# ----- 1. GET NEW TOKEN -----
# -------------------------

import requests
import json
from dotenv import load_dotenv
import os

# Load environment variables from the .env file into the process environment
load_dotenv()

M2_BASE_URL = os.getenv("BASE_URL")
M2_USERNAME = os.getenv("USERNAME")
M2_PASSWORD = os.getenv("PASSWORD")
M2_OTP_CODE = "953480"  # Replace this with the current 6-digit code from your Google Authenticator app

# Payload for Google Authenticator 2FA
payload = {
    "username": M2_USERNAME,
    "password": M2_PASSWORD,
    "otp": M2_OTP_CODE,  # Enter the current OTP code from the authenticator
}

# POST the credentials to the 2FA authentication endpoint.
# The URL is built from M2_BASE_URL (the name actually defined above); a
# timeout keeps the cell from hanging forever on an unresponsive host.
response = requests.post(
    f"{M2_BASE_URL}/rest/V1/tfa/provider/google/authenticate",
    headers={"Content-Type": "application/json"},
    data=json.dumps(payload),
    timeout=30,
)

# On HTTP 200 the decoded JSON body is used as the token for later requests
if response.status_code == 200:
    access_token = response.json()
    print("Copy Paste below the New Access Token:\n", access_token)
else:
    print("Error fetching token:", response.text)

In [None]:
# -------------------------------------------
# ----- 2. FETCH ORDER AND ITEM DETAILS --------
# -------------------------------------------
import requests
import pandas as pd
from dotenv import load_dotenv
import os
from tqdm import tqdm  # for the progress bar
import time

# Magento connection settings, read from the process environment.
# NOTE(review): load_dotenv() is not called in this cell -- presumably the
# .env file was already loaded by an earlier cell in the same kernel; confirm.
M2_BASE_URL = os.environ.get("BASE_URL")
M2_ACCESS_TOKEN = os.environ.get("ACCESS_TOKEN")

# Root of the REST V1 API that every endpoint URL is built on
M2_REST_URL = f"{M2_BASE_URL}/rest/V1"

# Headers sent with every authenticated request
HEADERS = {}
HEADERS["Authorization"] = f"Bearer {M2_ACCESS_TOKEN}"
HEADERS["Content-Type"] = "application/json"

def fetch_orders(from_date, to_date, page=1, page_size=50, timeout=30):
    """
    Fetch one page of orders created between two dates (inclusive).

    Args:
        from_date: Start date as "YYYY-MM-DD"; " 00:00:00" is appended.
        to_date: End date as "YYYY-MM-DD"; " 23:59:59" is appended.
        page: 1-based page number to request.
        page_size: Number of orders per page (defaults to the previous
            hard-coded value of 50).
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON response on HTTP 200, otherwise None (the error is
        printed). Network-level failures are reported the same way.
    """
    # Let requests build and escape the query string instead of
    # concatenating it by hand.
    params = {
        "searchCriteria[filter_groups][0][filters][0][field]": "created_at",
        "searchCriteria[filter_groups][0][filters][0][value]": f"{from_date} 00:00:00",
        "searchCriteria[filter_groups][0][filters][0][condition_type]": "from",
        "searchCriteria[filter_groups][1][filters][0][field]": "created_at",
        "searchCriteria[filter_groups][1][filters][0][value]": f"{to_date} 23:59:59",
        "searchCriteria[filter_groups][1][filters][0][condition_type]": "to",
        "searchCriteria[pageSize]": page_size,
        "searchCriteria[currentPage]": page,
    }

    try:
        response = requests.get(
            f"{M2_REST_URL}/orders",
            headers=HEADERS,
            params=params,
            timeout=timeout,  # without this a stalled connection blocks forever
        )
    except requests.RequestException as exc:
        # Same reporting style as the HTTP-error branch below
        print("Error fetching orders:", exc)
        return None

    if response.status_code == 200:
        return response.json()

    print("Error fetching orders:", response.text)
    return None

def format_order_data(orders_data):
    """
    Flatten an orders response into a DataFrame with one row per order item.

    Args:
        orders_data: Dict with an 'items' list of order dicts; each order may
            carry its own 'items' list of line items.

    Returns:
        pandas.DataFrame in which every row repeats the order-level fields
        next to one line item. Orders without line items produce no rows.
    """
    formatted_data = []

    for order in orders_data.get('items') or []:
        currency = order.get('order_currency_code')

        # `or ''` / `or {}` also cover keys that are present but null, which
        # `.get(key, default)` alone does not.
        first_name = order.get('customer_firstname') or ''
        last_name = order.get('customer_lastname') or ''
        billing_address = order.get('billing_address') or {}
        payment = order.get('payment') or {}

        # Fields shared by every line item of this order
        order_fields = {
            "Order ID": order.get('entity_id'),
            "Date": order.get('created_at'),
            "Order Total": f"{order.get('grand_total')} {currency}",
            "Order Status": order.get('status'),
            "Customer Name": f"{first_name} {last_name}".strip(),
            "Customer Email": order.get('customer_email'),
            "City": billing_address.get('city', ''),
            "Country": billing_address.get('country_id', ''),
            "Payment Method": payment.get('method', 'N/A'),
        }

        # One output row per line item
        for item in order.get('items') or []:
            formatted_data.append({
                **order_fields,
                "Item Name": item.get('name'),
                "SKU": item.get('sku'),
                "Quantity": item.get('qty_ordered'),
                "Price per Unit": f"{item.get('price')} {currency}",
                "Total Item Price": f"{item.get('row_total')} {currency}",
            })

    return pd.DataFrame(formatted_data)

def fetch_all_orders(from_date, to_date):
    """
    Fetch every order in a date range, page by page, as one DataFrame.

    The date filter stays fixed and only the page number advances. Moving
    the start date forward while also incrementing the page would request
    "page N" of a different, narrower query each time and skip orders.

    Args:
        from_date: Start date as "YYYY-MM-DD".
        to_date: End date as "YYYY-MM-DD".

    Returns:
        pandas.DataFrame with one row per order item (see format_order_data),
        or an empty DataFrame when nothing was fetched.
    """
    fetched_pages = []
    fetched_count = 0
    page = 1

    while True:
        print(f"Fetching orders for {from_date} (page {page})...")
        orders_data = fetch_orders(from_date, to_date, page)

        page_items = orders_data.get('items') if orders_data else None
        if not page_items:
            print("No more orders found.")
            break

        fetched_pages.append(orders_data)
        fetched_count += len(page_items)

        # Stop once the server-reported total has been reached.
        # NOTE(review): Magento reportedly returns the last page again when
        # asked for a page past the end, so waiting for an empty page alone
        # may never terminate -- confirm against the target instance.
        total_count = orders_data.get('total_count')
        if total_count is not None and fetched_count >= total_count:
            break

        page += 1
        time.sleep(1)  # small pause between pages to go easy on the API

    # pd.concat raises ValueError on an empty list, so short-circuit here and
    # let the caller's `.empty` check handle the "no orders" case.
    if not fetched_pages:
        return pd.DataFrame()

    return pd.concat(
        [format_order_data(page_data) for page_data in fetched_pages],
        ignore_index=True,
    )

# Inclusive date range to export, formatted YYYY-MM-DD (modify as needed)
from_date = "2024-01-01"
to_date = "2024-01-10"

# Download every order in the range; progress is reported via print output
print("Starting to fetch orders...")
df_magento_orders = fetch_all_orders(from_date, to_date)

# Write the result to CSV only when at least one row came back
if df_magento_orders.empty:
    print("No orders found.")
else:
    df_magento_orders.to_csv("magento_orders.csv", index=False)
    print("Formatted orders saved to CSV file.")


In [None]:
# -------------------------------------------
# -----      3. SEND DATA TO BQ         --------
# -------------------------------------------
from dotenv import load_dotenv
import os
import pandas as pd 
from google.cloud import bigquery

# BigQuery settings, read from the process environment.
# NOTE(review): load_dotenv() is not called in this cell -- presumably the
# .env file was already loaded by an earlier cell in the same kernel; confirm.
BQ_PATH_KEY = os.getenv("PATH_KEY")
BQ_PROJECT_ID = os.getenv("PROJECT_ID")
BQ_DATASET_ID = os.getenv("DATASET_ID")
BQ_TABLE_ID = os.getenv("TABLE_ID")

# Fail early with a readable message. Otherwise an unset PATH_KEY raises an
# opaque TypeError on the os.environ assignment below, and the other values
# would silently yield the table id "None.None.None".
_missing_settings = [
    env_name
    for env_name, env_value in (
        ("PATH_KEY", BQ_PATH_KEY),
        ("PROJECT_ID", BQ_PROJECT_ID),
        ("DATASET_ID", BQ_DATASET_ID),
        ("TABLE_ID", BQ_TABLE_ID),
    )
    if not env_value
]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_settings)
    )

# Set the service-account key path in the variable read by the Google client libraries
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = BQ_PATH_KEY

# Define BigQuery parameters
project_id = BQ_PROJECT_ID
dataset_id = BQ_DATASET_ID
table_id = BQ_TABLE_ID

# Full table ID in the format 'project.dataset.table'
table_full_id = f"{project_id}.{dataset_id}.{table_id}"

# Reload the CSV written by the export step
df_magento_orders = pd.read_csv("magento_orders.csv")

# Upload the DataFrame to BigQuery; if_exists='replace' overwrites the table.
# NOTE(review): DataFrame.to_gbq appears to be deprecated in recent pandas in
# favour of pandas_gbq.to_gbq -- confirm against the installed version.
df_magento_orders.to_gbq(destination_table=table_full_id, project_id=project_id, if_exists='replace')
print('Data uploaded successfully!')