# Data Export

Write pandas DataFrames to Google Sheets and read them back, using `gspread` and `pygsheets`.

In [5]:
#pip install gspread

In [6]:
import pandas as pd
import gspread
from google.oauth2.service_account import Credentials

In [35]:
import os

# Path to the service-account JSON key file. Set the GSHEETS_KEY_FILE
# environment variable to point at your own key; the original hard-coded
# location is kept as the fallback so existing setups keep working.
key = os.environ.get('GSHEETS_KEY_FILE', '/Users/du/dup/analytics/keys/dup1966gskey.json')
# Title of the Google spreadsheet opened by name in the cells below.
shname = 'PIIT data from Python'

In [15]:
# Build service-account credentials carrying the spreadsheets and drive
# scopes, then create the authorised gspread client.
creds = Credentials.from_service_account_file(
    key,
    scopes=[
        "https://www.googleapis.com/auth/spreadsheets",
        "https://www.googleapis.com/auth/drive",
    ],
)
client = gspread.authorize(creds)
client

<gspread.client.Client at 0x1439d1190>

In [36]:
# Open the spreadsheet by its title and take its `sheet1` worksheet.
sheet = client.open(shname).sheet1

In [37]:
# Display the worksheet repr to confirm which tab was opened.
sheet

<Worksheet 'ML' id:0>

In [21]:
# Small demo frame that the following cells write to Google Sheets.
df = pd.DataFrame(
    [("Alice", 90), ("Bob", 85)],
    columns=["Name", "Score"],
)
df

Unnamed: 0,Name,Score
0,Alice,90
1,Bob,85


In [None]:
# Send the frame to the worksheet: header row first, then the data rows.
sheet.update([df.columns.values.tolist(), *df.values.tolist()])

## Other sheets

Write to a worksheet (tab) other than the first one.

In [38]:
# Open spreadsheet by name and keep the spreadsheet handle (not a single
# worksheet) so tabs can be added or looked up below.
spreadsheet = client.open(shname)
spreadsheet

<Spreadsheet 'PIIT data from Python' id:1L9zhEJd_xWNWPXwliTvEBqah8nQ6YS2xjwvuqtqhYtM>

###  --- Option 1: Create a NEW sheet ---

In [24]:
# Reuse the "NewData" tab when it already exists so this cell can be re-run;
# calling add_worksheet alone fails once a tab with that title is present.
try:
    new_sheet = spreadsheet.worksheet("NewData")
except gspread.exceptions.WorksheetNotFound:
    new_sheet = spreadsheet.add_worksheet(title="NewData", rows=100, cols=20)
else:
    # Existing tab: drop stale cells before rewriting it.
    new_sheet.clear()
# Header row first, then the data rows; the API response is the cell output.
new_sheet.update([df.columns.values.tolist()] + df.values.tolist())

{'spreadsheetId': '1L9zhEJd_xWNWPXwliTvEBqah8nQ6YS2xjwvuqtqhYtM',
 'updatedRange': 'NewData!A1:B3',
 'updatedRows': 3,
 'updatedColumns': 2,
 'updatedCells': 6}

### --- Option 2: Write to an EXISTING sheet by name ---

In [25]:
# Look up the tab titled "Sheet2" in the spreadsheet opened above.
existing_sheet = spreadsheet.worksheet("Sheet2")  # must exist already
# Empty the tab before writing; the response below reports the cleared range.
existing_sheet.clear()

{'spreadsheetId': '1L9zhEJd_xWNWPXwliTvEBqah8nQ6YS2xjwvuqtqhYtM',
 'clearedRange': 'Sheet2!A1:Z1000'}

In [26]:
# Write the frame into the cleared tab: header row first, then the data rows.
existing_sheet.update([df.columns.values.tolist(), *df.values.tolist()])

{'spreadsheetId': '1L9zhEJd_xWNWPXwliTvEBqah8nQ6YS2xjwvuqtqhYtM',
 'updatedRange': 'Sheet2!A1:B3',
 'updatedRows': 3,
 'updatedColumns': 2,
 'updatedCells': 6}

# pygsheets Library

The same export steps, using `pygsheets` instead of `gspread`.

In [2]:
#pip install pygsheets

In [27]:
import pandas as pd
import pygsheets
import os

In [None]:
# List the files in a local keys directory.
# NOTE(review): this path ('.../auData/keys') differs from the directory of
# `key` ('.../analytics/keys') and the cell has no execution count — confirm
# whether it is still needed.
os.listdir('/Users/du/dup/auData/keys')

In [28]:
# Authenticate pygsheets with the service-account key file stored in `key`.
gc = pygsheets.authorize(service_file=key)

In [39]:
# Open the spreadsheet by title with the pygsheets client and display it.
sh = gc.open(shname)
sh

<Spreadsheet 'PIIT data from Python' Sheets:4>

###  --- Option 1: Create new worksheet ---

In [40]:
# Reuse the "NewData3" tab when it already exists so this cell can be re-run;
# calling add_worksheet alone fails once a tab with that title is present.
try:
    wks_new = sh.worksheet_by_title("NewData3")
except pygsheets.exceptions.WorksheetNotFound:
    wks_new = sh.add_worksheet(title="NewData3", rows=100, cols=20)
else:
    # Existing tab: drop stale cells before rewriting it.
    wks_new.clear()
# Write the frame with its top-left corner at cell (1, 1).
wks_new.set_dataframe(df, (1, 1))

### --- Option 2: Write to existing worksheet ---

In [33]:
# Look up the tab titled "Sheet2", empty it, then write the frame with its
# top-left corner at cell (1, 1). The clear must come before the write.
wks_existing = sh.worksheet_by_title("Sheet2")
wks_existing.clear()
wks_existing.set_dataframe(df, (1, 1))

## Sheet Names

In [41]:
# Re-open the spreadsheet by title and list all of its worksheets.
sheetList = gc.open(shname).worksheets()
sheetList

[<Worksheet 'ML' index:0>,
 <Worksheet 'Sheet2' index:1>,
 <Worksheet 'NewData' index:2>,
 <Worksheet 'NewData2' index:3>,
 <Worksheet 'NewData3' index:4>]

In [42]:
# Open sheet
sh = gc.open(shname)
wks = sh[0]  # first worksheet
wks

<Worksheet 'ML' index:0>

In [44]:
# Write DataFrame
df = pd.DataFrame({'A': [1, 2, 3], 'B': ['x', 'y', 'z']})
df

Unnamed: 0,A,B
0,1,x
1,2,y
2,3,z


In [45]:
# Write the frame into the first worksheet, starting at cell (1, 1).
wks.set_dataframe(df, (1, 1))

## Read Data
### Open the spreadsheet (choose ONE of the three lines below)
- spreadsheet = client.open("My Google Spreadsheet")          # by name
- spreadsheet = client.open_by_url("https://docs.google.com/spreadsheets/d/<KEY>/edit#gid=<GID>")
- spreadsheet = client.open_by_key("<SPREADSHEET_KEY>")          # by key (recommended)
- https://docs.google.com/spreadsheets/d/1L9zhEJd_xWNWPXwliTvEBqah8nQ6YS2xjwvuqtqhYtM/edit?gid=2141895426#gid=2141895426

In [48]:
#pip install gspread_dataframe

In [49]:
import pandas as pd
import gspread
from google.oauth2.service_account import Credentials
from gspread_dataframe import get_as_dataframe

In [50]:
# Build service-account credentials carrying the spreadsheets and drive
# scopes, then create the authorised gspread client for the read section.
creds = Credentials.from_service_account_file(
    key,
    scopes=[
        "https://www.googleapis.com/auth/spreadsheets",
        "https://www.googleapis.com/auth/drive",
    ],
)
client = gspread.authorize(creds)

In [52]:
# Spreadsheet key: the part of the spreadsheet URL after `/d/`.
gsid ='1L9zhEJd_xWNWPXwliTvEBqah8nQ6YS2xjwvuqtqhYtM'
# Open the spreadsheet for reading via its key rather than its title.
shr = client.open_by_key(gsid)          # by key (recommended)

In [54]:
# List the worksheets of the spreadsheet opened by key.
sheetList = shr.worksheets()
sheetList

[<Worksheet 'ML' id:0>,
 <Worksheet 'Sheet2' id:2141895426>,
 <Worksheet 'NewData' id:565522700>,
 <Worksheet 'NewData2' id:2018298584>,
 <Worksheet 'NewData3' id:246207265>]

In [55]:
# Select the worksheet/tab to read from `shr`, the spreadsheet opened by key
# in this section, so the cell does not depend on the `spreadsheet` variable
# left over from the write section.
ws = shr.worksheet("Sheet2")  # replace "Sheet2" with your own tab title
ws

<Worksheet 'Sheet2' id:2141895426>

In [56]:
# Load the worksheet into pandas: row 0 is the header and dtype=str is
# requested for the values.
df = get_as_dataframe(
    ws,
    evaluate_formulas=True,
    header=0,
    dtype=str,
    na_filter=True,
)
df

Unnamed: 0,Name,Score
0,Alice,90
1,Bob,85


## Read all sheets/tabs at once

In [57]:
def read_all_tabs(spreadsheet_ref, service_account_file=None):
    """Read every worksheet (tab) of a Google spreadsheet.

    Parameters
    ----------
    spreadsheet_ref : str
        Spreadsheet URL (any value starting with "http") or spreadsheet title.
    service_account_file : str, optional
        Path to the service-account JSON key file. When omitted, the
        notebook-level ``key`` path is used.

    Returns
    -------
    dict
        Maps each worksheet title to the result of ``get_as_dataframe`` for
        that worksheet (called with ``header=0`` and ``dtype=str``).
    """
    scopes = [
        "https://www.googleapis.com/auth/spreadsheets",
        "https://www.googleapis.com/auth/drive",
    ]
    # Honour an explicitly supplied key file; fall back to the notebook-level
    # `key` so calls that omit the argument behave as before.
    key_path = key if service_account_file is None else service_account_file
    creds = Credentials.from_service_account_file(key_path, scopes=scopes)
    client = gspread.authorize(creds)

    # URLs are opened by URL; any other reference is treated as a title.
    if spreadsheet_ref.startswith("http"):
        sh = client.open_by_url(spreadsheet_ref)
    else:
        sh = client.open(spreadsheet_ref)

    return {
        ws.title: get_as_dataframe(ws, evaluate_formulas=True, header=0, dtype=str, na_filter=True)
        for ws in sh.worksheets()
    }

In [59]:
import os

# Path to the service-account JSON key file. Set the GSHEETS_KEY_FILE
# environment variable to point at your own key; the original hard-coded
# location is kept as the fallback so existing setups keep working.
key = os.environ.get('GSHEETS_KEY_FILE', '/Users/du/dup/analytics/keys/dup1966gskey.json')
# Title of the Google spreadsheet opened by name.
shname = 'PIIT data from Python'

In [58]:
# Re-create the authorised gspread client, open the spreadsheet by its key
# and list its worksheets.
creds = Credentials.from_service_account_file(
    key,
    scopes=[
        "https://www.googleapis.com/auth/spreadsheets",
        "https://www.googleapis.com/auth/drive",
    ],
)
client = gspread.authorize(creds)
gsid = '1L9zhEJd_xWNWPXwliTvEBqah8nQ6YS2xjwvuqtqhYtM'
shr = client.open_by_key(gsid)  # by key (recommended)
sheetList = shr.worksheets()
sheetList

[<Worksheet 'ML' id:0>,
 <Worksheet 'Sheet2' id:2141895426>,
 <Worksheet 'NewData' id:565522700>,
 <Worksheet 'NewData2' id:2018298584>,
 <Worksheet 'NewData3' id:246207265>]

In [70]:
# Read every tab of the spreadsheet titled 'PIIT data' and preview its 'ML' tab.
shname2='PIIT data'
tabs = read_all_tabs(shname2)
tabs['ML'].head()

Unnamed: 0.1,Unnamed: 0,Unnamed: 1
0,sheet,description


In [68]:
# Another spreadsheet: open it by key and list its worksheets.
gsid2 = "19ReQlRfDQHcV1OFUnmVkiFY_1IrJeOR0g1RmrjfjMD4"
shr2 = client.open_by_key(gsid2)          # by key (recommended)
sheetList2 = shr2.worksheets()
sheetList2

[<Worksheet 'ML' id:0>,
 <Worksheet 'links' id:1994012438>,
 <Worksheet 'student1' id:940279727>,
 <Worksheet 'T1A' id:117623602>,
 <Worksheet 'snames' id:1541623921>,
 <Worksheet 'orders' id:764977169>,
 <Worksheet 'returns' id:675685971>,
 <Worksheet 'people' id:1527535056>,
 <Worksheet 'sales1' id:252067454>]

In [69]:
# Read every tab of the spreadsheet titled `shname` and preview its 'ML' tab.
# NOTE(review): this rebinds `tabs`, which the `shname2` cell also assigns.
tabs = read_all_tabs(shname)
tabs['ML'].head()

Unnamed: 0,A,B
0,1,x
1,2,y
2,3,z


In [66]:
def select_tabs(spreadsheet_ref, tab_names, service_account_file=None):
    """Read only the named worksheets (tabs) of a Google spreadsheet.

    Parameters
    ----------
    spreadsheet_ref : str
        Spreadsheet URL (any value starting with "http") or spreadsheet title.
    tab_names : str or iterable of str
        Title(s) of the tabs to read. A single string is treated as one title.
    service_account_file : str, optional
        Path to the service-account JSON key file. When omitted, the
        notebook-level ``key`` path is used.

    Returns
    -------
    dict
        Maps each tab title that was found to the result of
        ``get_as_dataframe`` for that tab. Tabs that do not exist are
        reported with a printed warning and left out of the result.
    """
    scopes = [
        "https://www.googleapis.com/auth/spreadsheets",
        "https://www.googleapis.com/auth/drive",
    ]
    # Honour an explicitly supplied key file; fall back to the notebook-level
    # `key` so calls that omit the argument behave as before.
    key_path = key if service_account_file is None else service_account_file
    creds = Credentials.from_service_account_file(key_path, scopes=scopes)
    client = gspread.authorize(creds)

    # URLs are opened by URL; any other reference is treated as a title.
    if spreadsheet_ref.startswith("http"):
        sh = client.open_by_url(spreadsheet_ref)
    else:
        sh = client.open(spreadsheet_ref)

    # A bare string would otherwise be iterated character by character.
    if isinstance(tab_names, str):
        tab_names = [tab_names]

    data = {}
    for tab in tab_names:
        try:
            ws = sh.worksheet(tab)
        except gspread.exceptions.WorksheetNotFound:
            # Best effort: report the missing tab and carry on with the rest.
            print(f"⚠ Tab '{tab}' not found.")
            continue
        data[tab] = get_as_dataframe(ws, evaluate_formulas=True, header=0, dtype=str, na_filter=True)
    return data

In [63]:
def read_selected_tabs(spreadsheet_ref, tab_names, service_account_file=None):
    """Read a chosen set of worksheets (tabs) from a Google spreadsheet.

    Parameters
    ----------
    spreadsheet_ref : str
        Spreadsheet URL (any value starting with "http") or spreadsheet title.
    tab_names : str or iterable of str
        Title(s) of the tabs to read. A single string is treated as one title.
    service_account_file : str, optional
        Path to the service-account JSON key file. When omitted, the
        notebook-level ``key`` path is used.

    Returns
    -------
    dict
        Maps each tab title that was found to the result of
        ``get_as_dataframe`` for that tab. Tabs that do not exist are
        reported with a printed warning and left out of the result.
    """
    scopes = [
        "https://www.googleapis.com/auth/spreadsheets",
        "https://www.googleapis.com/auth/drive",
    ]
    # Honour an explicitly supplied key file; fall back to the notebook-level
    # `key` so calls that omit the argument behave as before.
    key_path = key if service_account_file is None else service_account_file
    creds = Credentials.from_service_account_file(key_path, scopes=scopes)
    client = gspread.authorize(creds)

    # Open spreadsheet: by URL when the reference looks like one, else by title.
    opener = client.open_by_url if spreadsheet_ref.startswith("http") else client.open
    sh = opener(spreadsheet_ref)

    # A bare string would otherwise be iterated character by character.
    if isinstance(tab_names, str):
        tab_names = [tab_names]

    data = {}
    for tab in tab_names:
        try:
            ws = sh.worksheet(tab)
        except gspread.exceptions.WorksheetNotFound:
            # Best effort: report the missing tab and carry on with the rest.
            print(f"⚠ Tab '{tab}' not found.")
            continue
        data[tab] = get_as_dataframe(ws, evaluate_formulas=True, header=0, dtype=str, na_filter=True)
    return data

In [71]:
# Example usage: read only the listed tabs and preview the 'orders' tab.
tabs_to_read = ["orders", "returns"]  # list of desired tabs
# NOTE(review): gsheet2 is assigned but not used in this cell; the
# spreadsheet is opened by its title 'PIIT data' instead.
gsheet2 = "19ReQlRfDQHcV1OFUnmVkiFY_1IrJeOR0g1RmrjfjMD4"
tabs2 = select_tabs('PIIT data', tabs_to_read)
tabs2['orders'].head()

Unnamed: 0,Row ID,Order ID,Order Date,Ship Date,Ship Mode,Customer ID,Customer Name,Segment,Country,City,...,Postal Code,Region,Product ID,Category,Sub-Category,Product Name,Sales,Quantity,Discount,Profit
0,1,CA-2016-152156,8/11/2016,11/11/2016,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,...,42420,South,FUR-BO-10001798,Furniture,Bookcases,Bush Somerset Collection Bookcase,261.96,2,0.0,41.9136
1,2,CA-2016-152156,8/11/2016,11/11/2016,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,...,42420,South,FUR-CH-10000454,Furniture,Chairs,"Hon Deluxe Fabric Upholstered Stacking Chairs,...",731.9399999999999,3,0.0,219.582
2,3,CA-2016-138688,12/6/2016,16/6/2016,Second Class,DV-13045,Darrin Van Huff,Corporate,United States,Los Angeles,...,90036,West,OFF-LA-10000240,Office Supplies,Labels,Self-Adhesive Address Labels for Typewriters b...,14.62,2,0.0,6.8714
3,4,US-2015-108966,11/10/2015,18/10/2015,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,...,33311,South,FUR-TA-10000577,Furniture,Tables,Bretford CR4500 Series Slim Rectangular Table,957.5775,5,0.45,-383.03100000000006
4,5,US-2015-108966,11/10/2015,18/10/2015,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,...,33311,South,OFF-ST-10000760,Office Supplies,Storage,Eldon Fold 'N Roll Cart System,22.368,2,0.2,2.516399999999999


In [73]:
# (rows, columns) of the 'returns' tab that was read.
tabs2['returns'].shape

(296, 2)

In [75]:
# Preview the first rows of the 'returns' tab.
tabs2['returns'].head()

Unnamed: 0,Returned,Order ID
0,Yes,CA-2017-153822
1,Yes,CA-2017-129707
2,Yes,CA-2014-152345
3,Yes,CA-2015-156440
4,Yes,US-2017-155999
