Tyler Beaulieu  
Tim Paylor<br>
DS5110, Fall 2025  
December 9, 2025  

<h4 style="text-align:center;">Final Project</h4>

In [4]:
#Initial Imports
import os
import pandas as pd
from dotenv import load_dotenv
from googleapiclient.discovery import build

In [6]:
# Load variables from the .env file, then read the Google Drive API key.
load_dotenv()
API_KEY = os.environ.get('GOOGLE_DRIVE_API_KEY')
if API_KEY is None or API_KEY == '':
    # Fail fast: every later Drive call needs the key.
    raise ValueError("Please set GOOGLE_DRIVE_API_KEY")

# Build the Drive v3 API client, authenticated with the API key.
service = build(
    'drive',
    'v3',
    developerKey=API_KEY,
)

def get_folder_name(service, folder_id):
    """Return the name of a Drive folder, or '' if it cannot be fetched.

    Args:
        service: Drive API client exposing ``files().get(...).execute()``.
        folder_id: ID of the folder to look up.

    Returns:
        The value of the ``name`` field in the response, or an empty string
        when the field is missing or the lookup raises an ordinary error.
    """
    try:
        folder = service.files().get(fileId=folder_id, fields='name').execute()
        return folder.get('name', '')
    except Exception:
        # Best-effort lookup: callers only use the name as a path prefix, so
        # fall back to ''. Catching Exception (not a bare except) lets
        # KeyboardInterrupt and SystemExit propagate.
        return ''

def list_files(service, folder_id, current_path=""):
    """Recursively list every item under a Drive folder.

    Args:
        service: Drive API client exposing ``files().list(...).execute()``.
        folder_id: ID of the folder whose children are listed.
        current_path: Path string recorded on each direct child of
            ``folder_id``; subfolder names are appended to it on recursion.

    Returns:
        A list of dicts with keys ``id``, ``name``, ``mimeType``, ``size``,
        ``modifiedTime`` and ``path``. Folders are included as entries and
        are immediately followed by their own contents. ``size`` and
        ``modifiedTime`` default to ``'N/A'`` when absent from the response.
    """
    all_files = []
    page_token = None

    # A single list call returns at most one page of results; keep requesting
    # pages until the API stops returning a nextPageToken so that large
    # folders are not silently truncated.
    while True:
        results = service.files().list(
            q=f"'{folder_id}' in parents",
            pageSize=1000,
            fields="nextPageToken, files(id, name, mimeType, size, modifiedTime)",
            pageToken=page_token,
        ).execute()

        for item in results.get('files', []):
            # Files and folders are both recorded with the same shape.
            all_files.append({
                'id': item['id'],
                'name': item['name'],
                'mimeType': item['mimeType'],
                'size': item.get('size', 'N/A'),
                'modifiedTime': item.get('modifiedTime', 'N/A'),
                'path': current_path
            })

            # Descend into subfolders, extending the path with the folder name.
            if item['mimeType'] == 'application/vnd.google-apps.folder':
                subfolder_path = os.path.join(current_path, item['name'])
                all_files.extend(list_files(service, item['id'], subfolder_path))

        page_token = results.get('nextPageToken')
        if not page_token:
            break

    return all_files

# The root folder ID
root_folder_id = '12qMZKDEWn71JrN8Het5-a_NUWV0eE1ZF'

# Get the root folder name for the path ('' if the lookup fails)
root_folder_name = get_folder_name(service, root_folder_id)

# Get all files
file_list = list_files(service, root_folder_id, root_folder_name)

# Convert to pandas DataFrame. The columns are named explicitly so that an
# empty listing still produces a frame with a 'mimeType' column instead of
# raising KeyError in the filter below.
df = pd.DataFrame(
    file_list,
    columns=['id', 'name', 'mimeType', 'size', 'modifiedTime', 'path'],
)

# Display results without folders
df_files_only = df[df['mimeType'] != 'application/vnd.google-apps.folder']
print(f"\nFound {len(df_files_only)} total items (files, not folders)")
print("\nFirst few entries:")
print(df_files_only.head(10))


Found 8672 total items (files, not folders)

First few entries:
                                   id  \
2   15WbZHuFUhe79g-cm7sQp-0v5EZSH0NMQ   
3   1kPD2Jg3TSk1mfqYGqVozfVUqvEZvZvBJ   
6   1gI7VVGbaQjP-6T0xB-OWOdwW6Ae2FCxT   
7   16gh23fs6ID6WSIxk4nrkbJO-1CM_a0HZ   
8   1E7TYkQjsML1ckSFMJE-oThzGYtBhI7uH   
9   1dJST5hK9G_4QjAoxy2X6fWIg445SI-fF   
10  1INTRSX59xNSczsQWpZ7zAsCBXauCQIIG   
11  1RnnmegRZIrUbrKpsgjtkkJ8OiU5bu4xm   
12  1kNv8fTffolaPOklDiOXW31DhDn4mdh6n   
14  1TRI-GgGQOKSIw_uiyJyPc85SHN6l-jGW   

                                            name                  mimeType  \
2   E2X93900_Draft 60% Review Set_2023.01.20.pdf           application/pdf   
3             E2X93900_60% Drawings_20230224.pdf           application/pdf   
6                        Image C08 site plan.tif                image/tiff   
7              IMT AsBuilts H2O and Sanitary.pdf           application/pdf   
8              IMT AsBuilts H2O and Sanitary.bak  application/octet-stream   
9              