# Google Drive File Downloader
This notebook downloads a zip archive from a Google Drive sharing link into memory and extracts its contents into the current working directory.


In [None]:
import os
import requests
import zipfile
import io
import re

# Sharing link of the Google Drive file to download. The file ID is the path
# segment that follows "/file/d/" and is what the download helpers extract.
url = "https://drive.google.com/file/d/1Ej3byvWWMUuAN5BD3LZAYKvNJ0PWiEK0/view?usp=sharing"


## Helper Functions


In [None]:
def get_confirm_token(response):
    """Return the download-confirmation token carried in the response cookies.

    Scans ``response.cookies`` in iteration order and returns the value of the
    first cookie whose name starts with ``download_warning``. Returns ``None``
    when no such cookie is present.
    """
    warning_values = (
        cookie_value
        for cookie_name, cookie_value in response.cookies.items()
        if cookie_name.startswith('download_warning')
    )
    return next(warning_values, None)

def get_file_id_from_url(url):
    """Extract the Google Drive file ID from a sharing or download URL.

    Recognised forms, tried in this order:

    * ``.../file/d/<id>/...``  (the "share" link form)
    * ``...?id=<id>`` or ``...&id=<id>``  (``open?id=`` / ``uc?id=`` links)

    Args:
        url: The URL to inspect.

    Returns:
        The file ID string, or ``None`` if ``url`` is not a string or contains
        no recognisable ID.
    """
    # A non-string (e.g. None) would make re.search raise TypeError; treat it
    # as "no ID found" so the caller reports one consistent error.
    if not isinstance(url, str):
        return None

    patterns = (
        r'/file/d/([a-zA-Z0-9_-]+)',
        r'[?&]id=([a-zA-Z0-9_-]+)',
    )
    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            return match.group(1)
    return None

def download_file_from_google_drive(url, destination=None):
    """Download a file from Google Drive given one of its URLs.

    Args:
        url: A Google Drive URL from which ``get_file_id_from_url`` can
            extract a file ID.
        destination: Optional path. When given, the payload is streamed to
            this file in 32 KiB chunks; otherwise it is returned in memory.

    Returns:
        ``destination`` when it was provided, else the downloaded bytes.

    Raises:
        ValueError: If no file ID can be extracted from ``url``.
        requests.HTTPError: If the final response has an error status code,
            so an error page is never saved or returned as file content.
    """
    file_id = get_file_id_from_url(url)
    if not file_id:
        raise ValueError("Could not extract file ID from URL")

    endpoint = "https://drive.google.com/uc"
    params = {'id': file_id, 'export': 'download'}

    # The context manager closes the session (and its connection pool) on
    # every exit path, including exceptions.
    with requests.Session() as session:
        response = session.get(endpoint, params=params, stream=True)
        try:
            token = get_confirm_token(response)
            if token:
                # A "download_warning" cookie means the request must be
                # repeated with the token as ``confirm``. Release the first
                # streamed response before replacing it.
                # NOTE(review): Drive may deliver this confirmation via an
                # HTML page rather than a cookie; that case is not handled
                # here. Confirm against current Drive behaviour.
                response.close()
                params['confirm'] = token
                response = session.get(endpoint, params=params, stream=True)

            response.raise_for_status()

            if destination:
                with open(destination, 'wb') as f:
                    for chunk in response.iter_content(32768):
                        if chunk:  # skip empty chunks
                            f.write(chunk)
                return destination
            return response.content
        finally:
            response.close()

## Download and Extract the Zip File


In [None]:
# Download the archive fully into memory (no destination path is passed).
print("Starting download from Google Drive...")
file_content = download_file_from_google_drive(url)
print("Download complete!")

zip_file = io.BytesIO(file_content)

# Fail early with an actionable message when the payload is not a zip archive,
# rather than letting ZipFile raise an opaque error further down.
if not zipfile.is_zipfile(zip_file):
    raise zipfile.BadZipFile(
        "Downloaded content is not a zip archive "
        f"({len(file_content)} bytes received); Google Drive may have returned "
        "an HTML page instead of the file. Check the link's sharing settings."
    )

print("Extracting zip file...")
with zipfile.ZipFile(zip_file, 'r') as zip_ref:
    file_list = zip_ref.namelist()
    print(f"Files in the zip archive: {len(file_list)}")

    # Extract everything into the current working directory.
    zip_ref.extractall(".")

print("Extraction complete!")

# Collect the first path component of every archive member. For a name with
# no '/', split('/')[0] is the name itself, so no special case is needed.
extracted_top_level = {member.split('/')[0] for member in file_list}

print("\nExtracted top-level files/directories:")
for item in sorted(extracted_top_level):
    print(f"- {item}")


## Verification
Check that each extracted top-level file or directory exists on disk, and report how many files each directory contains.


In [None]:
# Confirm that every top-level entry recorded during extraction is present in
# the current working directory, reporting a file count for directories.
print("Verifying extraction...")
for item in sorted(extracted_top_level):
    if not os.path.exists(item):
        print(f"❌ '{item}' does not exist")
        continue

    if not os.path.isdir(item):
        print(f"✅ File '{item}' exists")
        continue

    # Count regular files at every depth beneath the directory.
    file_count = sum(len(names) for _, _, names in os.walk(item))
    print(f"✅ Directory '{item}' exists with {file_count} files")

print("\nDownload and extraction process completed!")