In [5]:
import os
from dotenv import load_dotenv
import kaggle
import tkinter as tk
from tkinter import scrolledtext, simpledialog

# Load environment variables from the .env file
env_path = '/Users/paigeblackstone/Desktop/Portfolio29/Portfolio29/env/.env'
load_dotenv(env_path)

def authenticate_kaggle():
    try:
        kaggle.api.authenticate()  # Authenticate using credentials from ~/.kaggle
        print("API authenticated successfully.")
    except Exception as e:
        print(f"An error occurred during authentication: {e}")

def search_datasets(query):
    try:
        authenticate_kaggle()
        print("Fetching datasets...")

        # Fetch datasets
        datasets = kaggle.api.dataset_list(search=query, sort_by='votes')

        if datasets is None:
            print("API call returned None.")
            return []

        print(f"Number of datasets returned: {len(datasets)}")

        # Print attributes of all datasets for debugging
        for i, dataset in enumerate(datasets):
            print(f"Dataset {i}:")
            dataset_attrs = vars(dataset)
            for key, value in dataset_attrs.items():
                if value is None:
                    print(f"Attribute '{key}' is None")
                else:
                    print(f"{key}: {value}")

        return datasets
    except Exception as e:
        print(f"An error occurred during dataset search: {e}")
        return []

def on_search():
    query = search_entry.get()
    tags = tags_entry.get().split(',')

    print(f"Search Query: '{query}'")
    print(f"Tags: {tags}")

    datasets = search_datasets(query)

    result_text.delete('1.0', tk.END)
    dataset_listbox.delete(0, tk.END)

    if datasets is None or len(datasets) == 0:
        result_text.insert(tk.END, "No datasets found or an error occurred.\n")
    else:
        for dataset in datasets:
            try:
                title = getattr(dataset, 'title', "No title available") or "No title available"
                dataset_id = getattr(dataset, 'ref', "No ID available") or "No ID available"
                url = getattr(dataset, 'url', "No URL available") or "No URL available"
                description = getattr(dataset, 'description', "No description available") or "No description available"
                last_updated = getattr(dataset, 'last_updated', "No update date available") or "No update date available"
                size = getattr(dataset, 'size', "No size information available") or "No size information available"
                rows = getattr(dataset, 'total_rows', "No row information available") or "No row information available"

                result_text.insert(tk.END, f"Title: {title}\n")
                result_text.insert(tk.END, f"ID: {dataset_id}\n")
                result_text.insert(tk.END, f"URL: {url}\n")
                result_text.insert(tk.END, f"Description: {description}\n")
                result_text.insert(tk.END, f"Last Updated: {last_updated}\n")
                result_text.insert(tk.END, f"Size: {size}\n")
                result_text.insert(tk.END, f"Rows: {rows}\n")
                result_text.insert(tk.END, "-----------\n")

                # Add dataset to listbox for selection
                dataset_listbox.insert(tk.END, dataset_id)
            except Exception as e:
                print(f"An error occurred while processing dataset attributes: {e}")

def download_dataset(dataset_id, path):
    try:
        authenticate_kaggle()
        kaggle.api.dataset_download_files(dataset_id, path=path, unzip=True)
        print(f"Dataset {dataset_id} downloaded successfully.")
    except Exception as e:
        print(f"An error occurred while downloading the dataset: {e}")

def on_download():
    selected_index = dataset_listbox.curselection()
    if not selected_index:
        result_text.insert(tk.END, "No dataset selected for download.\n")
        return

    dataset_id = dataset_listbox.get(selected_index[0])
    download_path = simpledialog.askstring("Download Path", "Enter path to save dataset:", initialvalue=os.getcwd())
    
    if download_path:
        print(f"Downloading dataset: {dataset_id}")
        download_dataset(dataset_id, download_path)
        result_text.insert(tk.END, f"Downloading dataset {dataset_id} to {download_path}...\n")

# Set up the main window
root = tk.Tk()
root.title("Kaggle Dataset Browser")

# Search Entry
search_label = tk.Label(root, text="Search Query:")
search_label.pack(pady=5)
search_entry = tk.Entry(root, width=50)
search_entry.pack(pady=5)

# Tags Entry (Optional)
tags_label = tk.Label(root, text="Tags (comma-separated):")
tags_label.pack(pady=5)
tags_entry = tk.Entry(root, width=50)
tags_entry.pack(pady=5)

# Search Button
search_button = tk.Button(root, text="Search", command=on_search)
search_button.pack(pady=5)

# Results Area
result_text = scrolledtext.ScrolledText(root, width=80, height=20)
result_text.pack(pady=5)

# Dataset Listbox
dataset_listbox = tk.Listbox(root, width=80, height=5)
dataset_listbox.pack(pady=5)

# Download Button
download_button = tk.Button(root, text="Download Selected Dataset", command=on_download)
download_button.pack(pady=5)

root.mainloop()


Search Query: 'financial'
Tags: ['']
API authenticated successfully.
Fetching datasets...
An error occurred during dataset search: unsupported operand type(s) for +: 'NoneType' and 'str'
