In [18]:
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient
import openai
from openai import OpenAI
from dotenv import load_dotenv, find_dotenv
import os
import json

# Load variables from the .env file located by find_dotenv() into the process
# environment; the later cells read their credentials and IDs via os.getenv(...).
load_dotenv(find_dotenv())

True

In [19]:
def _require_env(name):
    """Return the value of environment variable ``name``.

    Raises:
        RuntimeError: if the variable is unset or empty, so a missing setting
            is reported by name instead of surfacing later as an opaque SDK
            error caused by passing ``None``.
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(f"Required environment variable {name} is not set")
    return value


# Azure Blob Storage: build the service client from the connection string, then
# scope it to the single container the later cells download from.
blob_service_client = BlobServiceClient.from_connection_string(_require_env("AZURE_STORAGE_CONNECTION_STRING"))
container_client = blob_service_client.get_container_client(_require_env("AZURE_CONTAINER"))

# OpenAI client used by the later cells for file upload and deletion.
client = OpenAI(
    api_key=_require_env("OPENAI_API_KEY"),
)


In [22]:
# Scratch directory: each blob is written here only long enough to be re-uploaded
# to OpenAI, then removed.
# NOTE(review): '/staging' is an absolute path at the filesystem root and may not be
# writable for an unprivileged user — consider a configurable or temporary directory.
staging_dir = '/staging'
os.makedirs(staging_dir, exist_ok=True)


# Blobs (paths inside the Azure container) to copy over to OpenAI.
blob_paths = [
'BUS5000/Introduction/10dayMBA - Intro.pdf',
'BUS5000/Introduction/STUDENT_NOTES/s1/s1w0.docx',
'BUS5000/Introduction/STUDENT_NOTES/s2/s2w0.docx'
]
# FileObject responses returned by OpenAI, in the same order as blob_paths.
uploaded_files = []

for blob_path in blob_paths:
    # Stage under the blob's base name so the upload is opened from a file with
    # the original file name.
    staging_path = os.path.join(staging_dir, os.path.basename(blob_path))

    try:
        # Download the file from Azure Blob, streaming straight to disk instead
        # of buffering the whole blob in memory first.
        blob_client = container_client.get_blob_client(blob_path)
        with open(staging_path, "wb") as download_file:
            blob_client.download_blob().readinto(download_file)

        # Upload the file to OpenAI
        with open(staging_path, "rb") as file:
            response = client.files.create(file=file, purpose="assistants")
            uploaded_files.append(response)
    finally:
        # Always clean up the staged copy, even when the download or upload
        # raised; the existence check avoids masking the original error when
        # the file was never created.
        if os.path.exists(staging_path):
            os.remove(staging_path)

def file_object_to_dict(file_obj):
    """Copy the manifest fields of an uploaded file object into a plain dict.

    Args:
        file_obj: object exposing the attributes ``id``, ``bytes``,
            ``created_at``, ``filename``, ``object``, ``purpose``, ``status``
            and ``status_details`` (the responses collected from
            ``client.files.create``).

    Returns:
        dict: one entry per attribute listed above, keyed by the attribute
        name, in that order. Any other attributes on ``file_obj`` are ignored.
    """
    field_names = (
        'id',
        'bytes',
        'created_at',
        'filename',
        'object',
        'purpose',
        'status',
        'status_details',
    )
    return {name: getattr(file_obj, name) for name in field_names}

# Convert each FileObject in the list to a plain dictionary so it can be JSON-encoded
file_dicts = [file_object_to_dict(file_obj) for file_obj in uploaded_files]

# Save the list of dictionaries as a JSON list
json_path = './.bin/files.json'
# open(..., 'w') does not create missing parent directories, so make sure
# ./.bin exists first; otherwise a fresh checkout fails with FileNotFoundError.
os.makedirs(os.path.dirname(json_path), exist_ok=True)
with open(json_path, 'w') as json_file:
    json.dump(file_dicts, json_file)

In [22]:
def delete_files_from_openai():
    """Delete all OpenAI account files, then detach all files from the assistant.

    Phase 1 deletes every file listed by ``client.files.list()`` and prints a
    ``Deleted file <id>`` line for each. Phase 2 lists the files attached to
    the assistant whose ID is read from the ``OPENAI_ASSISTANT`` environment
    variable and deletes each attachment. ``ending..`` is printed when the
    assistant has no attached files left to remove.

    Both phases always run: an empty account listing no longer short-circuits
    the assistant cleanup.

    NOTE(review): ``client.beta.assistants.files`` appears to exist only in
    older openai SDK releases — confirm the pinned SDK version.
    """
    # Iterating the page object (rather than reading `.data`, which holds only
    # the page that was fetched) lets the SDK follow pagination. The listing is
    # materialized first so deletions cannot disturb the paging cursor.
    account_files = list(client.files.list())
    for file in account_files:
        file_id = file.id
        client.files.delete(
            file_id=file_id
        )
        print(f"Deleted file {file_id}")

    # Read the assistant ID once instead of on every loop iteration.
    assistant_id = os.getenv("OPENAI_ASSISTANT")
    assistant_files = list(
        client.beta.assistants.files.list(
            assistant_id=assistant_id
        )
    )

    # Nothing attached to the assistant: report and stop.
    if not assistant_files:
        print("ending..")
        return

    for file in assistant_files:
        client.beta.assistants.files.delete(
            assistant_id=assistant_id,
            file_id=file.id
        )
    
# Run the cleanup now. Destructive: delete_files_from_openai() deletes the files
# listed by client.files.list() for the whole account, not only the files this
# notebook uploaded.
delete_files_from_openai()


Deleted file file-183J5qW4YR5swyMd67rIPLFA
Deleted file file-2iMor5j5BBN8jYLou1rvNgl2
Deleted file file-Rz4NLaIi5bdhsDUeDwy674r4
Deleted file file-hOIlgRLDBy7aEH3Jgu3mjb9B
Deleted file file-lGUQHlgWAKeozDd08feZcRtM
Deleted file file-9IJfZpAfX39gZw2iT3CpHYtk
Deleted file file-y5wq3UbFHy1YZdaLjY0W9AIH
Deleted file file-l4cV4p8PMxxdPb5utucARJ0r
Deleted file file-RBs8iX0qsVwxEKFIivKTmPBe
Deleted file file-nmf9Fi5S15uvasWHijN5vvXK
Deleted file file-y5aek5I2heiKhBRQH0YmCIAS
Deleted file file-qCpLcxbvBROxCG1ehFSZY5Oh
Deleted file file-xezeLthSxhG1tlH6F9tZa6mf
Deleted file file-lwXRv8ScOAwOdvkwGxmuanaY
Deleted file file-exAi161HW9VCQoV8tqVigC1O
ending..
