<a href="https://colab.research.google.com/github/itskhawer/OnlineArchiveExtractor/blob/main/Online_Archive_Extractor_using_Link.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import shutil
import requests
import zipfile
!pip install rarfile
import rarfile
import tarfile
from google.colab import drive
from tqdm import tqdm

# Mount Google Drive
drive.mount('/content/drive')

# Ensure the 'Extracted' folder exists
extract_folder = "/content/drive/My Drive/Extracted"
os.makedirs(extract_folder, exist_ok=True)


def download_file(url, temp_save_path):
    """
    Downloads a file from the given URL to a temporary location.

    :param url: URL of the file to download
    :param temp_save_path: Temporary file save location
    :return: None
    """
    try:
        # Send a GET request to the URL
        response = requests.get(url, stream=True)
        response.raise_for_status()  # Raise an exception for bad status codes

        # Get the total file size
        file_size = int(response.headers.get('content-length', 0))

        # Initialize the progress bar
        progress_bar = tqdm(total=file_size, unit='B', unit_scale=True, desc="Downloading")

        # Write content to the temporary file
        with open(temp_save_path, 'wb') as file:
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:  # Skip empty chunks
                    file.write(chunk)
                    progress_bar.update(len(chunk))

        progress_bar.close()
        print("File downloaded successfully!")
    except requests.exceptions.RequestException as err:
        print(f"Error during download: {err}")
        raise


def extract_file(file_path, extract_to):
    """
    Extract the file from a given file path to the specified directory.
    The progress bar updates continuously during extraction.
    """
    try:
        total_size = 0
        extracted_size = 0
        chunk_size = 8192  # 8KB chunks to read during extraction
        total_files = 0
        current_file = 0

        # Determine total size of the compressed file and total files
        if file_path.endswith('.zip'):
            with zipfile.ZipFile(file_path, 'r') as zip_ref:
                total_size = sum(zinfo.file_size for zinfo in zip_ref.infolist())
                total_files = len(zip_ref.infolist())
        elif file_path.endswith('.rar'):
            with rarfile.RarFile(file_path, 'r') as rar_ref:
                total_size = sum(rinfo.file_size for rinfo in rar_ref.infolist())
                total_files = len(rar_ref.infolist())
        elif file_path.endswith(('.tar', '.tar.gz', '.tgz', '.tar.bz2')):
            with tarfile.open(file_path, 'r') as tar_ref:
                total_size = sum(member.size for member in tar_ref.getmembers())
                total_files = len(tar_ref.getmembers())
        else:
            print("Unsupported file format. Please upload a .zip, .rar, or .tar file.")
            return False

        # Initialize progress bar with ncols parameter to avoid it going to the next line
        progress_bar = tqdm(total=total_size, unit='B', unit_scale=True, desc="Extracting")

        # Extract files with progress monitoring
        if file_path.endswith('.zip'):
            with zipfile.ZipFile(file_path, 'r') as zip_ref:
                for zinfo in zip_ref.infolist():
                    current_file += 1
                    progress_bar.set_description(f"Extracting File {current_file}/{total_files}")
                    with zip_ref.open(zinfo) as src, open(os.path.join(extract_to, zinfo.filename), 'wb') as dest:
                        # Copy in chunks and update progress bar
                        while True:
                            chunk = src.read(chunk_size)
                            if not chunk:
                                break
                            dest.write(chunk)
                            extracted_size += len(chunk)
                            progress_bar.update(len(chunk))
        elif file_path.endswith('.rar'):
            with rarfile.RarFile(file_path, 'r') as rar_ref:
                for rinfo in rar_ref.infolist():
                    current_file += 1
                    progress_bar.set_description(f"Extracting File {current_file}/{total_files}")
                    with rar_ref.open(rinfo) as src, open(os.path.join(extract_to, rinfo.filename), 'wb') as dest:
                        # Copy in chunks and update progress bar
                        while True:
                            chunk = src.read(chunk_size)
                            if not chunk:
                                break
                            dest.write(chunk)
                            extracted_size += len(chunk)
                            progress_bar.update(len(chunk))
        elif file_path.endswith(('.tar', '.tar.gz', '.tgz', '.tar.bz2')):
            with tarfile.open(file_path, 'r') as tar_ref:
                for member in tar_ref.getmembers():
                    current_file += 1
                    progress_bar.set_description(f"Extracting File {current_file}/{total_files}")
                    with tar_ref.extractfile(member) as src, open(os.path.join(extract_to, member.name), 'wb') as dest:
                        # Copy in chunks and update progress bar
                        while True:
                            chunk = src.read(chunk_size)
                            if not chunk:
                                break
                            dest.write(chunk)
                            extracted_size += len(chunk)
                            progress_bar.update(len(chunk))

        progress_bar.close()
        print(f"\nExtracted files to {extract_to}")
        return True

    except Exception as e:
        print(f"Error during extraction: {e}")
        return False


def main():
    url = input("Enter the link to the compressed file: ").strip()
    file_name = url.split("/")[-1]
    download_path = f"/content/{file_name}"

    try:
        # Step 1: Download the file
        download_file(url, download_path)

        # Step 2: Create a unique folder for extracted files
        base_name = os.path.splitext(file_name)[0]
        output_folder = os.path.join(extract_folder, base_name)
        os.makedirs(output_folder, exist_ok=True)

        # Step 3: Extract the file
        success = extract_file(download_path, output_folder)

        # Step 4: Delete the compressed file if extraction was successful
        if success:
            os.remove(download_path)
            print(f"Deleted the compressed file: {download_path}")

    except Exception as e:
        print(f"An error occurred: {e}")


if __name__ == "__main__":
    main()


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Enter the link to the compressed file: https://archive.org/download/princess-bride/Princess%20Bride.rar


Downloading: 100%|██████████| 1.01G/1.01G [08:42<00:00, 1.93MB/s]


File downloaded successfully!


Extracting File 1/1: 100%|██████████| 977M/977M [00:26<00:00, 36.5MB/s]



Extracted files to /content/drive/My Drive/Extracted/Princess%20Bride
Deleted the compressed file: /content/Princess%20Bride.rar
