In [2]:
# Install necessary libraries
!pip install PyMuPDF transformers
from google.colab import drive
from google.colab import files
import fitz  # PyMuPDF
from transformers import pipeline
import os





In [3]:
# Mount Google Drive at /content/drive
drive.mount('/content/drive')

# Make sure the "DoShare" folder exists under the mounted Drive.
folder_name = "DoShare"
folder_path = f"/content/drive/My Drive/{folder_name}"
# exist_ok=True keeps this cell idempotent on re-runs and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(folder_path, exist_ok=True)

# Function to upload PDF file
def upload_pdf():
    """Prompt for a file upload and save the first uploaded file to ``folder_path``.

    Uses the Colab ``files.upload()`` widget, whose result is treated as a
    mapping of file name to file content (bytes). Only the first uploaded
    file is saved; a notice is printed if more than one file was selected.

    Returns:
        str: Path of the saved copy inside ``folder_path``.

    Raises:
        ValueError: If the upload finished without any file (e.g. cancelled).
    """
    uploaded = files.upload()
    if not uploaded:
        # Fail here with a clear message instead of returning None and
        # letting a later step crash with an unrelated error.
        raise ValueError("No file was uploaded.")

    # Only a single document is handled per run: take the first entry.
    fn, content = next(iter(uploaded.items()))
    if len(uploaded) > 1:
        print(f'Multiple files selected; only "{fn}" will be processed.')

    file_path = os.path.join(folder_path, fn)
    with open(file_path, "wb") as f:
        f.write(content)
    print(f'File "{fn}" uploaded to "{folder_path}"')
    return file_path

# Function to extract text from PDF
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of the PDF at ``pdf_path``, concatenated in page order.

    Args:
        pdf_path: Path of the PDF file to open with PyMuPDF (``fitz``).

    Returns:
        str: The page texts joined without any separator; empty if no page
        yields text.
    """
    # The context manager closes the document handle even if a page raises.
    with fitz.open(pdf_path) as doc:
        return "".join(page.get_text() for page in doc)

# Function to summarize text
def summarize_text(text):
    """Summarize ``text`` with the ``facebook/bart-large-cnn`` summarization pipeline.

    The summary length cap scales with the input size in characters:
    50 for inputs under 500, 100 for inputs under 2000, otherwise 200.

    Args:
        text: The text to summarize.

    Returns:
        str: The generated summary, or an empty string when ``text`` is
        empty or whitespace-only (nothing to summarize).
    """
    # Nothing to summarize (e.g. a PDF with no extractable text): skip the
    # model load entirely.
    if not text or not text.strip():
        return ""

    # Determine length of the summary based on the length of the text
    text_length = len(text)
    if text_length < 500:
        max_length = 50
    elif text_length < 2000:
        max_length = 100
    else:
        max_length = 200

    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    # truncation=True asks the pipeline to truncate inputs that exceed the
    # model's maximum input length, so long documents are cut rather than
    # passed through at full length.
    summary = summarizer(
        text,
        max_length=max_length,
        min_length=30,
        do_sample=False,
        truncation=True,
    )
    return summary[0]['summary_text']


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [6]:
# Function to create a shareable link
def create_shareable_link(file_path):
    """Upload ``file_path`` to the "DoShare" Drive folder and return a public view link.

    Steps: authenticate the Colab user, build a Drive v3 client, find (or
    create) a folder named "DoShare", upload the file into it as
    ``application/pdf``, grant "anyone" the "reader" role on the uploaded
    file, and build the link from the new file's id.

    Args:
        file_path: Local path of the PDF to upload.

    Returns:
        str: A ``https://drive.google.com/file/d/<id>/view?usp=sharing`` URL.

    NOTE(review): this always uploads a new file through the API. When
    ``file_path`` already lives inside the mounted Drive "DoShare" folder
    (as produced by ``upload_pdf``), this presumably results in a second
    copy of the same document in that folder — confirm whether that is
    intended.
    """
    from googleapiclient.discovery import build
    from googleapiclient.http import MediaFileUpload
    from google.colab import auth
    from oauth2client.client import GoogleCredentials

    # Authenticate and create the Google Drive client
    auth.authenticate_user()
    creds = GoogleCredentials.get_application_default()
    drive_service = build('drive', 'v3', credentials=creds)

    # Check if "DoShare" folder exists; if not, create it.
    # `trashed=false` keeps a deleted "DoShare" folder from being picked as
    # the upload target.
    folder_name = "DoShare"
    folder_query = (
        f"name='{folder_name}' "
        "and mimeType='application/vnd.google-apps.folder' "
        "and trashed=false"
    )
    response = drive_service.files().list(q=folder_query, spaces='drive').execute()
    # Named to avoid shadowing the `google.colab.files` module imported by the notebook.
    matching_folders = response.get('files', [])

    if matching_folders:
        folder_id = matching_folders[0]['id']
    else:
        folder_metadata = {
            'name': folder_name,
            'mimeType': 'application/vnd.google-apps.folder'
        }
        folder = drive_service.files().create(body=folder_metadata, fields='id').execute()
        folder_id = folder.get('id')

    # Upload the file to the "DoShare" folder
    file_metadata = {
        'name': os.path.basename(file_path),
        'parents': [folder_id]
    }
    media = MediaFileUpload(file_path, mimetype='application/pdf')
    uploaded_file = drive_service.files().create(
        body=file_metadata, media_body=media, fields='id'
    ).execute()
    file_id = uploaded_file['id']

    # Make the file public and get a shareable link.
    # SECURITY: type 'anyone' + role 'reader' means anybody holding the link
    # can read the document; do not use this for confidential files.
    drive_service.permissions().create(
        fileId=file_id,
        body={'role': 'reader', 'type': 'anyone'},
    ).execute()

    shareable_link = f"https://drive.google.com/file/d/{file_id}/view?usp=sharing"
    return shareable_link

# Main function
def main():
    """Run the end-to-end flow: upload a PDF, print its summary, print a share link.

    Calls ``upload_pdf``, ``extract_text_from_pdf``, ``summarize_text`` and
    ``create_shareable_link`` in that order. Stops early with a message when
    no PDF path is available.
    """
    pdf_path = upload_pdf()
    if not pdf_path:
        # Without a file there is nothing to read, summarize, or share;
        # continuing would only fail inside a later step.
        print("No PDF was uploaded; nothing to summarize or share.")
        return

    text = extract_text_from_pdf(pdf_path)
    summary = summarize_text(text)
    print("\nSummary:\n", summary)

    shareable_link = create_shareable_link(pdf_path)
    print("\nShareable Link:\n", shareable_link)

# Run the main function when this code is executed directly (as in a notebook
# cell); the guard keeps main() from running if the code is imported as a module.
if __name__ == "__main__":
    main()


Saving 211010206_Aditya_Kaul(SDE).pdf to 211010206_Aditya_Kaul(SDE) (1).pdf
File "211010206_Aditya_Kaul(SDE) (1).pdf" uploaded to "/content/drive/My Drive/DoShare"


Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.



Summary:
 Aditya Kaul is a graduate of the International Institute of Information Technology, Raipur. He has worked for Birlasoft and WorkZera Pvt Ltd. He is a national gold medalist in kickboxing and wushu.

Shareable Link:
 https://drive.google.com/file/d/1IWWujJ7ZyQeWyocVQQKGkaTZZSjgsfr-/view?usp=sharing
