In [1]:
pip install PyPDF2 transformers torch gradio langchain openai python-dotenv

Note: you may need to restart the kernel to use updated packages.


In [2]:
# Step 1
import PyPDF2
from transformers import pipeline
import gradio as gr
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage
from dotenv import load_dotenv
import os

# Load environment variables
load_dotenv('/users/shephalidubey/Documents/keys.env')

# Set OpenAI API key
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')

# Function to extract text from PDF
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page of the PDF at ``pdf_path``.

    A page whose ``extract_text()`` result is falsy contributes an empty
    string.  Any exception raised while opening or parsing the file is
    printed rather than propagated, and whatever text was collected before
    the failure is returned (an empty string if nothing was read).
    """
    page_texts = []
    try:
        with open(pdf_path, "rb") as pdf_file:
            for pdf_page in PyPDF2.PdfReader(pdf_file).pages:
                page_texts.append(pdf_page.extract_text() or "")
    except Exception as err:
        print(f"Error reading PDF: {err}")
    return "".join(page_texts)

# Function to summarize text
def summarize_text(text, max_input_length=4096):
    """Summarize ``text`` with an OpenAI chat model, one chunk at a time.

    The text is cut into consecutive slices of at most ``max_input_length``
    characters (characters, not model tokens).  Each slice is sent to
    ``gpt-3.5-turbo`` with a "Summarize the following text" prompt and the
    per-chunk summaries are joined with newlines.

    Args:
        text: The text to summarize.
        max_input_length: Maximum number of characters per chunk.

    Returns:
        The newline-joined chunk summaries, or an empty string when ``text``
        is empty or contains only whitespace (no API call is made then).

    Raises:
        ValueError: If ``max_input_length`` is not positive.
    """
    if max_input_length <= 0:
        raise ValueError("max_input_length must be a positive integer")

    # Nothing to summarize: skip building a client and calling the API.
    if not text or not text.strip():
        return ""

    text_chunks = [
        text[start:start + max_input_length]
        for start in range(0, len(text), max_input_length)
    ]

    chat_model = ChatOpenAI(openai_api_key=OPENAI_API_KEY, model="gpt-3.5-turbo", temperature=0)

    # `invoke` replaces calling the model object directly, which is deprecated.
    summaries = [
        chat_model.invoke(
            [HumanMessage(content=f"Summarize the following text:\n\n{chunk}")]
        ).content
        for chunk in text_chunks
    ]

    return "\n".join(summaries)

# Initialize the question-answering pipeline
question_answerer = pipeline("question-answering", model='distilbert-base-cased-distilled-squad')

# Function to answer questions based on the summary
def question_answer(summary, question):
    """Answer ``question`` using ``summary`` as context and format the result.

    Runs the module-level ``question_answerer`` pipeline and returns a
    Markdown string with the question, the extracted answer, its score
    (four decimal places) and the answer's start/end indices.
    """
    prediction = question_answerer(question=question, context=summary)
    answer, score, start, end = (
        prediction[key] for key in ('answer', 'score', 'start', 'end')
    )

    return f"**Question:** {question}\n**Answer:** {answer}\n**Score:** {score:.4f}\n**Start Index:** {start}\n**End Index:** {end}"

# Main function to integrate PDF summarization and Gradio
def main(pdf_path):
    """Summarize the PDF at ``pdf_path`` and serve a Gradio Q&A app over it.

    Returns the string "No text found in PDF." when no text could be
    extracted; otherwise builds the interface, launches it and returns None.
    """
    pdf_text = extract_text_from_pdf(pdf_path)
    if not pdf_text:
        return "No text found in PDF."

    summary = summarize_text(pdf_text)

    # The summary is computed once; each question is answered against it.
    def answer_from_summary(question):
        return question_answer(summary, question)

    qa_app = gr.Interface(
        fn=lambda question: answer_from_summary(question),
        inputs=[
            gr.Textbox(label="Ask a question about the PDF summary.", lines=2)
        ],
        outputs=gr.Markdown(label="Answer to question, probability score, and location."),
        title="PDF Summary Question Answering",
        description="Ask questions based on the summary of the uploaded PDF."
    )

    qa_app.launch(show_error=True)

# Example usage
if __name__ == "__main__":
    pdf_path = "/users/shephalidubey/Documents/AQI-DETECTIVES.pdf"  # Replace with your PDF file path
    main(pdf_path)


Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.
  warn_deprecated(
  warn_deprecated(


Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


Explanation
extract_text_from_pdf: This function reads the PDF and extracts text from each page.
summarize_text: This function splits the text into 4,096-character chunks, asks OpenAI's gpt-3.5-turbo chat model (through LangChain) to summarize each chunk, and joins the chunk summaries with newlines. Adjust max_input_length to change the chunk size.
main: This function ties everything together. It extracts text from the PDF, summarizes it, and launches a Gradio app that answers questions about the summary.
Running the Code
Save the code to a file, e.g., pdf_summarizer.py.
Replace the value of pdf_path in the "Example usage" block with the path to your PDF file.
Run the script: python pdf_summarizer.py

For summarizing very large texts, especially when working with PDFs, consider the following models and pipelines that are known for handling long documents effectively:

1. Longformer
Model: allenai/longformer-large-4096
Description: Longformer is specifically designed to handle long documents by using a sparse attention mechanism. This allows it to process longer sequences than traditional transformers. For summarization, use its encoder-decoder variant LED (Longformer Encoder-Decoder), e.g. allenai/led-base-16384, which is designed for sequence-to-sequence tasks.
Simplified Pipeline: The summarization is handled more directly without needing to deal with tokenizers separately.
2. BigBird
Model: google/bigbird-roberta-base
Description: Similar to Longformer, BigBird can handle long sequences efficiently. It employs a combination of global and local attention, making it suitable for long texts.
3. LED (Longformer Encoder-Decoder)
Model: allenai/led-large-16384
Description: This model is built on Longformer and is designed for sequence-to-sequence tasks, like summarization, while supporting longer inputs.
Note: the identifier facebook/bart-large-LED does not exist on the Hugging Face Hub. Loading it fails with "OSError: facebook/bart-large-LED is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'. If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`". Use an allenai/led-* checkpoint instead.
4. GPT-3 or GPT-4 (if accessible)
Description: If you have access to OpenAI’s API for GPT models, they can generate summaries for large texts effectively. You might need to break your text into smaller parts, but they handle context well.

Additional Considerations
Chunking: If the text is extremely long, you may still need to chunk it, as even these models have maximum input sizes.
Preprocessing: Ensure the text extracted from the PDF is clean and properly formatted before summarization to improve the quality of the output.
Performance: Keep an eye on performance and response time, especially with larger models, as they may require more resources.

*FLASK FRONTEND INTERFACE*

#Step 2: Create a Frontend Interface -->
#Set Up a Basic Web Interface: You can use Flask or Streamlit to create a simple web app. Here’s an example using Flask.

pip install flask

#Create the Flask App: Here’s a simple Flask app that allows file uploads.

In [None]:
import html
import os

from flask import Flask, request, render_template
from werkzeug.utils import secure_filename

app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = 'uploads/'


@app.route('/')
def upload_file():
    """Serve the upload form (templates/upload.html)."""
    return render_template('upload.html')


@app.route('/upload', methods=['POST'])
def handle_file_upload():
    """Save the uploaded PDF, summarize it and return the summary as HTML.

    Returns a plain-text message when no file was sent or selected, or when
    the client-supplied name is empty after sanitizing.
    """
    if 'file' not in request.files:
        return "No file uploaded"
    file = request.files['file']
    if file.filename == '':
        return "No file selected"

    # Never trust the client-supplied name: strip path separators and other
    # unsafe characters so the file cannot be written outside UPLOAD_FOLDER.
    safe_name = secure_filename(file.filename)
    if not safe_name:
        return "Invalid file name"

    # file.save() fails if the target directory does not exist yet.
    os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
    file_path = os.path.join(app.config['UPLOAD_FOLDER'], safe_name)
    file.save(file_path)

    # extract_text_from_pdf and summarize_text are the Step 1 functions; if
    # this app lives in its own file, import them from wherever that code was
    # saved.
    summary = summarize_text(extract_text_from_pdf(file_path))

    # The summary is derived from uploaded content, so escape it before
    # embedding it in HTML.
    return f"<h1>Summary</h1><p>{html.escape(summary)}</p>"


if __name__ == '__main__':
    app.run(debug=True)

In [None]:
#Create the HTML Template: Create a folder named templates and add an upload.html file:

In [None]:
<!doctype html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Upload PDF</title>
</head>
<body>
    <h1>Upload PDF to Summarize</h1>
    <form method="post" action="/upload" enctype="multipart/form-data">
        <input type="file" name="file" accept=".pdf" required>
        <input type="submit" value="Upload">
    </form>
</body>
</html>

In [None]:
#Step 3: Run Your Application
#Run the Flask App: Run your Flask application using:

In [None]:
python app.py

Access the Web Interface: Open your web browser and go to http://127.0.0.1:5000/ to access the upload interface.

Step 4: Test the Application
Upload a PDF file through the interface, and it should return the summarized text.

Additional Considerations
Security: Ensure to handle file uploads securely to prevent any vulnerabilities.
Deployment: Consider deploying your app using platforms like Heroku or AWS for wider access.
Error Handling: Add error handling for file reading and summarization steps to improve user experience.
Feel free to customize the code and expand functionality as needed!

*CYBER SECURITY*

HOW TO ENSURE SECURE FILE UPLOADS TO PREVENT ANY VULNERABILITIES
Handling file uploads securely is crucial to prevent vulnerabilities and potential attacks. Here are some best practices to ensure secure file uploads:

1. Validate File Types
Restrict Allowed File Types: Only allow specific file types (e.g., PDFs). You can check the file extension and MIME type.

In [None]:
allowed_extensions = {'pdf'}


def allowed_file(filename):
    """Return True when the text after the last dot, lower-cased, is an allowed extension."""
    _stem, dot, extension = filename.rpartition('.')
    return bool(dot) and extension.lower() in allowed_extensions

2. Limit File Size
Set Maximum File Size: Restrict the size of the uploaded files to prevent denial of service attacks.

In [None]:
app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024  # 16 MB

3. Use a Secure Upload Directory
Store Uploaded Files Outside the Web Root: Keep uploaded files in a directory not accessible via the web server.
Generate Unique Filenames: Rename uploaded files to avoid overwriting and to prevent predictable URLs.

In [None]:
import uuid

from werkzeug.utils import secure_filename

# secure_filename() strips path separators and unsafe characters; the random
# prefix keeps uploads from overwriting each other and makes the stored name
# unpredictable.
unique_name = f"{uuid.uuid4().hex}_{secure_filename(file.filename)}"
file_path = os.path.join(app.config['UPLOAD_FOLDER'], unique_name)

4. Sanitize Input
Sanitize Filenames: Use libraries like werkzeug to ensure that filenames do not contain unsafe characters.
Avoid User-Controlled Paths: Do not let users specify directory paths for uploads.
5. Use Antivirus Scanning
Scan Files for Malware: Integrate antivirus scanning solutions to check uploaded files for malicious content.
6. Implement Access Controls
User Authentication: Ensure that only authenticated users can upload files.
Authorization: Check if the user has permission to perform uploads.
7. Set Appropriate Permissions
File and Directory Permissions: Ensure that the uploaded files and directories have restricted permissions (e.g., not writable by the web server).
8. Limit Execution
Disable Script Execution: If your upload directory is web-accessible, ensure that no scripts can be executed from that directory.
9. Monitor and Log Uploads
Logging: Keep logs of uploaded files and user activities for auditing and monitoring.
Alerts: Set up alerts for suspicious activities, such as multiple uploads from a single user.
10. Regular Security Reviews
Vulnerability Assessments: Regularly assess your application for security vulnerabilities.
Keep Libraries Updated: Regularly update libraries and frameworks to patch known vulnerabilities.
Example Implementation
Here's a snippet incorporating some of these security practices in your Flask app:

In [None]:
from flask import Flask, request, render_template
from werkzeug.utils import secure_filename
import os

app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = 'uploads/'
app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024  # 16 MB
allowed_extensions = {'pdf'}


def allowed_file(filename):
    """Return True when ``filename`` has an extension listed in ``allowed_extensions``."""
    return '.' in filename and filename.rsplit('.', 1)[1].lower() in allowed_extensions


@app.route('/')
def upload_file():
    """Serve the upload form (templates/upload.html)."""
    return render_template('upload.html')


@app.route('/upload', methods=['POST'])
def handle_file_upload():
    """Validate and store an uploaded PDF.

    Responds with HTTP 400 and a short message when the request carries no
    file or the file name is not an allowed type; otherwise saves the file
    under ``UPLOAD_FOLDER`` and confirms the upload.
    """
    if 'file' not in request.files:
        return "No file uploaded", 400

    file = request.files['file']

    # Validate the sanitized name, since that is the name actually written
    # to disk (e.g. ".pdf" sanitizes to "pdf", which has no extension).
    safe_name = secure_filename(file.filename or '')
    if not safe_name or not allowed_file(safe_name):
        return "Invalid file type", 400

    # file.save() fails if the target directory does not exist yet.
    os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
    file_path = os.path.join(app.config['UPLOAD_FOLDER'], safe_name)
    file.save(file_path)

    # Add antivirus scanning here (see next markdown section on how to)

    return "File uploaded successfully"

TO ADD ANTIVIRUS SCANNING TO FILE UPLOAD PROCESS in a Flask application, you can use an antivirus library or integrate with an external antivirus service. One common approach is to use ClamAV, an open-source antivirus engine. Here’s how you can do it:

Step 1: Install ClamAV
Install ClamAV:
On Ubuntu/Debian:
bash
Copy code
sudo apt-get install clamav clamav-daemon
On macOS using Homebrew:
bash
Copy code
brew install clamav
Update ClamAV Database: After installing, update the virus definitions:
bash
Copy code
sudo freshclam
Step 2: Install the Python Library
You can use the pyclamd library to interact with ClamAV from Python. Install it using pip:

bash
Copy code
pip install pyclamd
Step 3: Integrate Antivirus Scanning into Your Code
Now, modify your Flask application to include antivirus scanning after the file is uploaded but before it’s processed. Here’s how you can do that:

python
Copy code
from flask import Flask, request, render_template
from werkzeug.utils import secure_filename
import os
import pyclamd

app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = 'uploads/'
app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024  # 16 MB
allowed_extensions = {'pdf'}


def allowed_file(filename):
    """Return True when ``filename`` has an extension listed in ``allowed_extensions``."""
    return '.' in filename and filename.rsplit('.', 1)[1].lower() in allowed_extensions


def scan_file(file_path):
    """Scan ``file_path`` with the ClamAV daemon and return pyclamd's result.

    Per the pyclamd documentation, ``scan_file`` returns ``None`` for a clean
    file and a dict describing the finding (or scan error) otherwise.

    Raises:
        RuntimeError: If the ClamAV daemon does not answer a ping.
    """
    cd = pyclamd.ClamdUnixSocket()  # Adjust for your setup (Unix or TCP)
    if not cd.ping():  # Check if ClamAV is running
        raise RuntimeError("ClamAV is not running")
    # The daemon opens the file itself, so give it an absolute path rather
    # than one relative to this process's working directory.
    return cd.scan_file(os.path.abspath(file_path))


@app.route('/')
def upload_file():
    """Serve the upload form (templates/upload.html)."""
    return render_template('upload.html')


@app.route('/upload', methods=['POST'])
def handle_file_upload():
    """Validate, store and virus-scan an uploaded PDF.

    The file is kept only when the scan reports no finding.  A flagged file,
    or one that could not be scanned, is deleted again and an error status
    is returned.
    """
    if 'file' not in request.files:
        return "No file uploaded", 400

    file = request.files['file']

    # Validate the sanitized name, since that is the name written to disk.
    safe_name = secure_filename(file.filename or '')
    if not safe_name or not allowed_file(safe_name):
        return "Invalid file type", 400

    # file.save() fails if the target directory does not exist yet.
    os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
    file_path = os.path.join(app.config['UPLOAD_FOLDER'], safe_name)
    file.save(file_path)

    # Scan the uploaded file for viruses
    try:
        scan_result = scan_file(file_path)
    except Exception:
        # Fail closed: an unscanned upload must not stay on disk.
        app.logger.exception("Antivirus scan failed for %s", file_path)
        os.remove(file_path)
        return "Unable to scan the uploaded file; please try again later.", 503

    # A non-empty result means the scanner reported something about the file.
    if scan_result:
        app.logger.warning("Upload %s rejected by antivirus: %s", file_path, scan_result)
        os.remove(file_path)
        return "File rejected: the antivirus scan flagged it.", 400

    return "File uploaded successfully. No issues found."


if __name__ == '__main__':
    app.run(debug=True)
Step 4: Start ClamAV Daemon
Before running your Flask app, ensure that the ClamAV daemon is running:

bash
Copy code
sudo systemctl start clamav-daemon
Step 5: Run Your Application
Now you can run your Flask application. When a file is uploaded, it will be scanned by ClamAV before any processing takes place.

Notes
Error Handling: You should add error handling for cases where ClamAV is not running or if there are issues with the scanning process.
Performance: Scanning files may add some latency, especially for larger files, so consider this in your user experience.
Logging: Log scan results for monitoring and auditing purposes.
Periodic Updates: Regularly update your ClamAV database with freshclam to ensure you have the latest virus definitions.
By integrating antivirus scanning, you enhance the security of your file upload functionality significantly!

HOW TO DISABLE SCRIPT EXECUTION VIA WEB Disabling script execution in a web-accessible upload directory is crucial for preventing the execution of malicious scripts. Here are several methods to achieve this, depending on the web server you are using:

1. Using .htaccess (for Apache)
If you are using Apache as your web server, you can create or modify a .htaccess file in your upload directory to disable script execution.

plaintext
Copy code
# .htaccess file in the upload directory
<FilesMatch "\.(php|pl|cgi|py|jsp|asp|sh|rb)$">
    Order Allow,Deny
    Deny from all
</FilesMatch>

# Alternatively, to deny access to all scripts
Options -ExecCGI
AddHandler cgi-script .pl .py .jsp .asp .sh .rb
2. Using Nginx Configuration
For Nginx, you can configure the server block to disable script execution in the upload directory. Here's an example:

nginx
Copy code
location /uploads/ {
    # Deny access to script files
    location ~ \.(php|pl|cgi|py|jsp|asp|sh|rb)$ {
        deny all;
    }
    # Serve other files normally, but never expose a listing of the
    # uploaded files to visitors.
    autoindex off;
}
3. Rename the Upload Directory
Consider naming the upload directory in a way that avoids common script extensions. For instance, you can use a name that doesn’t imply execution, like uploads_files/. This approach helps, but is not foolproof.

4. Change File Permissions
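Ensure that uploaded files are not executable. On Linux a directory needs its execute bit so that it can be entered, so the directory itself gets 755, while the files stored in it should get 644 (no execute bit). For the directory, set the permissions as follows: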

bash
Copy code
chmod 755 uploads/
5. Use a Separate Domain or Subdomain
Consider serving uploaded files from a separate domain or subdomain that does not allow script execution. This isolates your main application and reduces risk.

6. File Validation and Sanitization
Even if script execution is disabled, always validate and sanitize filenames and file content to ensure no harmful files are uploaded. This can include checking the file type and size as discussed previously.

7. Use a Content Security Policy (CSP)
If applicable, implement a Content Security Policy to control resources the user agent is allowed to load. This doesn’t prevent script execution directly but adds an additional layer of security.

8. Keep Uploads Outside the Web Root
If possible, store uploads outside the web root directory. This is the most secure option, as it prevents direct access via the web server.

Example Implementation for .htaccess
Here’s an example .htaccess file that could be placed in your uploads directory:

plaintext
Copy code
# Deny execution of scripts
<FilesMatch "\.(php|pl|cgi|py|jsp|asp|sh|rb)$">
    Order Allow,Deny
    Deny from all
</FilesMatch>

# Disable CGI script execution
Options -ExecCGI
Conclusion
By implementing these methods, you can significantly reduce the risk of script execution in your upload directory and enhance the security of your application. Always combine these measures with other security practices for optimal protection.

Setting up appropriate and restricted file and directory permissions for uploaded files is crucial for enhancing the security of your application. Here’s a guide on how to configure these permissions effectively:

1. Use the Principle of Least Privilege
Grant only the minimum permissions necessary for users and processes to function. Avoid using overly permissive settings like 777, which allows everyone to read, write, and execute files.
2. Setting Directory Permissions
When you create your upload directory, you should restrict its permissions. Here’s how you can set up the permissions in a Unix/Linux environment:

Create the Upload Directory

bash
Copy code
mkdir uploads
Set Directory Permissions

Set the directory permissions to 755, allowing the owner to read, write, and execute, while others can only read and execute:

bash
Copy code
chmod 755 uploads
This allows the web server to access files but prevents other users from modifying them.

3. Set User and Group Ownership
Ensure that the upload directory and its files are owned by the web server user (e.g., www-data for Apache or Nginx on Ubuntu). You can change the ownership with:

bash
Copy code
chown www-data:www-data uploads
4. Set File Permissions for Uploaded Files
When files are uploaded, you should set their permissions to 644. This allows the owner to read and write the file, while others can only read it:

After File Upload

Modify your file upload code to set permissions after saving the file. Here’s an example in Python:

python
Copy code
import os
import stat

def handle_file_upload():
    """Store an uploaded file and restrict its permissions to 644.

    Returns a short status message: an error text when the request has no
    file or the file name is rejected, otherwise a success message.
    """
    if 'file' not in request.files:
        return "No file uploaded"

    uploaded = request.files['file']

    if not (uploaded.filename != '' and allowed_file(uploaded.filename)):
        return "Invalid file type"

    destination = os.path.join(app.config['UPLOAD_FOLDER'], secure_filename(uploaded.filename))
    uploaded.save(destination)

    # Set file permissions to 644: owner read/write, group and others read-only.
    owner_bits = stat.S_IRUSR | stat.S_IWUSR
    read_only_bits = stat.S_IRGRP | stat.S_IROTH
    os.chmod(destination, owner_bits | read_only_bits)

    return "File uploaded successfully"
5. Regularly Audit Permissions
Regularly check and audit the permissions of your upload directory and files to ensure they have not changed unintentionally.

6. Remove Write Permissions for Other Users
If the web server does not need to write to the upload directory after the initial upload, you can remove write permissions for the group and others:

bash
Copy code
chmod 755 uploads
This prevents any further modifications by unauthorized users.

7. Configure umask for the Web Server
Set the umask for the web server user to ensure that newly created files have restricted permissions. You can add the following line to the appropriate configuration file (e.g., /etc/profile or /etc/bash.bashrc):

bash
Copy code
umask 002
This ensures that files are created with 664 and directories with 775.

8. Using ACLs (Access Control Lists)
If you need more granular control over permissions, consider using ACLs. This allows you to set permissions for specific users or groups:

bash
Copy code
# Install ACL if not installed
sudo apt-get install acl

# Set default ACL for the uploads directory
setfacl -m u:www-data:rwx uploads
setfacl -d -m u:www-data:rwx uploads
Conclusion
By carefully setting and managing file and directory permissions, you can greatly enhance the security of your file uploads. Always follow best practices, and regularly review your settings to ensure they remain secure.

SET APPROPRIATE PERMISSIONS
File and Directory Permissions: Ensure that the uploaded files and directories have restricted permissions (e.g., not writable by the web server).

Setting up appropriate and restricted file and directory permissions for uploaded files is crucial for enhancing the security of your application. Here’s a guide on how to configure these permissions effectively:

1. Use the Principle of Least Privilege
Grant only the minimum permissions necessary for users and processes to function. Avoid using overly permissive settings like 777, which allows everyone to read, write, and execute files.
2. Setting Directory Permissions
When you create your upload directory, you should restrict its permissions. Here’s how you can set up the permissions in a Unix/Linux environment:

Create the Upload Directory

bash
Copy code
mkdir uploads
Set Directory Permissions

Set the directory permissions to 755, allowing the owner to read, write, and execute, while others can only read and execute:

bash
Copy code
chmod 755 uploads
This allows the web server to access files but prevents other users from modifying them.

3. Set User and Group Ownership
Ensure that the upload directory and its files are owned by the web server user (e.g., www-data for Apache or Nginx on Ubuntu). You can change the ownership with:

bash
Copy code
chown www-data:www-data uploads
4. Set File Permissions for Uploaded Files
When files are uploaded, you should set their permissions to 644. This allows the owner to read and write the file, while others can only read it:

After File Upload

Modify your file upload code to set permissions after saving the file. Here’s an example in Python:

python
Copy code
import os
import stat

def handle_file_upload():
    """Store an uploaded file and restrict its permissions to 644.

    Returns a short status message: an error text when the request has no
    file or the file name is rejected, otherwise a success message.
    """
    if 'file' not in request.files:
        return "No file uploaded"

    uploaded = request.files['file']

    if not (uploaded.filename != '' and allowed_file(uploaded.filename)):
        return "Invalid file type"

    destination = os.path.join(app.config['UPLOAD_FOLDER'], secure_filename(uploaded.filename))
    uploaded.save(destination)

    # Set file permissions to 644: owner read/write, group and others read-only.
    owner_bits = stat.S_IRUSR | stat.S_IWUSR
    read_only_bits = stat.S_IRGRP | stat.S_IROTH
    os.chmod(destination, owner_bits | read_only_bits)

    return "File uploaded successfully"
5. Regularly Audit Permissions
Regularly check and audit the permissions of your upload directory and files to ensure they have not changed unintentionally.

6. Remove Write Permissions for Other Users
If the web server does not need to write to the upload directory after the initial upload, you can remove write permissions for the group and others:

bash
Copy code
chmod 755 uploads
This prevents any further modifications by unauthorized users.

7. Configure umask for the Web Server
Set the umask for the web server user to ensure that newly created files have restricted permissions. You can add the following line to the appropriate configuration file (e.g., /etc/profile or /etc/bash.bashrc):

bash
Copy code
umask 002
This ensures that files are created with 664 and directories with 775.

8. Using ACLs (Access Control Lists)
If you need more granular control over permissions, consider using ACLs. This allows you to set permissions for specific users or groups:

bash
Copy code
# Install ACL if not installed
sudo apt-get install acl

# Set default ACL for the uploads directory
setfacl -m u:www-data:rwx uploads
setfacl -d -m u:www-data:rwx uploads
Conclusion
By carefully setting and managing file and directory permissions, you can greatly enhance the security of your file uploads. Always follow best practices, and regularly review your settings to ensure they remain secure.

IMPLEMENTING ACCESS CONTROLS

Implementing access controls for file uploads involves ensuring that only authenticated users can upload files and that they have the appropriate permissions to do so. Here’s a step-by-step guide on how to achieve this in a Flask application:

Step 1: User Authentication
To implement user authentication, you can use Flask extensions such as Flask-Login. This allows you to manage user sessions and handle authentication.

Install Flask-Login

First, install Flask-Login: 

pip install Flask-Login


1. Set Up User Authentication

Create a User Model: Define a user model for your application. This example assumes a simple user model using a dictionary for demonstration purposes. In a production app, you'd likely use a database.

In [None]:
from flask import Flask, redirect, url_for, request, session
from flask_login import LoginManager, UserMixin, login_user, logout_user, login_required

app = Flask(__name__)
# NOTE(review): this is a placeholder literal; load a long random value from
# configuration or the environment rather than committing it.
app.secret_key = 'your_secret_key'  # Replace with your secret key
# Attach Flask-Login to the application.
login_manager = LoginManager()
login_manager.init_app(app)

# Sample user store (replace with a database in a real application)
# NOTE(review): passwords are stored in plain text here for demonstration
# only; a real user store should keep salted password hashes.
users = {'user': {'password': 'password123'}}

2. User Loader: Implement the user loader function for Flask-Login.

In [None]:
@login_manager.user_loader
def load_user(user_id):
    """Rebuild the logged-in user from the id stored in the session.

    Returns a ``User`` for ids present in ``users`` and ``None`` for unknown
    ids.  Flask-Login expects ``None`` (not an exception) for an invalid id,
    so a stale or forged session is treated as anonymous instead of being
    turned into a user object.
    """
    if user_id not in users:
        return None
    return User(user_id)

3. User Class: Define a simple user class that inherits from UserMixin.

In [None]:
class User(UserMixin):
    """Minimal Flask-Login user identified by its username."""

    def __init__(self, username):
        self.username = username
        # UserMixin.get_id() reads ``self.id``; without it login_user() cannot
        # obtain an identifier to store in the session.
        self.id = username

In [None]:
4. Login Route: Create a route for user login.

In [None]:
@app.route('/login', methods=['GET', 'POST'])
def login():
    """Show the login form, or check submitted credentials against ``users``.

    Any non-POST request gets the HTML form.  A POST with a matching
    username/password logs the user in and redirects to ``upload_file``;
    otherwise an error message is returned.
    """
    if request.method != 'POST':
        return '''
        <form method="post">
            <input type="text" name="username" placeholder="Username">
            <input type="password" name="password" placeholder="Password">
            <input type="submit" value="Login">
        </form>
    '''

    username = request.form['username']
    password = request.form['password']
    account = users.get(username)

    # Reject unknown users and wrong passwords with the same message.
    if not account or account['password'] != password:
        return "Invalid username or password"

    login_user(User(username))
    return redirect(url_for('upload_file'))

In [None]:
5. Logout Route: Implement a logout route.

In [None]:
@app.route('/logout')
@login_required
def logout():
    """End the current user's session and redirect to the login page."""
    logout_user()
    login_page = url_for('login')
    return redirect(login_page)

Step 2: File Upload Authorization
Now that you have user authentication set up, you can implement authorization to check if users are allowed to upload files.

1. Define User Roles: If you have different user roles (e.g., admin, regular users), define what permissions each role has.

In [None]:
user_roles = {
    'user': 'regular',
    'admin': 'admin'
}

2. Authorization Check: Modify your file upload route to check if the authenticated user has permission to upload files.

In [None]:
@app.route('/upload', methods=['POST'])
@login_required  # Ensure the user is logged in
def handle_file_upload():
    """Store an uploaded file if the logged-in user has the 'admin' role.

    Responds with 403 when the user is not an admin and 400 when the request
    has no file or the file name is rejected.
    """
    # Local import so this snippet does not depend on the import cell.
    from flask_login import current_user

    # Check user permissions.  The login route only calls login_user(), which
    # never sets session['username'], so take the name from Flask-Login's
    # current_user.  A user without an entry in user_roles has no role.
    username = current_user.username
    if user_roles.get(username) != 'admin':
        return "You do not have permission to upload files.", 403

    if 'file' not in request.files:
        return "No file uploaded", 400

    file = request.files['file']

    if file.filename == '' or not allowed_file(file.filename):
        return "Invalid file type", 400

    file_path = os.path.join(app.config['UPLOAD_FOLDER'], secure_filename(file.filename))
    file.save(file_path)

    return "File uploaded successfully"

Step 3: Protect the Upload Route
Use the @login_required decorator from Flask-Login to protect your upload route. This ensures that only authenticated users can access it.

Step 4: Test Your Implementation
Start your Flask application.
Navigate to the login page and enter credentials.
Try uploading a file as a regular user and as an admin to see how permissions are enforced.
Conclusion
By following these steps, you can effectively implement user authentication and authorization for file uploads in your Flask application. This ensures that only authenticated users can upload files and that they have the appropriate permissions to do so. For production, consider using a database for user management and implement more robust security measures.

HOW TO MONITOR AND LOG UPLOADS 
Monitoring and logging uploads is essential for maintaining security and accountability in your application. Here's how to implement logging and set up alerts for suspicious activities in a Flask application.

#Step 1: Set Up Logging
Import the Logging Module: Start by importing Python's built-in logging module.

In [None]:
import logging

In [None]:
# Configure Logging: Set up the logging configuration. You can log to a file or stdout. Here’s how to log to a file:

logging.basicConfig(
    filename='upload_logs.log',  # Log file name
    level=logging.INFO,           # Log level
    format='%(asctime)s - %(levelname)s - %(message)s'
)

In [None]:
# Log Upload Events: In your upload handling route, log relevant information such as the username, filename, and upload timestamp.
@app.route('/upload', methods=['POST'])
@login_required
def handle_file_upload():
    """Store an uploaded file and log who uploaded it.

    Responds with 400 when the request has no file or the file name is
    rejected.
    """
    # Local import so this snippet does not depend on the import cell.
    from flask_login import current_user

    if 'file' not in request.files:
        return "No file uploaded", 400

    file = request.files['file']

    if file.filename == '' or not allowed_file(file.filename):
        return "Invalid file type", 400

    safe_name = secure_filename(file.filename)
    file_path = os.path.join(app.config['UPLOAD_FOLDER'], safe_name)
    file.save(file_path)

    # Log the upload.  The login route only calls login_user(), which never
    # sets session['username'], so take the name from current_user.  Log the
    # sanitized file name with %r so client-supplied text cannot inject extra
    # lines into the log.
    logging.info("User %r uploaded %r", current_user.username, safe_name)

    return "File uploaded successfully"

#Step 2: Monitor User Activity
Track Upload Count: Maintain a simple counter for the number of uploads per user in memory or in a database. This will help identify suspicious activity.

In [None]:
import time
from collections import deque

UPLOAD_WINDOW_SECONDS = 60   # length of the sliding window
MAX_UPLOADS_PER_WINDOW = 5   # more uploads than this inside the window is suspicious

user_upload_count = {}       # username -> uploads inside the current window
_user_upload_times = {}      # username -> timestamps of those uploads


@app.route('/upload', methods=['POST'])
@login_required
def handle_file_upload():
    ...
    # Local import so this snippet does not depend on the import cell.
    from flask_login import current_user

    # The login route only calls login_user(), which never sets
    # session['username'], so take the name from current_user.
    username = current_user.username

    # Record this upload and forget uploads older than the window, so the
    # count really means "uploads in the last minute" rather than growing
    # for as long as the process runs.
    now = time.monotonic()
    recent = _user_upload_times.setdefault(username, deque())
    recent.append(now)
    while now - recent[0] > UPLOAD_WINDOW_SECONDS:
        recent.popleft()
    user_upload_count[username] = len(recent)

    # Check for suspicious activity (e.g., more than 5 uploads in a minute)
    if user_upload_count[username] > MAX_UPLOADS_PER_WINDOW:
        logging.warning(f"Suspicious activity detected: User '{username}' has uploaded {user_upload_count[username]} files.")
        # Here you can send an alert or take action

    ...

#Step 3: Set Up Alerts for Suspicious Activities
Email Alerts: You can set up email alerts for suspicious activities using the smtplib library. Here’s an example:

In [None]:
import smtplib
from email.mime.text import MIMEText

def send_alert_email(username):
    """Email the administrator that ``username`` triggered a suspicious-upload alert.

    Connects to the SMTP server on port 587, upgrades the connection with
    STARTTLS, logs in and sends a plain-text message.
    """
    alert = MIMEText(f"Suspicious activity detected for user: {username}")
    alert['Subject'] = 'Alert: Suspicious File Upload Activity'
    alert['From'] = 'your_email@example.com'
    alert['To'] = 'admin@example.com'

    smtp = smtplib.SMTP('smtp.example.com', 587)
    with smtp:
        smtp.starttls()
        smtp.login('your_email@example.com', 'your_password')
        smtp.send_message(alert)

# Call this function when suspicious activity is detected
if user_upload_count[username] > 5:
    send_alert_email(username)

Log Suspicious Activities: Make sure to log any suspicious activity as well, so you have a record of it:

In [None]:
logging.warning(f"Suspicious activity detected: User '{username}' has uploaded {user_upload_count[username]} files.")

#Step 4: Regular Review and Maintenance
1. Log Rotation: Implement log rotation to prevent your log file from becoming too large. You can use the logging.handlers module:

In [None]:
from logging.handlers import RotatingFileHandler

# Rotate upload_logs.log once it reaches maxBytes, keeping backupCount old files.
handler = RotatingFileHandler('upload_logs.log', maxBytes=10000, backupCount=3)
# A handler added by hand has no formatter; without one, records would lose
# the timestamp and level.
handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))

logger = logging.getLogger()
# If a file handler for the same file is already attached (for example from
# logging.basicConfig(filename=...), or from running this cell twice), replace
# it. Otherwise every record is written to the file more than once.
for existing in list(logger.handlers):
    if (
        isinstance(existing, logging.FileHandler)
        and existing.baseFilename == handler.baseFilename
    ):
        logger.removeHandler(existing)
        existing.close()
logger.addHandler(handler)

2. Regular Audits: Periodically review the logs for any unusual patterns or activities.
   
Conclusion
By implementing logging and monitoring for uploads, you can enhance the security of your application and quickly respond to suspicious activities. This setup not only helps in tracking user actions but also aids in maintaining compliance with security policies and regulations.