<a href="https://colab.research.google.com/github/VALQUIRIAFABRO/Study_Python/blob/master/pgp_encrypted_file.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## PGP-encrypted file

Python function that decrypts a PGP-encrypted file named data.pgp and saves the decrypted content to a file named data.csv using the gnupg library.

Make sure to replace 'your_passphrase' with the actual passphrase used for decryption. Ensure that the gnupg library is installed in your environment (pip install python-gnupg).

In [None]:
# Install the third-party packages the cells below import:
# python-gnupg (imported as `gnupg`), pandas, and hdfs (HDFS client).
!pip install python-gnupg pandas hdfs

In [None]:
# Google Colab does not provide a traditional interactive terminal, so interactive
# commands such as `gpg --gen-key` cannot be run here. As a workaround, generate the
# GPG key pair on your local machine and upload the exported key files to Colab.
# On your local machine, open a terminal and run:

# Generate key
# gpg --gen-key

# list keys
# gpg --list-keys

# Export the public key:
# gpg --export -a "your_email@example.com" > public_key.asc

# Export the private key:
# gpg --export-secret-key -a "your_email@example.com" > private_key.asc

In [None]:
# Upload the key files to Google Colab.
# Select public_key.asc and private_key.asc when prompted: the following cells
# read both files by those exact names from the working directory.
from google.colab import files

# Keep the value returned by files.upload() so the upload result can be inspected.
uploaded = files.upload()

In [None]:
# Import the uploaded key files (public_key.asc and private_key.asc) into the
# gpg keyring of the Colab environment.
# The key files are ASCII-armored text (exported with `gpg --export -a`), so they
# should be uploaded as plain UTF-8/ASCII text files.
# NOTE(review): no --homedir is passed here, so gpg presumably imports into its
# default home directory rather than /content/.gnupg, which the Python cells
# below use — confirm this is intended.

!gpg --import public_key.asc
!gpg --import private_key.asc


Function that generates sample data and saves it locally both as a CSV file and as a PGP-encrypted file.

In [None]:
import os

# Imported in this cell so it also runs on a fresh kernel; previously `gnupg`
# was first imported in a later cell, so this cell raised NameError.
import gnupg

# Create the keyring directory and restrict it to the owner *before* a GPG
# client is created on it. os.chmod is applied explicitly because the `mode`
# argument of makedirs is ignored when the directory already exists.
os.makedirs('/content/.gnupg', mode=0o700, exist_ok=True)
os.chmod('/content/.gnupg', 0o700)

# One verbose client, bound to the same keyring directory that the encryption
# and decryption cells use. (The original built a throw-away default client
# first and immediately replaced it.)
gpg = gnupg.GPG(gnupghome='/content/.gnupg', verbose=True)


In [None]:
import os
import pandas as pd
import random
import gnupg
from datetime import datetime, timedelta

def generate_pgp_encrypted_file(
    passphrase='passphrase',
    recipients=('your_email@email.com',),
    public_key_file='public_key.asc',
    private_key_file='private_key.asc',
    gnupghome='/content/.gnupg',
):
    """Write a sample CSV and a PGP-encrypted copy of it to the working directory.

    Ten rows of synthetic data are saved as ``data_<YYYYmmdd_HHMMSS>.csv`` and
    then encrypted to ``data_<YYYYmmdd_HHMMSS>.gpg`` with the given keys.
    Progress and failures are reported with ``print``; nothing is returned.

    Args:
        passphrase: Passphrase protecting the private key. The default is a
            placeholder; pass the real value (e.g. from ``getpass.getpass()``)
            instead of editing it into the notebook.
        recipients: Recipient identifier, or an iterable of them, to encrypt
            for. The default is a placeholder e-mail address.
        public_key_file: Path of the ASCII-armored public key to import.
        private_key_file: Path of the ASCII-armored private key to import.
        gnupghome: Directory used as the GnuPG keyring home; created with
            owner-only permissions if missing.

    Raises:
        OSError: If a key file cannot be read or the keyring directory cannot
            be created or have its permissions changed.
    """
    # Accept a single recipient string as well as any iterable of recipients.
    if isinstance(recipients, str):
        recipient_list = [recipients]
    else:
        recipient_list = list(recipients)

    # Sample data generation: ten synthetic rows, timestamps one second apart.
    base_time = datetime.now()
    data = {
        'name': [f'Name{i}' for i in range(1, 11)],
        'age': [random.randint(20, 60) for _ in range(10)],
        'profession': [f'Profession{i}' for i in range(1, 11)],
        'years_of_experience': [random.randint(1, 30) for _ in range(10)],
        'activity': [f'Activity{i}' for i in range(1, 11)],
        'datetime': [base_time + timedelta(seconds=i) for i in range(10)]
    }
    df = pd.DataFrame(data)

    # A single timestamp names both outputs so the CSV and its encrypted copy
    # are easy to pair up.
    current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
    csv_filename = f'data_{current_time}.csv'
    encrypted_filename = f'data_{current_time}.gpg'
    df.to_csv(csv_filename, index=False)

    # Ensure the keyring directory exists and is owner-only. os.chmod replaces
    # the former `os.system('chmod ...')`: no shell is spawned and a failure
    # raises instead of being silently ignored.
    os.makedirs(gnupghome, mode=0o700, exist_ok=True)
    os.chmod(gnupghome, 0o700)

    # Initialize GPG on that keyring.
    gpg = gnupg.GPG(gnupghome=gnupghome)

    # Import the public key, then the private key (which needs the passphrase).
    with open(public_key_file, 'r') as f:
        gpg.import_keys(f.read())

    with open(private_key_file, 'r') as f:
        gpg.import_keys(f.read(), passphrase=passphrase)

    # An empty keyring after the imports means there is nothing to encrypt with.
    fingerprints = [key['fingerprint'] for key in gpg.list_keys()]
    if not fingerprints:
        print('No fingerprints found. Key import might have failed.')
        return

    # Mark every key in this keyring as ultimately trusted.
    for fingerprint in fingerprints:
        gpg.trust_keys(fingerprint, 'TRUST_ULTIMATE')

    # Encrypt the CSV for the requested recipients.
    with open(csv_filename, 'rb') as f:
        status = gpg.encrypt_file(
            f,
            recipients=recipient_list,
            output=encrypted_filename
        )

    if status.ok:
        print('File encrypted successfully.')
    else:
        print('Encryption failed:', status.stderr)

# Run the generator once. It reads public_key.asc and private_key.asc from the
# working directory and writes data_<timestamp>.csv plus data_<timestamp>.gpg.
generate_pgp_encrypted_file()


File encrypted successfully.


Decrypt the file

In [None]:
import os
import gnupg

def import_keys(gpg, public_key_file, private_key_file, passphrase):
    """Load a public/private key pair into ``gpg`` and print the resulting keyring.

    Args:
        gpg: Client object exposing ``import_keys`` and ``list_keys``.
        public_key_file: Path of the text file holding the public key.
        private_key_file: Path of the text file holding the private key.
        passphrase: Passphrase forwarded with the private key import.
    """
    # Public key: read the whole file, then hand its text to the client.
    with open(public_key_file, 'r') as public_handle:
        public_key_text = public_handle.read()
    gpg.import_keys(public_key_text)

    # Private key: same procedure, but the passphrase travels with it.
    with open(private_key_file, 'r') as private_handle:
        private_key_text = private_handle.read()
    gpg.import_keys(private_key_text, passphrase=passphrase)

    # Show both listings so the imports can be verified by eye.
    public_listing = gpg.list_keys()
    print("Public Keys:", public_listing)
    secret_listing = gpg.list_keys(secret=True)
    print("Private Keys:", secret_listing)

def decrypt_pgp_file(gpg, input_file, output_file, passphrase):
    """Decrypt ``input_file`` into ``output_file`` and print the outcome.

    Args:
        gpg: Client object exposing ``decrypt_file``.
        input_file: Path of the encrypted file; opened in binary mode.
        output_file: Path handed to the client as the decryption target.
        passphrase: Passphrase forwarded to the client.
    """
    with open(input_file, 'rb') as encrypted_stream:
        result = gpg.decrypt_file(
            encrypted_stream,
            passphrase=passphrase,
            output=output_file,
        )

    # Failure path first: report the client's status and error text, then stop.
    if not result.ok:
        print(f"Failed to decrypt the file. Status: {result.status}")
        print(f"Error message: {result.stderr}")
        return

    print(f"File decrypted successfully. Saved as: {output_file}")

# Local import: only this driver needs glob, to locate the newest encrypted file.
import glob

# Initialize the GnuPG object on the same keyring directory used for encryption.
gpg = gnupg.GPG(gnupghome='/content/.gnupg')

# Passphrase for the private key. Prefer supplying it through the
# GPG_PASSPHRASE environment variable rather than editing it into the notebook;
# the literal is only the placeholder fallback.
passphrase = os.environ.get('GPG_PASSPHRASE', 'passphrase')

# Import the keys
import_keys(gpg, 'public_key.asc', 'private_key.asc', passphrase)

# Pick the file to decrypt. The generator names its output
# data_<YYYYmmdd_HHMMSS>.gpg, so sorting the matches lexicographically puts the
# most recent one last. A fixed name from an earlier session would not exist
# after a fresh run; it is kept only as the fallback when nothing matches.
encrypted_candidates = sorted(glob.glob('data_*.gpg'))
input_file = encrypted_candidates[-1] if encrypted_candidates else 'data_20250121_230851.gpg'
output_file = 'finaldata.csv'

decrypt_pgp_file(gpg, input_file, output_file, passphrase)

In [None]:
# NOTE(review): hdfs is already listed in the install command of the first code
# cell, so this repeat looks redundant — confirm before removing.
!pip install hdfs

Load the decrypted CSV file into Hadoop HDFS

In [None]:
# Copy the decrypted CSV from the Colab filesystem into Hadoop HDFS.

from hdfs import InsecureClient

# Connection settings — placeholders, replace with your Namenode details.
hdfs_namenode = 'http://localhost:9870'  # e.g. 'http://your-namenode-host:port'
hdfs_user = 'your_hdfs_user'  # e.g. 'hadoop'

# Source file in Colab and destination path in HDFS.
local_csv_file_path = 'finaldata.csv'
hdfs_csv_file_path = '/user/your_hdfs_user/finaldata.csv'  # e.g. '/user/your_username/data.csv'

# HDFS client that acts as the configured user.
client = InsecureClient(hdfs_namenode, user=hdfs_user)

# Attempt the upload; any failure is reported rather than raised so the
# notebook keeps running.
try:
    client.upload(hdfs_csv_file_path, local_csv_file_path)
except Exception as upload_error:
    print(f"Error uploading file to HDFS: {upload_error}")
else:
    print(f"File '{local_csv_file_path}' uploaded to HDFS at '{hdfs_csv_file_path}' successfully.")