In [1]:
import io
import os
import shutil

import pandas as pd
import psycopg2
from psycopg2 import sql

# Gather the names of all entries in the working directory that end in .xlsx
cwd_entries = os.listdir(os.getcwd())
excel_files = [entry for entry in cwd_entries if entry.endswith('.xlsx')]

# Create the directory that will hold the workbooks.
# exist_ok=True tolerates a directory left over from a previous run, but
# still raises FileExistsError when the path exists as a regular file, so
# that situation is reported here instead of being silently ignored.
dataset_dir = 'datasets'
os.makedirs(dataset_dir, exist_ok=True)

# Relocate every discovered workbook into the dataset directory,
# reporting (but not aborting on) files that cannot be moved
for workbook in excel_files:
    source = os.path.abspath(workbook)
    target = os.path.join(dataset_dir, workbook)
    try:
        shutil.move(source, target)
    except FileNotFoundError:
        print(f"File not found: {workbook}")
    except shutil.Error as move_error:
        print(f"Error moving file {workbook}: {move_error}")
    else:
        print(f"Moved {workbook} to {dataset_dir}")

# Read each workbook into a pandas DataFrame, keyed by its file name.
# Workbooks that cannot be loaded are reported and left out of `df`, so the
# later upload step only sees data that was actually read.
df = {}
for file in excel_files:
    file_path = os.path.join(dataset_dir, file)
    try:
        df[file] = pd.read_excel(file_path)
    except FileNotFoundError:
        # The move step above reports failures without stopping, so the
        # workbook may not exist at this path.
        print(f"File not found: {file}")
    except Exception as exc:
        # A damaged or non-Excel file typically surfaces as ValueError or
        # zipfile.BadZipFile rather than pandas' ParserError, and the exact
        # type depends on the reader engine. This loop is best-effort, so
        # report whatever went wrong and continue with the next file.
        print(f"Error parsing file {file}: {exc}")
    else:
        print(f"Reading {file}")

# Iterate over DataFrames, clean column names, and upload to PostgreSQL
# Lookup table translating pandas dtype names into the PostgreSQL column
# types used in the CREATE TABLE statement below; dtypes that are not listed
# are resolved by the lookup's 'varchar' fallback.
replacements = {
    # boolean and numeric dtypes
    "bool": "boolean",
    "int64": "bigint",
    "float64": "float",
    # dtypes stored as text
    "object": "varchar",
    "timedelta64[ns]": "varchar",
    # date/time dtypes
    "datetime64": "timestamp",
    "datetime64[ns]": "timestamp",
}

# PostgreSQL connection settings.
# Each value can be overridden through the corresponding libpq environment
# variable so that credentials do not have to be edited into this script;
# when a variable is unset the original hard-coded value is used.
hostname = os.environ.get('PGHOST', 'localhost')
database = os.environ.get('PGDATABASE', 'covid19')
username = os.environ.get('PGUSER', 'postgres')
password = os.environ.get('PGPASSWORD', 'password')
port_id = int(os.environ.get('PGPORT', 5432))  # env values arrive as strings

# Upload every DataFrame in `df` to PostgreSQL as its own table.
#
# For each workbook: derive a table name from the file name, normalise the
# column headers, (re)create the table with column types looked up in
# `replacements`, bulk-load the rows with COPY, and grant SELECT to public.

# Character clean-up applied to file names: separators become underscores,
# the listed punctuation is dropped.
_TABLE_NAME_CLEANUP = str.maketrans({
    " ": "_", "-": "_", "/": "_", "\\": "_",
    "?": None, "%": None, ")": None, "(": None, "$": None,
})
# Column headers get the same clean-up and additionally lose "¢" and ".".
_COLUMN_NAME_CLEANUP = {**_TABLE_NAME_CLEANUP, ord("¢"): None, ord("."): None}

for file, dataframe in df.items():
    # Table name: cleaned file name up to the first dot (drops ".xlsx").
    table_name = file.lower().translate(_TABLE_NAME_CLEANUP).split('.')[0]

    # str() guards against non-string headers (e.g. numeric column labels).
    dataframe.columns = [str(col).lower().translate(_COLUMN_NAME_CLEANUP)
                         for col in dataframe.columns]

    # Compose identifiers with psycopg2.sql so names that are reserved
    # words, start with a digit, or still contain punctuation are quoted
    # correctly instead of being spliced into the statement as raw text.
    table_ident = sql.Identifier(table_name)
    column_defs = sql.SQL(", ").join(
        sql.SQL("{} {}").format(
            sql.Identifier(name),
            sql.SQL(replacements.get(str(dtype), 'varchar')),
        )
        for name, dtype in zip(dataframe.columns, dataframe.dtypes)
    )

    conn = psycopg2.connect(
        host=hostname,
        dbname=database,
        user=username,
        password=password,
        port=port_id
    )
    try:
        # `with conn` commits on success and rolls back on an exception; it
        # does not close the connection, which is why `finally` does.
        with conn, conn.cursor() as cursor:
            print(f'Opened the database successfully for {table_name}')

            cursor.execute(
                sql.SQL("DROP TABLE IF EXISTS {};").format(table_ident))
            cursor.execute(
                sql.SQL("CREATE TABLE {} ({});").format(table_ident, column_defs))

            print(f'{table_name} was created successfully')

            # Serialise to CSV in memory. Writing to `file` would create a
            # CSV named "*.xlsx" in the working directory, which the next
            # run would mistake for a workbook.
            csv_buffer = io.StringIO()
            dataframe.to_csv(csv_buffer, index=False)
            csv_buffer.seek(0)

            print(f'File {file} opened in memory')
            cursor.copy_expert(
                sql=sql.SQL(
                    "COPY {} FROM STDIN WITH CSV HEADER DELIMITER AS ',';"
                ).format(table_ident),
                file=csv_buffer,
            )
            print(f'File {file} copied to database')

            cursor.execute(
                sql.SQL("GRANT SELECT ON TABLE {} TO public;").format(table_ident))
    finally:
        conn.close()

print('All tables have been successfully imported into the database.')



Moved Original dataset.xlsx to datasets
Reading Original dataset.xlsx
Opened the database successfully for original_dataset
original_dataset was created successfully
File Original dataset.xlsx opened in memory
File Original dataset.xlsx copied to database
All tables have been successfully imported into the database.
