In [44]:
import pandas as pd
import os
import logging

from sqlalchemy.exc import OperationalError
from src.methods.extractors.databaseConnectors import DatabaseConnector

In [30]:
def csv_to_dataframe(file_path: str, sep: str) -> pd.DataFrame:
    """Read every ``.csv`` file in a directory into one pandas DataFrame.

    Files are processed in sorted filename order so that the row order of
    the result is reproducible. A file that cannot be parsed is reported
    on stdout and skipped; the remaining files are still loaded.

    Args:
        file_path (str): directory containing the csv files
        sep (str): csv files separator

    Returns:
        pd.DataFrame: the concatenation of all readable files with a fresh
        0..n-1 index, or an empty DataFrame when nothing could be loaded.
    """
    frames = []

    # os.listdir() returns entries in arbitrary order; sort for determinism.
    for filename in sorted(os.listdir(file_path)):
        if not filename.endswith('.csv'):
            continue
        try:
            frames.append(pd.read_csv(os.path.join(file_path, filename), sep=sep))
        except Exception as e:
            # Best effort: one broken file must not abort the whole load.
            print(f'Error reading file {filename}: {e}')

    if not frames:
        print(f'No CSV files were found. Check the file_path: {file_path}')
        # pd.concat([]) raises ValueError, so return an empty frame instead.
        return pd.DataFrame()

    return pd.concat(frames, ignore_index=True)

In [31]:
def json_to_dataframe(file_path: str) -> pd.DataFrame:
    """Read every ``.json`` file in a directory into one pandas DataFrame.

    Files are processed in sorted filename order so that the row order of
    the result is reproducible. A file that cannot be parsed is reported
    on stdout and skipped; the remaining files are still loaded.

    Args:
        file_path (str): directory containing the json files

    Returns:
        pd.DataFrame: the concatenation of all readable files with a fresh
        0..n-1 index, or an empty DataFrame when nothing could be loaded.
    """
    frames = []

    # os.listdir() returns entries in arbitrary order; sort for determinism.
    for filename in sorted(os.listdir(file_path)):
        if not filename.endswith('.json'):
            continue
        try:
            frames.append(pd.read_json(os.path.join(file_path, filename)))
        except Exception as e:
            # Best effort: one broken file must not abort the whole load.
            print(f'Error reading file {filename}: {e}')

    if not frames:
        print(f'No JSON files were found. Check the file_path: {file_path}')
        # pd.concat([]) raises ValueError, so return an empty frame instead.
        return pd.DataFrame()

    return pd.concat(frames, ignore_index=True)

In [53]:
# Work in progress: this function is still being developed.

def sql_to_dataframe(connection_string: str, 
                     table: str, 
                     columns: list = None,
                     where: str = None, 
                     limit: int = None
) -> pd.DataFrame:
    """Query a database table and return the rows as a pandas DataFrame.

    Opens a connection through ``DatabaseConnector``, asks it to build and
    sanitize the query, executes it and wraps the rows in a DataFrame. The
    connector is disconnected once a session was opened, whether or not
    the query succeeds.

    Args:
        connection_string (str): connection string handed to
            ``DatabaseConnector``
        table (str): table to query
        columns (list, optional): columns to select. When omitted, empty,
            or containing ``'*'``, the DataFrame labels are taken from the
            result's cursor description instead of from this list.
        where (str, optional): filter forwarded to ``build_query_string``
        limit (int, optional): row limit forwarded to ``build_query_string``

    Returns:
        pd.DataFrame: the query result. ``None`` is returned when no
        database session is available or when the query raises
        ``OperationalError`` (the error is printed).
    """
    db_connector = DatabaseConnector(connection_string)
    db_connector.connect()

    # NOTE(review): relies on the connector's private `_db_session` attribute
    # to detect a failed connection; confirm whether a public check exists.
    if not db_connector._db_session:
        return None

    try:
        query = db_connector.build_query_string(table, columns, where, limit)
        sanitized_query = db_connector.sanitize_query(query)
        data = db_connector.query_data(sanitized_query)

        if not columns or '*' in columns:
            # A wildcard select returns more columns than the literal ['*']
            # names, so the real labels must come from the cursor.
            # Assumes `data` exposes a DB-API style `.cursor.description`.
            column_names = [column[0] for column in data.cursor.description]
        else:
            column_names = columns

        return pd.DataFrame(data, columns=column_names)
    except OperationalError as e:
        print(f"Error executing query: {e}")
        return None
    finally:
        db_connector.disconnect()

In [32]:
def save_dataframe(df: pd.DataFrame, 
                   sep: str, 
                   output_path: str, 
                   output_filename: str
):
    """Write a pandas DataFrame to ``<output_path>/<output_filename>.csv``.

    The index is not written. Any exception raised while building the path
    or writing the file is caught and reported on stdout rather than
    propagated, so the function always returns ``None``.

    Args:
        df (pd.DataFrame): dataframe to be saved
        sep (str): CSV separator
        output_path (str): directory the file is written to
        output_filename (str): file name without the ``.csv`` extension
    """
    try:
        csv_name = f'{output_filename}.csv'
        destination = os.path.join(output_path, csv_name)
        df.to_csv(destination, index=False, sep=sep)
        print(f'Dataframe created in {destination}')
    except Exception as err:
        print(f'Error while compiling dataframe: {err}')

In [28]:
# Example usage (disabled).
#
# Kept as `#` comments instead of a triple-quoted string: inside a non-raw
# string literal the `\U` of `C:\Users` is parsed as a unicode escape and
# the cell fails with a SyntaxError before anything runs.
#
# df = csv_to_dataframe(r'C:\Users\Vegh\Desktop\teste', ',')
# display(df)
# save_dataframe(df, ',', r'C:\Users\Vegh\Desktop\teste', 'teste_novo')
#
# Database credentials must not be hardcoded in the notebook. Export the
# connection string in an environment variable (the name below is only an
# example) and read it at run time.
#
# df = sql_to_dataframe(
#         connection_string=os.environ['BOOKINGS_DB_URL'],
#         table='bookings',
#         columns=['*'],
#         where=None,
#         limit=100
#     )
#
# display(df)


Unnamed: 0,hour,wave_height
0,00:00 AM,1.2
1,01:00 AM,0.8
2,02:00 AM,0.9
3,03:00 AM,1.1
4,04:00 AM,1.8
...,...,...
1147,19:00 PM,1.9
1148,20:00 PM,2.1
1149,21:00 PM,2.3
1150,22:00 PM,2.4


Dataframe created in C:\Users\Vegh\Desktop\teste\teste_novo.csv
