In [1]:
!pip install pymysql pandas openpyxl sqlalchemy



In [3]:
import pandas as pd
from sqlalchemy import create_engine

# Column dtype casts applied to each sheet before upload. The key order is
# also the order in which the tables are processed and uploaded.
# NOTE(review): `payments.order_id` is cast to int while `orders.order_id` and
# `transport.order_id` are cast to str; kept as-is so the resulting table
# schemas do not change -- confirm whether this mismatch is intended.
_TABLE_DTYPES = {
    'orders': {
        'order_id': 'str',
        'customer_id': 'str',
        'product': 'str',
        'quantity': 'str',
    },
    'transport': {
        'shipping_id': 'str',
        'order_id': 'str',
        'delivery_type': 'str',
        'shipping_type': 'str',
    },
    'payments': {
        'order_id': 'int',
        'transaction_id': 'str',
        'unit_price': 'float64',
        'total_amount_before_discount': 'float64',
        'payment_status': 'str',
        'payment_type': 'str',
    },
    'client_contacts': {
        'customer_id': 'str',
        'email': 'str',
        'phone_number': 'str',
        'preference_contact_method': 'str',
    },
    'client_details': {
        'address_id': 'str',
        'customer_id': 'str',
        'first_name': 'str',
        'last_name': 'str',
        'city': 'str',
    },
    'client_discounts': {
        'customer_id': 'str',
        'client_segments': 'str',
        'loyal_member': 'str',
        'client_segment_discount': 'str',
        'loyal_member_discount': 'str',
    },
}

# Columns that are rendered as 'YYYY-MM-DD' text before upload.
_TABLE_DATE_COLUMNS = {
    'orders': ('order_date',),
    'transport': ('order_date', 'ship_date', 'delivery_date'),
}

_DATE_FORMAT = '%Y-%m-%d'


def _prepare_table(df, table_name: str):
    """Return a copy of *df* with the casts and date formatting for *table_name*."""
    prepared = df.astype(_TABLE_DTYPES[table_name])
    for column in _TABLE_DATE_COLUMNS.get(table_name, ()):
        # pd.to_datetime is a no-op for datetime columns, but lets the
        # formatting also work when Excel delivered the column as text/object.
        prepared[column] = pd.to_datetime(prepared[column]).dt.strftime(_DATE_FORMAT)
    return prepared


def ingest_data(file_path: str, db_url: str, db_port, db_name: str, db_user: str, db_password: str) -> None:
    """Load the sheets of an Excel workbook into MySQL tables.

    Every sheet of the workbook is read, the sheets named in
    ``_TABLE_DTYPES`` are type-cast (and their date columns formatted as
    ``YYYY-MM-DD`` strings), and each one is written to a table of the same
    name. Existing tables are replaced (``if_exists='replace'``) and the
    DataFrame index is not written.

    Args:
        file_path: Path of the Excel workbook to read.
        db_url: Host name or IP address of the MySQL server.
        db_port: TCP port of the MySQL server.
        db_name: Name of the target database.
        db_user: Database user name, given verbatim (not percent-encoded).
        db_password: Database password, given verbatim (not percent-encoded).

    Raises:
        KeyError: If one or more of the expected sheets is missing from the
            workbook. Raised before any database connection is made.
    """
    # sheet_name=None returns a dict mapping every sheet name to a DataFrame.
    sheets = pd.read_excel(file_path, sheet_name=None)

    # Fail early with a readable message instead of a bare KeyError mid-upload.
    missing = [name for name in _TABLE_DTYPES if name not in sheets]
    if missing:
        raise KeyError(f"Missing sheet(s) in {file_path!r}: {', '.join(missing)}")

    # Imported here so the block carries its own dependency on URL.
    from sqlalchemy.engine import URL

    # URL.create takes the components separately, so credentials containing
    # characters such as '@', ':' or '/' cannot corrupt the connection URL.
    connection_url = URL.create(
        drivername='mysql+pymysql',
        username=db_user,
        password=db_password,
        host=db_url,
        port=db_port,
        database=db_name,
    )

    # One engine for all tables; it was previously rebuilt on every iteration.
    engine = create_engine(connection_url)
    try:
        for table_name in _TABLE_DTYPES:
            print(f'df_name: {table_name}')
            table = _prepare_table(sheets[table_name], table_name)

            # Upload the DataFrame to MySQL
            table.to_sql(table_name, con=engine, if_exists='replace', index=False)
            print(f"{table_name} data uploaded successfully!")
    finally:
        # Release pooled connections even when an upload fails part-way.
        engine.dispose()

# Demo run: push the sample workbook into a local MySQL instance.
file_path = 'data_academia_v1.xlsx'
db_name = 'cap_academy'

# Address of the MySQL server; replace with your own database IP and port
# (172.22.0.1 was used as an alternative host).
db_url, db_port = '172.18.0.1', 3306

# Credentials; replace with your own database username and password.
db_user, db_password = 'root', 'rootpassword'

ingest_data(
    file_path=file_path,
    db_url=db_url,
    db_port=db_port,
    db_name=db_name,
    db_user=db_user,
    db_password=db_password,
)

df_name: orders
orders data uploaded successfully!
df_name: transport
transport data uploaded successfully!
df_name: payments
payments data uploaded successfully!
df_name: client_contacts
client_contacts data uploaded successfully!
df_name: client_details
client_details data uploaded successfully!
df_name: client_discounts
client_discounts data uploaded successfully!
