# Chapter 1: Data Collection

In [None]:
# Install PyMySQL, the driver selected by the "mysql+pymysql" engine URL used
# in this notebook. The leading "!" runs the command in a shell.
!pip install pymysql

In [None]:
from google.colab import userdata
class Config:
    """Settings used to connect to the MySQL database.

    The database name and charset are fixed literals; host, port, user and
    password are looked up by key through ``userdata.get`` when the class
    body is executed.
    """

    # Fixed settings.
    MYSQL_DB = "r2de3"
    MYSQL_CHARSET = "utf8mb4"

    # Values fetched by key via userdata.get.
    MYSQL_HOST = userdata.get("MYSQL_HOST")
    MYSQL_PORT = userdata.get("MYSQL_PORT")
    MYSQL_USER = userdata.get("MYSQL_USER")
    MYSQL_PASSWORD = userdata.get("MYSQL_PASSWORD")

In [None]:
import sqlalchemy
# Build the connection URL with URL.create rather than string formatting so
# that special characters in the user name or password (e.g. "@", "/", ":")
# are escaped instead of corrupting the connection string.
engine = sqlalchemy.create_engine(
    sqlalchemy.engine.URL.create(
        drivername="mysql+pymysql",
        username=Config.MYSQL_USER,
        password=Config.MYSQL_PASSWORD,
        host=Config.MYSQL_HOST,
        # URL.create expects an integer port; fall back to the driver default
        # when no port is configured.
        port=int(Config.MYSQL_PORT) if Config.MYSQL_PORT else None,
        database=Config.MYSQL_DB,
        # Apply the charset declared in Config to the connection.
        query={"charset": Config.MYSQL_CHARSET},
    )
)

In [None]:
# Run "show tables;" on a short-lived connection and keep all returned rows.
with engine.connect() as conn:
    tables_cursor = conn.execute(sqlalchemy.text("show tables;"))
    result = tables_cursor.fetchall()
result

In [None]:
# Fetch the column description of each of the three tables over a single
# connection, then print each one under its own header line.
with engine.connect() as conn:
    desc_transaction = conn.execute(
        sqlalchemy.text("describe transaction")
    ).fetchall()
    desc_customer = conn.execute(
        sqlalchemy.text("describe customer")
    ).fetchall()
    desc_product = conn.execute(
        sqlalchemy.text("describe product")
    ).fetchall()

print("== transaction ==", desc_transaction, sep="\n")
print("== customer ==", desc_customer, sep="\n")
print("== product ==", desc_product, sep="\n")

In [None]:
import pandas as pd

# Load the whole product table and index it by "ProductNo".
with engine.connect() as connection:
    product_cursor = connection.execute(sqlalchemy.text("SELECT * FROM r2de3.product;"))
    # Take the column names from the result itself instead of relying on
    # pandas to infer them from the row objects; this also keeps the columns
    # when the table is empty, so set_index below cannot fail with KeyError.
    product_columns = list(product_cursor.keys())
    product_result = product_cursor.fetchall()

product = pd.DataFrame(product_result, columns=product_columns)
product = product.set_index("ProductNo")

In [None]:
# Read the customer and transaction tables directly into DataFrames, letting
# pandas execute each query through the engine.
customer = pd.read_sql(sql="SELECT * FROM r2de3.customer", con=engine)
transaction = pd.read_sql(sql="SELECT * FROM r2de3.transaction", con=engine)

In [None]:
# Left-join product and then customer onto each transaction row, so every
# transaction is kept even when no matching product/customer exists.
merged_transaction = (
    transaction
    .merge(product, how="left", left_on="ProductNo", right_on="ProductNo")
    .merge(customer, how="left", left_on="CustomerNo", right_on="CustomerNo")
)
merged_transaction

In [None]:
import requests

# Download the GBP->THB conversion rates and load them into a DataFrame.
url = "https://r2de3-currency-api-vmftiryt6q-as.a.run.app/gbp_thb"
# Without a timeout, requests waits indefinitely on an unresponsive server.
r = requests.get(url, timeout=30)
# Fail loudly on a 4xx/5xx response instead of parsing an error body as data.
r.raise_for_status()
result_conversion_rate = r.json()
conversion_rate = pd.DataFrame(result_conversion_rate)
# The "id" field of the payload is not used further.
conversion_rate = conversion_rate.drop(columns=['id'])
# Parse the date values into pandas datetimes.
conversion_rate['date'] = pd.to_datetime(conversion_rate['date'])
conversion_rate.head()

In [None]:
# Attach the conversion rate to every transaction by matching the
# transaction "Date" against the rate "date" (left join keeps all rows).
# NOTE(review): assumes both key columns hold comparable datetime values - confirm.
final_df = pd.merge(
    merged_transaction,
    conversion_rate,
    how="left",
    left_on="Date",
    right_on="date",
)
# Line total as Price * Quantity, then its THB value using the joined rate.
final_df["total_amount"] = final_df["Price"].mul(final_df["Quantity"])
final_df["thb_amount"] = final_df["total_amount"].mul(final_df["gbp_thb"])

In [None]:
def convert_rate(price, rate):
    """Return ``price`` multiplied by ``rate``."""
    converted = price * rate
    return converted

# Call convert_rate on the two columns at once: it only multiplies its
# arguments, so this yields the same values as applying it row by row while
# avoiding one Python-level call per row (and it also works on an empty frame).
final_df["thb_amount"] = convert_rate(final_df["total_amount"], final_df["gbp_thb"])

In [None]:
# Remove the two columns brought in by the conversion-rate merge.
final_df = final_df.drop(columns=["date", "gbp_thb"])
# Rename the remaining columns positionally; the list length must match the
# number of columns or pandas raises.
final_df.columns = [
    'transaction_id',
    'date',
    'product_id',
    'price',
    'quantity',
    'customer_id',
    'product_name',
    'customer_country',
    'customer_name',
    'total_amount',
    'thb_amount',
]
final_df

In [None]:
# Write the final table as Parquet and as CSV, leaving the row index out of
# both files.
final_df.to_parquet(path="output.parquet", index=False)
final_df.to_csv(path_or_buf="output.csv", index=False)