### 1. Import libraries

In [9]:
from urllib.parse import quote

import pandas as pd
import yaml
from sqlalchemy import create_engine

### 2. Helper functions

In [30]:

# Function to load the data
def read_data(url: str) -> pd.DataFrame:
    """Read a pipe-delimited text file into a DataFrame.

    Parameters
    ----------
    url : str
        Location of the file; handed unchanged to ``pandas.read_csv``.

    Returns
    -------
    pd.DataFrame
        The table parsed with ``'|'`` as the field separator.
    """
    frame = pd.read_csv(url, sep='|')
    return frame

# Function to transform the data
def clean_data(df: pd.DataFrame) -> pd.DataFrame:
    """
    Normalize the raw reactor-status frame: rename columns, cast types, tidy text.

    No columns are dropped; any column other than the three below passes
    through unchanged.

    Parameters
    ----------
    df : pd.DataFrame
        Frame holding the columns ``ReportDt``, ``Unit`` and ``Power``.
        ``ReportDt`` values are strings such as ``'5/22/2025 12:00:00 AM'``.

    Returns
    -------
    pd.DataFrame
        A new frame (the input is left untouched) in which
        ``report_dt`` is a datetime parsed from the date part of ``ReportDt``,
        ``unit`` is stripped, lower-cased and has spaces replaced by ``'_'``,
        and ``power`` is cast with ``astype(int)``.
        Missing ``Unit`` / ``ReportDt`` values stay missing (NaN / NaT)
        instead of raising.

    Raises
    ------
    KeyError
        If one of the expected columns is absent.
    ValueError
        If a date does not match ``%m/%d/%Y`` or a ``Power`` value cannot be
        converted by ``astype(int)``. Missing ``Power`` values are not handled
        here and also make the cast fail.
    """
    # Rename columns to snake_case
    renamed = df.rename(columns={'ReportDt': 'report_dt', 'Unit': 'unit', 'Power': 'power'})

    # Keep only the date part ('5/22/2025') of the timestamp string before parsing
    date_part = renamed['report_dt'].str.split(' ').str[0]

    # Vectorized string methods propagate missing values instead of raising
    # AttributeError the way a per-row lambda calling x.strip() does.
    unit_clean = (
        renamed['unit']
        .str.strip()
        .str.lower()
        .str.replace(' ', '_', regex=False)
    )

    # assign() on existing column names keeps the column order and returns a new frame
    return renamed.assign(
        power=renamed['power'].astype(int),
        unit=unit_clean,
        report_dt=pd.to_datetime(date_part, format='%m/%d/%Y'),
    )

### 3. DB Connection

In [10]:
# Load the YAML configuration file.
# NOTE(review): this is an absolute path on one machine; the notebook will not
# run elsewhere unless the file exists at exactly this location.
with open('/home/las/Documentos/repos/projeto_airflow/config.yaml', "r") as file:
    config = yaml.safe_load(file)

# Extract database connection details from the config
host = config["database"]["host"]
db = config["database"]["db"]
user = config["database"]["user"]
password = config["database"]["password"]

# Connection string.
# The credentials are percent-encoded: a raw '@', ':', '/' or '%' in the user
# name or password would otherwise be parsed as URL syntax and break the
# connection. str() guards against YAML having parsed a purely numeric value
# as a number.
conn = (
    f"postgresql://{quote(str(user), safe='')}:{quote(str(password), safe='')}"
    f"@{host}.oregon-postgres.render.com/{db}"
)

# Create the engine
engine = create_engine(conn)

# Open a connection; it is closed explicitly in the last cell of the notebook
con = engine.connect()


### 4. Loading data

In [32]:
# Text file published on nrc.gov; read_data parses it as '|'-separated text
url = 'https://www.nrc.gov/reading-rm/doc-collections/event-status/reactor-status/powerreactorstatusforlast365days.txt'
df = read_data(url)
# Preview the first rows of the raw frame
df.head()

Unnamed: 0,ReportDt,Unit,Power
0,5/22/2025 12:00:00 AM,Arkansas Nuclear 1,100
1,5/22/2025 12:00:00 AM,Arkansas Nuclear 2,100
2,5/22/2025 12:00:00 AM,Beaver Valley 1,100
3,5/22/2025 12:00:00 AM,Beaver Valley 2,99
4,5/22/2025 12:00:00 AM,Braidwood 1,100


### 5. Transform data

In [33]:
# Replace the raw frame with its cleaned version.
# NOTE(review): rebinding `df` makes this cell non-idempotent — running it a
# second time passes the already-cleaned frame to clean_data, so re-run the
# loading cell first.
df = clean_data(df)
# Preview the first rows of the cleaned frame
df.head()

Unnamed: 0,report_dt,unit,power
0,2025-05-22,arkansas_nuclear_1,100
1,2025-05-22,arkansas_nuclear_2,100
2,2025-05-22,beaver_valley_1,100
3,2025-05-22,beaver_valley_2,99
4,2025-05-22,braidwood_1,100


In [35]:
# Close the connection opened with engine.connect(), then dispose of the engine
con.close()
engine.dispose()