In [1]:
import os
import sqlite3
from contextlib import closing
from typing import Union

import pandas as pd
import pyodbc, struct
from azure import identity
from fastapi import FastAPI
from loguru import logger
from pydantic import BaseModel
from tqdm import tqdm

In [2]:
def get_dataset_from_sqlite(path: str) -> pd.DataFrame:
    """Load the ``ondapocos`` table from a SQLite file into a cleaned DataFrame.

    The rows are ordered newest-first by ``date``, everything in ``content``
    from the first occurrence of 'Receba as notícias através' onwards is
    dropped, and the text columns are cast to pandas' ``string`` dtype.

    Args:
        path: Location of the SQLite database file.

    Returns:
        Every column of ``ondapocos``, with ``date`` parsed as datetimes.
    """
    # sqlite3's own context manager only commits / rolls back the transaction
    # and leaves the connection open; closing() guarantees the handle on the
    # database file is released, even if the query raises.
    with closing(sqlite3.connect(path)) as conn:
        df = pd.read_sql_query(
            "SELECT * FROM ondapocos;",
            conn,
            parse_dates=["date"],
        )

    df = df.sort_values("date", ascending=False)

    # Keep only the text that precedes the marker sentence in each article.
    df["content"] = df["content"].str.split("Receba as notícias através").str[0]

    string_columns = [
        "id",
        "title",
        "author",
        "snippet",
        "link",
        "content",
        "thumbnail_link",
        "thumbnail_alt",
        "categories",
    ]

    for col in string_columns:
        df[col] = df[col].astype("string")

    return df

In [3]:
def _to_db_value(value):
    """Return ``None`` for pandas missing-value markers, else ``value`` unchanged."""
    # pd.NA / NaT / NaN are not valid ODBC parameter values; None is bound as NULL.
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return None
    return value


def populate_azure_database(df: pd.DataFrame, timeout: int = 60) -> None:
    """Insert every row of ``df`` into the raw-data table of the Azure SQL database.

    The target table is ``{RAW_DATA_SCHEMA}.{RAW_DATA_TABLE}`` and the connection
    is configured from the ``AZURE_SQL_SERVER``, ``AZURE_SQL_DATABASE``,
    ``AZURE_SQL_DRIVER``, ``AZURE_SQL_USERNAME`` and ``AZURE_SQL_PASSWORD``
    environment variables.

    Rows rejected with ``pyodbc.IntegrityError`` are logged and skipped. All
    accepted rows are committed together after the last row. The connection is
    always closed; if any other error interrupts the load it is closed without
    committing.

    Args:
        df: Frame holding at least the columns ``id``, ``title``, ``author``,
            ``date``, ``snippet``, ``link``, ``content``, ``thumbnail_link``,
            ``thumbnail_alt`` and ``categories``.
        timeout: Forwarded to ``pyodbc.connect`` as its ``timeout`` argument.

    Raises:
        KeyError: If a required environment variable or column is missing.
    """
    columns = (
        "id",
        "title",
        "author",
        "date",
        "snippet",
        "link",
        "content",
        "thumbnail_link",
        "thumbnail_alt",
        "categories",
    )
    table_name = f'{os.environ["RAW_DATA_SCHEMA"]}.{os.environ["RAW_DATA_TABLE"]}'
    connection_string = 'DRIVER={driver};SERVER={server};DATABASE={database};UID={username};PWD={password}'.format(
        server=os.environ["AZURE_SQL_SERVER"],
        database=os.environ["AZURE_SQL_DATABASE"],
        driver=os.environ["AZURE_SQL_DRIVER"],
        username=os.environ["AZURE_SQL_USERNAME"],
        password=os.environ["AZURE_SQL_PASSWORD"],
    )

    # The statement text is identical for every row, so build it once.
    insert_sql = f"""
        INSERT INTO {table_name}
        ({", ".join(columns)})
        VALUES ({", ".join("?" for _ in columns)})
    """

    # Plain tuples in INSERT column order; much cheaper than iterrows().
    records = df[list(columns)].itertuples(index=False, name=None)

    # closing() releases the connection even when an unexpected error escapes.
    with closing(pyodbc.connect(connection_string, timeout=timeout)) as conn:
        cursor = conn.cursor()
        for record in tqdm(records, total=len(df)):
            params = tuple(_to_db_value(value) for value in record)
            try:
                cursor.execute(insert_sql, params)
            except pyodbc.IntegrityError as err:
                # Usually a duplicate primary key, but other constraint
                # violations land here too, so report the driver's message.
                logger.error(
                    f"Skipping ID {params[0]}: integrity error inserting into {table_name}: {err}"
                )
        conn.commit()

In [14]:
# Load the `ondapocos` table from the local SQLite file "pocos-news.db".
df = get_dataset_from_sqlite("pocos-news.db")

# Insert the loaded rows into the Azure SQL table; this reads the RAW_DATA_*
# and AZURE_SQL_* environment variables, which must be set beforehand.
populate_azure_database(df)