# In [1]:
# Data-engineering walkthrough: fetch a small dataset (less than 50 MB) from a
# public dataset/API website using pandas and an HTTP API, apply some
# transformations, store the result in Oracle DB in two ways (cx_Oracle and
# SQLAlchemy), and produce a short report.
# The COVID-19 API is used as the example data source.
import pandas as pd
import requests
import cx_Oracle
from sqlalchemy import create_engine
import datetime
# Fetch the per-country summary from the COVID-19 API into a DataFrame.
url = "https://api.covid19api.com/summary"
# A timeout keeps the script from hanging indefinitely if the API never answers.
response = requests.get(url, timeout=30)
# Fail right here on an HTTP error status instead of later, when the error
# body turns out not to contain the expected JSON.
response.raise_for_status()
# The rest of the script relies on the payload exposing its records under 'Countries'.
data = response.json()['Countries']
df = pd.DataFrame(data)

# Data transformation
# The /summary payload names its cumulative counters 'TotalConfirmed',
# 'TotalDeaths' and 'TotalRecovered', while everything below uses the short
# names. Map them over only when the source column exists and the short name is
# not already taken, so a frame that already uses the short names is untouched.
column_aliases = {
    'TotalConfirmed': 'Confirmed',
    'TotalDeaths': 'Deaths',
    'TotalRecovered': 'Recovered',
}
df = df.rename(columns={
    source: target
    for source, target in column_aliases.items()
    if source in df.columns and target not in df.columns
})

# Parse the timestamp strings so the column is stored as a real date/time type.
df['Date'] = pd.to_datetime(df['Date'])
# Derived metric: cases that are neither fatal nor recovered.
df['ActiveCases'] = df['Confirmed'] - df['Deaths'] - df['Recovered']
# Keep only the columns that are loaded into the database, in table order.
df = df[['Country', 'Date', 'Confirmed', 'Deaths', 'Recovered', 'ActiveCases']]
df = df.loc[df['Confirmed'] > 0]  # Filter relevant data

# Method 1: Using cx_Oracle
# NOTE: host, service name and credentials below are placeholders; supply real
# values from configuration or the environment rather than hard-coding secrets.

# Create table
create_table_sql = """
CREATE TABLE covid_data (
    country VARCHAR2(100),
    report_date DATE,
    confirmed NUMBER,
    deaths NUMBER,
    recovered NUMBER,
    active_cases NUMBER
)
"""

# Naming the target columns keeps the insert correct even if the table's
# column order ever differs from the order of the bound values.
insert_sql = """
    INSERT INTO covid_data
        (country, report_date, confirmed, deaths, recovered, active_cases)
    VALUES (:1, :2, :3, :4, :5, :6)
"""

# One plain tuple per DataFrame row, in the same order as the bind positions.
insert_rows = list(
    df[['Country', 'Date', 'Confirmed', 'Deaths', 'Recovered', 'ActiveCases']]
    .itertuples(index=False, name=None)
)

dsn = cx_Oracle.makedsn(host='localhost', port=1521, service_name='orcl')
connection = cx_Oracle.connect(user='username', password='password', dsn=dsn)
try:
    cursor = connection.cursor()
    try:
        try:
            cursor.execute(create_table_sql)
        except cx_Oracle.DatabaseError as exc:
            error, = exc.args
            # ORA-00955 means the table already exists (e.g. the cell was
            # re-run); anything else is a real failure. Note that re-running
            # then appends the rows again rather than replacing them.
            if error.code != 955:
                raise

        # Insert data: a single batched call instead of one round trip per row.
        if insert_rows:
            cursor.executemany(insert_sql, insert_rows)

        connection.commit()
    finally:
        cursor.close()
finally:
    # Always release the session; closing without a commit discards a partial load.
    connection.close()

# Method 2: Using SQLAlchemy
from sqlalchemy.types import String

# NOTE: the credentials in the URL are placeholders; do not hard-code real secrets.
engine = create_engine('oracle+cx_oracle://username:password@localhost:1521/?service_name=orcl')
try:
    # if_exists='replace' drops and recreates the table on every run.
    # Without an explicit dtype the text column is created as CLOB on Oracle;
    # String(100) matches the VARCHAR2(100) used by the cx_Oracle table above.
    df.to_sql(
        'covid_data_sql',
        engine,
        if_exists='replace',
        index=False,
        dtype={'Country': String(100)},
    )
finally:
    # Return the pooled connections once the load is finished.
    engine.dispose()

# Generate simple report
# Header section: title, underline and the number of rows that were loaded.
header_lines = [
    "Data Summary Report",
    "-----------------",
    f"Total Records: {len(df)}",
]
print("\n".join(header_lines))

# Ranking section: the five rows with the largest ActiveCases value.
print("\nTop 5 Countries by Active Cases:")
top_active = df.nlargest(5, 'ActiveCases')
print(top_active[['Country', 'ActiveCases']])

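# Output: ModuleNotFoundError: No module named 'cx_Oracle'
# (the cx_Oracle package is not installed in this environment; install it with `pip install cx_Oracle`)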