Extract data from the REST Countries API (`https://restcountries.com/v3.1/name/{name}`) for five countries: India, US, UK, China, and Russia.

Load it by saving each country’s data into a separate .json file named after the country (e.g., india.json, us.json)

STEP 1. IMPORTING NECESSARY LIBRARIES

In [108]:
import requests #used to send http request to api  in order to fetch the data 
import json     #used to handle the data coming from api (lets u save the api data in .json format)        
import os       #used to handle the file operation in the system
from datetime import datetime
import schedule
import time
import pytz

STEP 2. DEFINING COUNTRY LIST

In [111]:
# Names substituted into the API URL, one request (and one .json file) per entry.
country_list = [
    'india',
    'us',
    'uk',
    'china',
    'russia',
]

STEP 3. DEFINING API URL

In [114]:
# Base endpoint; the country name is appended to it inside the ETL loop.
api_url = "https://restcountries.com/v3.1/name/"

STEP 5. CREATING FOLDER FOR JSON FILES

In [117]:
# Folder that receives one JSON file per country.
output_folder = 'country_data'

# exist_ok=True: create the folder when missing, do nothing (no error) when it is already there.
os.makedirs(name=output_folder, exist_ok=True)

STEP 6. ETL LOGIC

In [122]:
def run_etl():
    """Fetch every country in ``country_list`` from the API and save it as JSON.

    For each name in the module-level ``country_list`` the function requests
    ``api_url + name`` and, on HTTP 200, writes the decoded JSON payload to
    ``<output_folder>/<name>.json``, overwriting any previous file.

    A failure for one country (network error, timeout, non-200 status, a body
    that is not valid JSON, or a file-write error) is reported with ``print``
    and the loop moves on to the next country, so a single bad request cannot
    abort the run or the scheduler loop that calls this function.

    Returns:
        None. Progress and failures are reported on stdout.
    """
    print(f"\n✅ ETL Started at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

    # Re-create the target folder in case it was removed after the setup cell ran.
    os.makedirs(output_folder, exist_ok=True)

    for country in country_list:
        print(f"\nProcessing: {country.upper()}")

        # EXTRACT
        individual_url = api_url + country
        try:
            # Without a timeout a stalled connection would block the scheduler forever.
            response = requests.get(individual_url, timeout=30)
        except requests.RequestException as exc:
            print(f"Failed to fetch data for {country}. Request error: {exc}")
            continue

        if response.status_code != 200:
            print(f"Failed to fetch data for {country}. HTTP Status: {response.status_code}")
            continue

        try:
            data = response.json()
        except ValueError as exc:  # requests' JSON decode errors derive from ValueError
            print(f"Failed to fetch data for {country}. Invalid JSON in response: {exc}")
            continue

        # TRANSFORM (No transformation required)

        # LOAD
        file_path = os.path.join(output_folder, f"{country}.json")  # Join file with folder
        try:
            # 'w' = write mode; creates or overwrites the file. Explicit encoding keeps
            # the output independent of the platform's default locale.
            with open(file_path, 'w', encoding='utf-8') as f:
                json.dump(data, f, indent=4)  # Write JSON data with indentation
        except OSError as exc:
            print(f"Failed to save data for {country}. File error: {exc}")
            continue
        print(f"Data saved to: {file_path}")

    print(f"\n✅ ETL Finished at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")


STEP 7. AUTOMATING THE PIPELINE TO RUN TWICE A DAY, AT 12 AM AND 12 PM

In [None]:
# Timezone the two daily run times are expressed in.
ist = pytz.timezone("Asia/Kolkata")

# Tag used to find this notebook's jobs again.
ETL_JOB_TAG = "country_etl"

# Remove jobs registered by an earlier execution of this cell; otherwise
# re-running the cell would make the ETL fire twice at every slot.
schedule.clear(ETL_JOB_TAG)

# Schedule tasks. Passing the timezone pins the run times to IST; without it
# `schedule` uses the machine's local time. (The tz argument of .at() requires
# schedule >= 1.2.0.)
schedule.every().day.at("00:00", ist).do(run_etl).tag(ETL_JOB_TAG)
schedule.every().day.at("12:00", ist).do(run_etl).tag(ETL_JOB_TAG)

print("Scheduler started. Waiting for 12:00 AM and 12:00 PM IST")

# Loop until the kernel is interrupted, checking once a minute for due jobs.
try:
    while True:
        schedule.run_pending()
        time.sleep(60)  # Check every minute
except KeyboardInterrupt:
    # Interrupting the kernel is the expected way to stop; exit without a traceback.
    print("Scheduler stopped.")

Scheduler started. Waiting for 12:00 AM and 12:00 PM IST
