# Country Codes & Continents: A Dataset with ISO 3166-1 Alpha-2

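This notebook builds a dataset that maps each country's ISO 3166-1 Alpha-2 code to its name, continent, continent code, and an "IT hub" flag. The data is fetched from the REST Countries API, saved as a timestamped JSON file, and pushed to a GitHub repository.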

**Key Features:**

* Utilizes the ISO 3166-1 Alpha-2 standard for country codes.
* Includes a comprehensive list of countries and their associated continents.
* Provides a clean and organized dataset for various data analysis and mapping projects.

**Potential Use Cases:**

* Geocoding and mapping applications.
* Data analysis and visualization projects.
* Internationalization and localization tasks.
* Building applications that require country-specific information.

This notebook demonstrates a simple and efficient approach to gathering and organizing country-related data.

**Note:** 

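* The country data comes from the REST Countries API, a third-party service, and the "IT hub" flag is based on a hand-maintained list in this notebook; verify the accuracy of both independently before relying on them.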

In [1]:
# Import necessary libraries
import requests
import json
import datetime
import os
import shutil
from pytz import timezone
import git
from git import Repo
from kaggle_secrets import UserSecretsClient

# Indian Standard Time (IST) timezone, used by save_country_data() and
# push_to_github() so that file-name and commit-message timestamps are in IST
ist = timezone('Asia/Kolkata')

In [2]:
def fetch_country_data():
    """
    Fetches country data from the REST Countries API and returns a dictionary
    keyed by ISO 3166-1 Alpha-2 code.

    Each value is a dictionary with the keys ``country_name``, ``continent``,
    ``continent_code`` and ``it_hub_country`` ("Yes" or "No"). Entries without
    a ``cca2`` code are skipped. A missing name or continent is reported as
    "Unknown"; when the API lists several continents only the first is used.

    Returns:
        dict: A dictionary containing country data with ISO Alpha-2 codes as keys.

    Raises:
        requests.exceptions.RequestException: If the request fails, times out,
            or the API answers with an HTTP error status.
        ValueError: If the response body is not a JSON list of countries.
    """
    # Mapping of continent full names to continent codes
    continent_codes = {
        "Africa": "AF",
        "Asia": "AS",
        "Europe": "EU",
        "North America": "NA",
        "South America": "SA",
        "Oceania": "OC",
        "Antarctica": "AN",
    }

    # Set of ISO Alpha-2 codes for countries flagged as IT hubs
    it_hub_countries = {
        'US', 'IN', 'CN', 'JP', 'KR', 'DE', 'GB', 'FR', 'CA', 'AU',
        'SG', 'SE', 'FI', 'IE', 'IL', 'NL', 'CH', 'ES', 'IT', 'BR',
        'ZA', 'RU', 'AE', 'TR', 'PL', 'VN', 'MY', 'PH', 'TH', 'ID',
        'HK', 'TW',
    }

    # REST Countries API endpoint
    url = "https://restcountries.com/v3.1/all"
    # Request only the fields read below: this keeps the payload small, and
    # the API documentation asks for a field filter on the /all endpoint.
    params = {"fields": "cca2,name,continents"}
    # A timeout prevents the notebook from hanging on a stalled connection.
    response = requests.get(url, params=params, timeout=30)
    # Fail loudly on 4xx/5xx instead of parsing an error payload as data.
    response.raise_for_status()
    countries = response.json()
    if not isinstance(countries, list):
        raise ValueError(
            f"Unexpected response from {url}: expected a JSON list of countries"
        )

    country_mapping = {}
    for country in countries:
        country_code = country.get("cca2")
        if not country_code:
            continue

        # `or {}` also covers an explicit null "name" object.
        country_name = (country.get("name") or {}).get("common", "Unknown")

        continents = country.get("continents") or []
        if continents:
            continent = continents[0]
            continent_code = continent_codes.get(continent, "Unknown")
        else:
            continent = "Unknown"
            continent_code = "Unknown"

        country_mapping[country_code] = {
            "country_name": country_name,
            "continent": continent,
            "continent_code": continent_code,
            "it_hub_country": "Yes" if country_code in it_hub_countries else "No",
        }

    return country_mapping

In [3]:
def save_country_data(country_data):
    """
    Saves the country data to a JSON file with a timestamped filename in the
    current working directory.

    The file is named ``RE_<YYYY-MM-DD_HH_MM_SS>_country_details.json``, where
    the timestamp is the current time in the module-level ``ist`` timezone.

    Args:
        country_data (dict): The country data to be saved.

    Returns:
        str: The path of the JSON file that was written.
    """
    # Timestamp in the format YYYY-MM-DD_HH_MM_SS. Underscores are used
    # instead of colons because colons are not allowed in Windows file names.
    timestamp = datetime.datetime.now(ist).strftime("%Y-%m-%d_%H_%M_%S")
    filename = f"RE_{timestamp}_country_details.json"
    filepath = os.path.join(os.getcwd(), filename)

    # ensure_ascii=False keeps non-ASCII country names readable in the file.
    with open(filepath, "w", encoding="utf-8") as file:
        json.dump(country_data, file, indent=4, ensure_ascii=False)

    print(f"Country data saved as {filepath}")
    # Returning the path lets callers use the file without rescanning the directory.
    return filepath

In [4]:
def push_to_github(repo_url):
    """
    Automates pushing the generated JSON file to a GitHub repository.

    Finds the most recently created ``RE*country_details.json`` file in the
    current working directory, clones the repository (or pulls if a local
    clone already exists), copies the file into ``Requirement/Daily`` inside
    the clone, commits it, and pushes the commit to ``origin``.

    Args:
        repo_url (str): The URL of the GitHub repository.

    Returns:
        bool: True if the file was committed and the push reported no error;
        False if no matching JSON file was found or any git step failed.
    """
    working_dir = os.getcwd()

    # Collect full paths so the creation-time lookup does not depend on the
    # process still being in the same working directory.
    json_paths = [
        os.path.join(working_dir, name)
        for name in os.listdir(working_dir)
        if name.startswith("RE") and name.endswith("country_details.json")
    ]
    if not json_paths:
        print("Error fetching the recent .json file: No JSON files found!")
        return False

    # Get the most recently created JSON file
    latest_path = max(json_paths, key=os.path.getctime)
    latest_file = os.path.basename(latest_path)

    # Local path of the cloned repository and the target folder inside it
    local_repo_path = os.path.join(working_dir, "YouTubeFoodChannelAnalysis")
    destination_path = os.path.join(local_repo_path, "Requirement", "Daily")
    target_path = os.path.join(destination_path, latest_file)

    print(f"Latest JSON file: {latest_file}")
    try:
        if os.path.exists(local_repo_path):
            print("Repository already cloned; pulling latest changes.")
            repo = Repo(local_repo_path)
            repo.remote(name="origin").pull()
            print("Successfully pulled the latest changes.")
        else:
            repo = Repo.clone_from(repo_url, local_repo_path)
            print("Successfully cloned the repository.")

        os.makedirs(destination_path, exist_ok=True)
        # Copy the latest JSON file to the destination path in the repository
        shutil.copyfile(latest_path, target_path)

        # Stage the new file and commit it with an IST timestamp in the message
        repo.index.add([target_path])
        timestamp = datetime.datetime.now(ist).strftime("%Y-%m-%d_%H:%M:%S")
        repo.index.commit(f"{timestamp} - Added {latest_file} from Kaggle notebook")

        push_result = repo.remote(name="origin").push()
        # A non-empty result does not mean success: each PushInfo carries
        # flags, and the ERROR bit is set when a ref was rejected or failed.
        push_failed = not push_result or any(
            info.flags & info.ERROR for info in push_result
        )
        if push_failed:
            print("Pushing to GitHub failed.")
            return False

        print("Output files successfully pushed to GitHub!")
        return True
    except Exception as e:
        # Best-effort automation: report the failure instead of crashing the notebook.
        print(f"An error occurred during git automation: {e}")
        return False

In [5]:
def main():
    """
    Runs the full pipeline: download the country data, write it to a
    timestamped JSON file, and publish that file to GitHub.

    Reads the module-level ``repo_url`` variable, which must be set before
    this function is called.
    """
    # Download and persist in one step; the saved file is picked up from the
    # working directory by the push step below.
    save_country_data(fetch_country_data())
    # Publish the most recent export to the GitHub repository
    push_to_github(repo_url)

In [6]:
if __name__ == "__main__":
    # Read the GitHub repository URL from the Kaggle secret store; main()
    # picks it up through the module-level `repo_url` name.
    repo_url = UserSecretsClient().get_secret("requirementRepoUrl")
    # Run the fetch -> save -> push pipeline
    main()

Country data saved as /kaggle/working/RE_2025-02-07_06_41_12_country_details.json
Latest JSON file: RE_2025-02-07_06_41_12_country_details.json
Successfully cloned the repository.
Output files successfully pushed to GitHub!
