In [1]:
import requests
from bs4 import BeautifulSoup
import sqlite3


def view_database(db_file):
    """Print every row of the ``gdp_by_country_data`` table.

    Args:
        db_file: Path of the SQLite database file to open.

    Raises:
        sqlite3.Error: Propagated unchanged when the query fails (for
            example if the table does not exist). The connection is closed
            before the exception leaves this function.
    """
    conn = sqlite3.connect(db_file)
    try:
        rows = conn.execute("SELECT * FROM gdp_by_country_data").fetchall()
    finally:
        # Close on every path; previously a failing query left the
        # connection (and its file handle) open.
        conn.close()

    print("Database contents:")
    for row in rows:
        print(row)

In [2]:


def scrape_gdp_by_country(timeout=30):
    """Download the worldometers GDP-by-country page and extract its table rows.

    Every ``<tr>`` with at least seven ``<td>`` cells is treated as a data
    row. Cells 1, 2, 5 and 6 are read as country, GDP, population and GDP
    per capita; ``$`` signs and thousands separators are stripped, but the
    values are otherwise left as text.

    Args:
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get``.

    Returns:
        A list of ``(country, gdp, population, gdp_per_capita)`` tuples of
        strings; empty when no qualifying row is found.
    """
    url = "https://www.worldometers.info/gdp/gdp-by-country/"
    # Without a timeout the request can block forever on an unresponsive host.
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.content, 'html.parser')

    gdp_by_country_data = []
    for row in soup.find_all('tr'):
        cells = row.find_all('td')
        if len(cells) < 7:
            # Rows without enough <td> cells (e.g. header rows) carry no data.
            continue

        country = cells[1].text.strip()
        gdp = cells[2].text.strip().replace('$', '').replace(',', '')
        population = cells[5].text.strip().replace(',', '')
        gdp_per_capita = cells[6].text.strip().replace('$', '').replace(',', '')
        gdp_by_country_data.append((country, gdp, population, gdp_per_capita))

    return gdp_by_country_data

def create_database(gdp_by_country_data, db_file='gdp_by_country.db'):
    """Store scraped GDP rows in the ``gdp_by_country_data`` SQLite table.

    The table is created on first use. A row whose ``country`` already
    exists in the table is skipped, so existing rows are never overwritten.

    Args:
        gdp_by_country_data: Iterable of
            ``(country, gdp, population, gdp_per_capita)`` tuples.
        db_file: Path of the SQLite database file. Defaults to the file
            name this function has always written to.
    """
    conn = sqlite3.connect(db_file)
    try:
        conn.execute(
            '''CREATE TABLE IF NOT EXISTS gdp_by_country_data (id INTEGER PRIMARY KEY, country TEXT UNIQUE, gdp REAL, population INTEGER, gdp_per_capita REAL)'''
        )
        # OR IGNORE states the "skip duplicates" policy in the statement
        # itself instead of swallowing IntegrityError row by row.
        conn.executemany(
            "INSERT OR IGNORE INTO gdp_by_country_data (country, gdp, population, gdp_per_capita) VALUES (?, ?, ?, ?)",
            gdp_by_country_data,
        )
        conn.commit()
    finally:
        # Release the connection even if table creation or an insert fails.
        conn.close()

def main():
    """Scrape the GDP table and store it, skipping the database step when nothing was scraped."""
    scraped_rows = scrape_gdp_by_country()
    if not scraped_rows:
        # Nothing to store; leave the database untouched.
        return
    create_database(scraped_rows)

# Run the scrape-and-store pipeline only when this code executes as the
# main module (not when it is imported).
if __name__ == "__main__":
    main()




In [3]:
import csv
import sqlite3

def update_database(csv_file, db_file):
    """Rewrite ``country`` values in ``gdp_by_country_data`` from a CSV mapping.

    The CSV must have ``en_short_name`` and ``nationality`` columns. For each
    CSV row, every table row whose ``country`` equals ``en_short_name`` has
    its ``country`` replaced by ``nationality``. An update that would violate
    the UNIQUE constraint on ``country`` is reported on stdout and skipped;
    the remaining updates are still applied and committed.

    Args:
        csv_file: Path of the UTF-8 encoded CSV file holding the mapping.
        db_file: Path of the SQLite database file to update.

    Raises:
        KeyError: If a CSV row lacks one of the two required columns.
    """
    # The csv module expects files opened with newline='' so that it can do
    # its own newline handling (matters for quoted fields containing newlines).
    with open(csv_file, 'r', encoding='utf-8', newline='') as file:
        reader = csv.DictReader(file)
        country_dict = {row['en_short_name']: row['nationality'] for row in reader}

    conn = sqlite3.connect(db_file)
    try:
        cursor = conn.cursor()
        for country_name, nationality in country_dict.items():
            try:
                cursor.execute("UPDATE gdp_by_country_data SET country = ? WHERE country = ?", (nationality, country_name))
            except sqlite3.IntegrityError:
                # The new name is already taken by another row; keep both rows as they are.
                print(f"Duplicate entry found for {country_name}, skipping update.")
        conn.commit()
    finally:
        # Close on every path so an unexpected error cannot leak the connection.
        conn.close()

# Apply the CSV name mapping to the database file that create_database writes.
# NOTE(review): the saved output below reports a duplicate for every listed
# country, which suggests the target names were already present (presumably
# from an earlier run of this cell followed by a re-scrape) — confirm.
update_database('Data/countries.csv', 'gdp_by_country.db')


# Print the table so the effect of the update can be inspected.
view_database('gdp_by_country.db')

Duplicate entry found for Albania, skipping update.
Duplicate entry found for Algeria, skipping update.
Duplicate entry found for Andorra, skipping update.
Duplicate entry found for Angola, skipping update.
Duplicate entry found for Antigua and Barbuda, skipping update.
Duplicate entry found for Argentina, skipping update.
Duplicate entry found for Armenia, skipping update.
Duplicate entry found for Australia, skipping update.
Duplicate entry found for Austria, skipping update.
Duplicate entry found for Azerbaijan, skipping update.
Duplicate entry found for Bahamas, skipping update.
Duplicate entry found for Bahrain, skipping update.
Duplicate entry found for Bangladesh, skipping update.
Duplicate entry found for Barbados, skipping update.
Duplicate entry found for Belarus, skipping update.
Duplicate entry found for Belgium, skipping update.
Duplicate entry found for Belize, skipping update.
Duplicate entry found for Benin, skipping update.
Duplicate entry found for Bosnia and Herzegov