In [3]:
# Install the notebook's dependencies.
# `%pip` (instead of `!pip`) installs into the environment of the running kernel, so the
# packages are importable by the cells below. pycountry and deep-translator are pinned to
# the versions reported by the recorded install log so a re-run resolves the same releases.
%pip install requests beautifulsoup4 pandas pycountry==24.6.1 deep-translator==1.11.4

Collecting pycountry
  Downloading pycountry-24.6.1-py3-none-any.whl.metadata (12 kB)
Collecting deep-translator
  Downloading deep_translator-1.11.4-py3-none-any.whl.metadata (30 kB)
Downloading pycountry-24.6.1-py3-none-any.whl (6.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m92.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading deep_translator-1.11.4-py3-none-any.whl (42 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.3/42.3 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pycountry, deep-translator
Successfully installed deep-translator-1.11.4 pycountry-24.6.1


In [4]:
# Importaciones
import pandas as pd
import requests
from deep_translator import GoogleTranslator

In [5]:
# Download the GeoNames dump files that the processing functions below read from the
# current working directory.
# `wget -N` turns on timestamping: a file is fetched again only if the remote copy is newer
# than the local one, so re-running this cell does not re-download unchanged files.
# `unzip -o` overwrites previously extracted files without prompting.
!wget -N https://download.geonames.org/export/dump/countryInfo.txt
!wget -N https://download.geonames.org/export/dump/admin1CodesASCII.txt
!wget -N https://download.geonames.org/export/dump/allCountries.zip
!unzip -o allCountries.zip

--2025-03-24 15:34:12--  https://download.geonames.org/export/dump/countryInfo.txt
Resolving download.geonames.org (download.geonames.org)... 5.9.152.54
Connecting to download.geonames.org (download.geonames.org)|5.9.152.54|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 31667 (31K) [text/plain]
Saving to: ‘countryInfo.txt’


2025-03-24 15:34:13 (336 KB/s) - ‘countryInfo.txt’ saved [31667/31667]

--2025-03-24 15:34:13--  https://download.geonames.org/export/dump/admin1CodesASCII.txt
Resolving download.geonames.org (download.geonames.org)... 5.9.152.54
Connecting to download.geonames.org (download.geonames.org)|5.9.152.54|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 147658 (144K) [text/plain]
Saving to: ‘admin1CodesASCII.txt’


2025-03-24 15:34:13 (391 KB/s) - ‘admin1CodesASCII.txt’ saved [147658/147658]

--2025-03-24 15:34:13--  https://download.geonames.org/export/dump/allCountries.zip
Resolving download.geonames.org (download.geo

In [6]:
# Funciones Auxiliares
def translate_name(name):
    """Translate a country name from English to Spanish.

    The translation is best-effort: any failure is reported on stdout and the
    original value is handed back, so one bad lookup never aborts the caller.

    Args:
        name: English name to translate.

    Returns:
        The Spanish translation, or ``name`` unchanged when it is not a string,
        is empty/blank, the translator raises, or the translator returns an
        empty result.
    """
    # Nothing meaningful to translate: skip the translator call entirely.
    if not isinstance(name, str) or not name.strip():
        return name

    try:
        translated = GoogleTranslator(source='en', target='es').translate(name)
    except Exception as e:
        print(f"Error al traducir {name}: {e}")
        return name

    # Never replace a real name with an empty/None translation.
    return translated or name

In [7]:
# Funciones Auxiliares
def get_country_extra_data():
    """
    Fetch supplementary per-country data from the restcountries.com API.

    For every country entry in the response the following values are collected:
      - calling_code
      - currency_code, currency_name, currency_symbol (first listed currency only)
      - languages (language names joined with ", ")

    Calling code rule: ``idd.root`` followed by the first entry of
    ``idd.suffixes``. Two exceptions: a root without suffixes is used as is, and
    for the roots "+1" and "+7" with several suffixes only the root is kept.

    Returns:
        A 5-tuple of dicts ``(calling_code, currency_code, currency_name,
        currency_symbol, languages)``, each keyed by the upper-cased ISO2 code
        (``cca2``). Countries without currencies are absent from the three
        currency dicts. When the request fails, the status is an HTTP error, or
        the body is not a JSON list, the error is printed and five empty dicts
        are returned.
    """
    # Ask only for the fields that are read below so the response stays small.
    url = "https://restcountries.com/v3.1/all?fields=cca2,idd,currencies,languages"
    # "+1" and "+7" are complete E.164 country codes shared by several countries.
    # NOTE(review): for these roots the API appears to list sub-ranges (e.g. area codes)
    # as suffixes, so gluing the first suffix on would yield a bogus code — confirm
    # against live data.
    shared_roots = {"+1", "+7"}

    try:
        # The timeout keeps the notebook from hanging forever on a stalled connection.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        # Decoding happens inside the try so a malformed body is handled like any other failure.
        data = response.json()
        if not isinstance(data, list):
            raise ValueError("unexpected response format")
    except Exception as e:
        print("Error al obtener datos de restcountries.com:", e)
        return {}, {}, {}, {}, {}

    calling_dict = {}
    currency_dict = {}
    currency_name_dict = {}
    currency_symbol_dict = {}
    language_dict = {}

    for country in data:
        if not isinstance(country, dict):
            continue
        # `or` fallbacks also cover keys that are present but null.
        cca2 = (country.get("cca2") or "").upper()
        if not cca2:
            # Without an ISO2 code the entry cannot be joined to any country.
            continue

        idd = country.get("idd") or {}
        root = idd.get("root") or ""
        suffixes = idd.get("suffixes") or []
        if not root:
            calling_dict[cca2] = ""
        elif not suffixes or (len(suffixes) > 1 and root in shared_roots):
            calling_dict[cca2] = root
        else:
            calling_dict[cca2] = root + suffixes[0]

        currencies = country.get("currencies") or {}
        if currencies:
            # Only the first listed currency is kept.
            currency_code = next(iter(currencies))
            currency_data = currencies[currency_code] or {}
            currency_dict[cca2] = currency_code
            currency_name_dict[cca2] = currency_data.get("name", "")
            currency_symbol_dict[cca2] = currency_data.get("symbol", "")

        languages = country.get("languages") or {}
        language_dict[cca2] = ", ".join(languages.values())

    return calling_dict, currency_dict, currency_name_dict, currency_symbol_dict, language_dict

In [8]:
# Procesamiento de Datos
def process_countries():
    """Parse ``countryInfo.txt`` into a DataFrame of basic country information.

    Lines starting with ``#`` and lines with fewer than 12 tab-separated fields
    are skipped. Only the first row per ``iso_code`` is kept.

    Returns:
        A DataFrame with the columns ``iso_code``, ``iso_code3``,
        ``numeric_code``, ``name_en``, ``capital``, ``name`` (``name_en`` passed
        through ``translate_name``), ``flag_png`` and ``flag_svg`` (flagcdn.com
        URLs built from the lower-cased ``iso_code``). All values are strings.
        When the file has no data rows, an empty frame with the same columns is
        returned.
    """
    columns = ['iso_code', 'iso_code3', 'numeric_code', 'name_en', 'capital']
    countries_data = []

    with open('countryInfo.txt', 'r', encoding='utf-8') as f:
        for line in f:
            if line.startswith('#'):
                continue
            fields = line.split('\t')

            if len(fields) < 12:
                continue
            countries_data.append({
                'iso_code': fields[0].strip(),
                'iso_code3': fields[1].strip(),
                'numeric_code': fields[2].strip(),
                'name_en': fields[4].strip(),
                'capital': fields[5].strip(),
            })

    # Explicit columns keep the frame well-formed even when no row was parsed.
    df = pd.DataFrame(countries_data, columns=columns)
    # De-duplicate BEFORE translating so each country triggers exactly one translator call.
    df = df.drop_duplicates('iso_code').copy()

    df['name'] = df['name_en'].apply(translate_name)
    iso_lower = df['iso_code'].str.lower()
    df['flag_png'] = "https://flagcdn.com/w320/" + iso_lower + ".png"
    df['flag_svg'] = "https://flagcdn.com/" + iso_lower + ".svg"

    return df

In [9]:
# Procesamiento de Datos
def process_regions():
    """Parse ``admin1CodesASCII.txt`` into a DataFrame of regions.

    Each usable line has at least four tab-separated fields; the first is a
    ``COUNTRY.REGION`` code and the second the region name. Lines with fewer
    fields, or whose code contains no ``.``, are skipped.

    Returns:
        A DataFrame with the columns ``country_iso`` (country part of the code),
        ``iso_code`` (``"<country>-<region>"`` with the region part left-padded
        with zeros to two characters) and ``name``; only the first row per
        ``(country_iso, iso_code)`` is kept. An empty frame with the same
        columns is returned when no line could be parsed.
    """
    columns = ['country_iso', 'iso_code', 'name']
    regions = []
    with open('admin1CodesASCII.txt', 'r', encoding='utf-8') as f:
        for line in f:
            parts = line.split('\t')
            if len(parts) < 4:
                continue
            # Index instead of unpacking so lines with extra trailing columns are accepted.
            code, name = parts[0], parts[1]
            # Split on the first dot only; a code without a dot cannot be interpreted.
            country_code, separator, region_code = code.partition('.')
            if not separator:
                continue
            country_code = country_code.strip()
            regions.append({
                'country_iso': country_code,
                'iso_code': f"{country_code}-{region_code.strip().zfill(2)}",
                'name': name.strip()
            })
    # Explicit columns keep drop_duplicates working when no region was parsed.
    return pd.DataFrame(regions, columns=columns).drop_duplicates(['country_iso', 'iso_code']).copy()

In [10]:
# Procesamiento de Datos
def process_cities(regions, feature_classes=None):
    """Parse ``allCountries.txt`` into a DataFrame of places.

    Lines with fewer than 15 tab-separated fields are skipped, and so are lines
    whose latitude, longitude or population cannot be parsed as a number (one
    malformed line must not abort the whole parse).

    Args:
        regions: DataFrame with an ``iso_code`` column holding the valid
            ``"<country>-<region>"`` codes, e.g. the result of
            ``process_regions()``.
        feature_classes: Optional iterable of values to keep, matched against
            the 7th field of each line (index 6; the GeoNames feature class,
            where ``"P"`` denotes populated places). ``None`` (default) keeps
            every line, which is the historical behaviour.

    Returns:
        A DataFrame with the columns ``name``, ``country_iso``, ``region_code``
        (left-padded with zeros to two characters), ``latitude``,
        ``longitude``, ``population`` (nullable ``Int64``; missing when the
        field is blank), ``is_capital`` (feature code equals ``"PPLC"``) and
        ``region_iso`` (``"<country_iso>-<region_code>"`` when that code is
        present in ``regions['iso_code']``, missing otherwise).
    """
    columns = ['name', 'country_iso', 'region_code', 'latitude', 'longitude',
               'population', 'is_capital']
    wanted_classes = None if feature_classes is None else frozenset(feature_classes)

    cities = []
    with open('allCountries.txt', 'r', encoding='utf-8') as f:
        for line in f:
            fields = line.split('\t')
            if len(fields) < 15:
                continue
            if wanted_classes is not None and fields[6].strip() not in wanted_classes:
                continue
            try:
                latitude = float(fields[4].strip())
                longitude = float(fields[5].strip())
                raw_population = fields[14].strip()
                population = int(raw_population) if raw_population else None
            except ValueError:
                # Malformed numeric field: drop this line, keep processing the rest.
                continue
            cities.append({
                'name': fields[1].strip(),
                'country_iso': fields[8].strip(),
                'region_code': fields[10].strip().zfill(2),
                'latitude': latitude,
                'longitude': longitude,
                'population': population,
                'is_capital': (fields[7].strip() == 'PPLC')
            })

    # NOTE(review): de-duplication is by (country, name), so same-named places in different
    # regions of one country collapse to the first occurrence — confirm this is intended.
    df = pd.DataFrame(cities, columns=columns).drop_duplicates(['country_iso', 'name']).copy()
    # Nullable integers: a single blank population would otherwise turn the whole column
    # into floats and export values such as "1234.0".
    df['population'] = df['population'].astype('Int64')

    valid_regions = regions['iso_code'].unique()
    df['region_iso'] = df['country_iso'] + '-' + df['region_code']
    # Blank out region references that do not exist in the regions table.
    df['region_iso'] = df['region_iso'].where(df['region_iso'].isin(valid_regions), None)
    return df

In [11]:
def main():
    """Run the whole pipeline and export the results as four CSV files.

    Steps, in order: fetch the extra per-country lookups from RestCountries,
    parse the GeoNames files (countries, regions, cities), attach the lookups to
    the countries frame, then write ``countries.csv``, ``country_info.csv``,
    ``regions.csv`` and ``cities.csv`` to the current working directory and
    print a summary of the generated files.
    """
    # Extra per-country lookups, each keyed by ISO2 code.
    (calling_codes, currency_codes, currency_names,
     currency_symbols, spoken_languages) = get_country_extra_data()

    # Parse the GeoNames files.
    countries = process_countries()
    regions = process_regions()
    cities = process_cities(regions)

    # Attach every lookup as a new column; countries missing from a lookup get "".
    lookups = {
        'calling_code': calling_codes,
        'currency_code': currency_codes,
        'currency_name': currency_names,
        'currency_symbol': currency_symbols,
        'languages': spoken_languages,
    }
    for column, table in lookups.items():
        # `table=table` binds the current dict to the lambda at definition time.
        countries[column] = countries['iso_code'].apply(
            lambda code, table=table: table.get(code, "")
        )

    # Split into the Country model (core identifiers) and CountryInfo (extra details).
    main_columns = ['iso_code', 'iso_code3', 'numeric_code', 'name', 'name_en']
    info_columns = ['iso_code', 'capital', 'flag_png', 'flag_svg', 'calling_code',
                    'currency_code', 'currency_name', 'currency_symbol', 'languages']
    country_main = countries[main_columns]
    country_info = countries[info_columns].rename(columns={'iso_code': 'country_iso'})

    # Export as UTF-8 with a BOM ("utf-8-sig"), without the index column.
    exports = (
        (country_main, 'countries.csv'),
        (country_info, 'country_info.csv'),
        (regions, 'regions.csv'),
        (cities, 'cities.csv'),
    )
    for frame, filename in exports:
        frame.to_csv(filename, index=False, encoding="utf-8-sig")

    summary_lines = (
        "Archivos CSV generados:",
        " - countries.csv (modelo Country)",
        " - country_info.csv (modelo CountryInfo)",
        " - regions.csv",
        " - cities.csv",
    )
    for summary_line in summary_lines:
        print(summary_line)

In [12]:
# Run the full pipeline; the four CSV files are written to the current working directory.
main()

Archivos CSV generados:
 - countries.csv (modelo Country)
 - country_info.csv (modelo CountryInfo)
 - regions.csv
 - cities.csv
