Visa matrix to list of Available countries

In [23]:
import pandas as pd


# Passport country whose row of the visa matrix is exported below.
# It is used as a row label (`visa_data.loc[COUNTRY_TARGET, ...]`), so it must
# match an entry of the matrix index exactly.
COUNTRY_TARGET = 'Russia'

# Visa matrix loaded from CSV; `index_col=0` turns the first CSV column into
# the row index, and the remaining column labels are looked up by name.
# NOTE(review): presumably rows are passport countries and columns are
# destinations — confirm against the data file.
visa_data = pd.read_csv('../data/passport-index-matrix.csv', index_col=0)

def get_visa_info(country, category, description):
    """Format one destination as a ``country, category, description`` line.

    The three values are joined with a comma followed by a single space.
    No CSV quoting or escaping is applied to any of them.
    """
    return "{}, {}, {}".format(country, category, description)

# Matrix values treated as "available": the three named visa regimes plus
# every whole number from 10 through 360, stored as strings.
# NOTE(review): the numbers are presumably visa-free stay lengths in days — confirm.
available_categories = ['visa free', 'visa on arrival', 'e-visa']
available_categories += [str(number) for number in range(10, 361)]

def get_visa_info_for_country(country_target, visa_data):
    """Return formatted lines for every column whose value is an available category.

    Args:
        country_target: Row label of ``visa_data`` to read.
        visa_data: DataFrame indexed by row label, with one column per country.

    Returns:
        A list of strings built by ``get_visa_info``, in column order. The
        column equal to ``country_target`` is skipped, as is any column whose
        cell value is not in the module-level ``available_categories``.
    """
    placeholder_description = "Country description"  # Replace with actual description
    lines = []
    for destination in visa_data.columns:
        if country_target == destination:
            continue
        requirement = visa_data.loc[country_target, destination]
        if requirement not in available_categories:
            continue
        lines.append(get_visa_info(destination, requirement, placeholder_description))
    return lines

# Collect the formatted lines for the configured passport country.
visa_info_for_target_country = get_visa_info_for_country(COUNTRY_TARGET, visa_data)

# Save them to disk: a fixed header line, then one line per destination.
output_file_path = '../data/available_countries.csv'
with open(output_file_path, 'w', encoding='utf-8') as file:
    print("countries,category,description", file=file)
    for info in visa_info_for_target_country:
        print(info, file=file)
    


Available countries to KML file with borders

In [14]:
from bs4 import BeautifulSoup
import csv
import pandas as pd


# Keep COUNTRY_TARGET if an earlier cell already defined it; otherwise fall
# back to the default so this cell can also run on its own.
try:
    COUNTRY_TARGET
except NameError:
    COUNTRY_TARGET = 'Russia'


def read_kml(file_path):
    """Read a KML file and map each placemark name to its geometry markup.

    Args:
        file_path: Path of the KML file, read as UTF-8 and parsed with
            BeautifulSoup's ``'xml'`` parser.

    Returns:
        dict: ``{name: {'geometry': markup}}`` with one entry per
        ``<Placemark>``. ``name`` is the stripped text of the placemark's
        ``<name>`` element and ``markup`` is the serialized geometry element.
        If several placemarks share a name, the last one wins.

    Placemarks without a ``<name>`` are skipped. The geometry is the
    ``<MultiGeometry>`` element when present; otherwise the first
    ``<Polygon>``, ``<LineString>`` or ``<Point>`` is used, so single-shape
    placemarks no longer end up with the literal text ``None``. A placemark
    with no geometry at all gets an empty string.
    """
    # Only consulted when a placemark has no <MultiGeometry>.
    fallback_geometry_tags = ['Polygon', 'LineString', 'Point']

    with open(file_path, 'r', encoding='utf-8') as file:
        soup = BeautifulSoup(file.read(), 'xml')

    countries = {}
    for placemark in soup.find_all('Placemark'):
        name_tag = placemark.find('name')
        if name_tag is None:
            # Nothing to key this placemark by.
            continue

        geometry_tag = placemark.find('MultiGeometry')
        if geometry_tag is None:
            geometry_tag = placemark.find(fallback_geometry_tags)

        # str(None) would inject the text "None" into the generated KML.
        geometry = str(geometry_tag) if geometry_tag is not None else ''
        countries[name_tag.text.strip()] = {'geometry': geometry}
    return countries

def process_csv(csv_file, countries_data):
    """Join the available-countries CSV with the geometries read from KML.

    Args:
        csv_file: Path of a UTF-8 CSV whose first line is a header and whose
            rows start with ``country, category``. Any further columns are
            ignored; the description comes from ``get_description``.
        countries_data: Mapping ``{country_name: {'geometry': markup}}``.

    Returns:
        tuple: ``(header, output_data)``. ``header`` is the list of header
        fields (empty for an empty file). ``output_data`` is a list of dicts
        with keys ``category``, ``name``, ``description`` and ``geometry``,
        one per CSV row whose country is present in ``countries_data``.

    Category handling:
        * Surrounding whitespace is removed, because the rows are written
          with a ``", "`` separator and would otherwise carry a leading space.
        * A purely numeric category below 30 becomes ``'less 30'``, 30 to 45
          becomes ``'30-45'``, and larger numbers stay as ``int``.
        * Anything else is kept as the stripped text.

    Rows with fewer than two fields (for example blank lines) are skipped.
    Countries missing from ``countries_data`` are reported on stdout.
    """
    # newline='' lets the csv module do its own newline handling.
    with open(csv_file, 'r', encoding='utf-8', newline='') as file:
        reader = csv.reader(file)
        header = next(reader, [])
        rows = list(reader)

    output_data = []

    for row in rows:
        if len(row) < 2:
            # Blank or truncated line: nothing usable.
            continue

        country_name = row[0]
        if country_name not in countries_data:
            print(f"Country '{country_name}' don't found")
            continue

        category = row[1].strip()
        if category.isdecimal():
            category = int(category)
            if category < 30:
                category = 'less 30'
            elif category <= 45:
                category = '30-45'

        output_data.append({
            'category': category,
            'name': country_name,
            'description': get_description(country_name),
            'geometry': countries_data[country_name]['geometry'],
        })

    return header, output_data


def get_countries_elements():
    """Load the cost-of-living and quality-of-life tables and outer-join them.

    Returns:
        DataFrame: rows of ``../data/cost_of_living.csv`` and
        ``../data/quality_of_life.csv`` merged on the ``Country`` column with
        ``how='outer'``, so a country present in only one file is kept.
    """
    left_table, right_table = (
        pd.read_csv(csv_path)
        for csv_path in ('../data/cost_of_living.csv', '../data/quality_of_life.csv')
    )
    # The index-based join flags are left at their defaults (False).
    return left_table.merge(right_table, on='Country', how='outer')


def categorize_value(value, param_name, countries_elements, n_bins=6):
    """Map a value to the label of the equal-width range of its column it falls in.

    The column's span ``[min, max]`` is split into ``n_bins`` ranges of equal
    width. Each range is closed on the right, and the first one also contains
    the minimum. The returned label is ``"<lower>-<upper>"`` built from the
    actual range edges, printed as whole numbers when a range is at least
    1 wide and with two decimals otherwise.

    Args:
        value: The cell value to categorize.
        param_name: Column of ``countries_elements`` the value belongs to.
            For ``'Country'`` the value is returned unchanged.
        countries_elements: DataFrame providing the column's min and max.
        n_bins: Number of equal-width ranges (default 6).

    Returns:
        The range label, or ``''`` when the value is missing or not numeric,
        the column has no numeric data, or the value lies outside the
        column's span.

    Raises:
        ValueError: If ``n_bins`` is smaller than 1.
    """
    if param_name == 'Country':
        return value

    if n_bins < 1:
        raise ValueError('n_bins must be at least 1')

    if pd.isna(value):
        return ''

    # Coerce so that text columns or a text value yield NaN instead of
    # raising in the arithmetic below.
    column = pd.to_numeric(countries_elements[param_name], errors='coerce')
    number = pd.to_numeric(value, errors='coerce')
    if pd.isna(number) or not column.notna().any():
        return ''

    number = float(number)
    min_value = float(column.min())
    max_value = float(column.max())
    if number < min_value or number > max_value:
        return ''

    interval_size = (max_value - min_value) / n_bins
    # Whole-number labels are only unambiguous when each range is >= 1 wide.
    decimals = 0 if interval_size >= 1 else 2

    def make_label(lower, upper):
        return f'{lower:.{decimals}f}-{upper:.{decimals}f}'

    if interval_size == 0:
        # Every value in the column is identical: a single degenerate range.
        return make_label(min_value, max_value)

    # Pin the last edge to the true maximum so rounding error in the
    # multiplication can never push the largest value out of range.
    edges = [min_value + step * interval_size for step in range(n_bins)]
    edges.append(max_value)

    for index in range(n_bins):
        if number <= edges[index + 1]:
            return make_label(edges[index], edges[index + 1])
    return ''


def write_county_structure(out_file, country, countries_elements):
    """Write one ``<Placemark>`` element for a country to an open KML file.

    Args:
        out_file: Writable text file object.
        country: Dict with keys ``category``, ``description``, ``name`` and
            ``geometry``. The category is written as the placemark's
            ``<name>``; the geometry is written verbatim because it is
            already XML markup.
        countries_elements: DataFrame with a ``Country`` column. The first
            row whose stripped ``Country`` equals the stripped country name
            supplies the ``<ExtendedData>`` entries.

    Each column of the matching row except ``Rank_x`` and ``Rank_y`` becomes a
    ``<Data name="column"><value>…</value></Data>`` entry, with the value
    passed through ``categorize_value``. If no row matches, a message is
    printed and ``<ExtendedData>`` is left empty.

    All text and attribute values are XML-escaped so that characters such as
    ``&``, ``<`` or ``"`` cannot produce an invalid document.
    """
    # Local import keeps this block self-contained.
    from xml.sax.saxutils import escape

    def as_text(value):
        return escape(str(value))

    def as_attribute(value):
        # The attribute is wrapped in double quotes below, so escape those too.
        return escape(str(value), {'"': '&quot;'})

    out_file.write('    <Placemark>\n')
    out_file.write(f'      <name>{as_text(country["category"])}</name>\n')
    out_file.write(f'      <description>{as_text(country["description"])}</description>\n')
    out_file.write('      <ExtendedData>\n')

    matching_rows = countries_elements[countries_elements['Country'].str.strip() == country['name'].strip()]
    if not matching_rows.empty:
        country_data = matching_rows.iloc[0]

        # Write the country's parameters as <Data name="parameter"><value>value</value></Data>
        for param_name, param_value in country_data.items():
            if param_name in ('Rank_x', 'Rank_y'):
                continue

            param_value = categorize_value(param_value, param_name, countries_elements)
            out_file.write(f'        <Data name="{as_attribute(param_name)}">\n')
            out_file.write(f'          <value>{as_text(param_value)}</value>\n')
            out_file.write('        </Data>\n')
    else:
        print(f"No match found for country: {country['name']}")

    out_file.write('      </ExtendedData>\n')
    out_file.write(f'      {country["geometry"]}\n')
    out_file.write('    </Placemark>\n')


def get_description(country):
    """Return the description text for a country.

    Placeholder: the argument is ignored and an empty string is returned.
    """
    placeholder = ""  # Replace with actual description
    return placeholder


def write_kml(output_file, header, data):
    """Write the KML document: a fixed preamble, one placemark per entry, the closing tags.

    Args:
        output_file: Destination path, written as UTF-8.
        header: Accepted for the caller's convenience; not used here.
        data: Iterable of country dicts, each passed to
            ``write_county_structure`` in order.

    The document name embeds the module-level ``COUNTRY_TARGET``. The
    per-country statistics table is loaded once via
    ``get_countries_elements`` and shared by all placemarks.
    """
    countries_elements = get_countries_elements()

    with open(output_file, 'w', encoding='utf-8') as out_file:
        preamble = (
            '<?xml version="1.0" encoding="UTF-8"?>\n'
            '<kml xmlns="http://www.opengis.net/kml/2.2">\n'
            '  <Document>\n'
            f'    <name>Countries without visa for {COUNTRY_TARGET}</name>\n'
        )
        out_file.write(preamble)

        for entry in data:
            write_county_structure(out_file, entry, countries_elements)

        out_file.write('  </Document>\n' '</kml>\n')

# Read from KML: maps each placemark name to its serialized geometry markup.
countries_data = read_kml('../data/countries.kml')

# Join the available-countries CSV with those geometries; countries that have
# no placemark are reported on stdout and left out of `output_data`.
header, output_data = process_csv('../data/available_countries.csv', countries_data)

# Write to KML: the file name embeds COUNTRY_TARGET and is created one
# directory above the notebook's working directory.
write_kml(f'../Countries without visa for {COUNTRY_TARGET}.kml', header, output_data)


Country 'Ivory Coast' don't found
Country 'Hong Kong' don't found
Country 'Macao' don't found
Country 'Palestine' don't found
No match found for country: Angola
No match found for country: Antigua and Barbuda
No match found for country: Benin
No match found for country: Bhutan
No match found for country: Bosnia and Herzegovina
No match found for country: Burkina Faso
No match found for country: Burundi
No match found for country: Cape Verde
No match found for country: Comoros
No match found for country: DR Congo
No match found for country: Djibouti
No match found for country: Dominica
No match found for country: Equatorial Guinea
No match found for country: Swaziland
No match found for country: Gabon
No match found for country: Gambia
No match found for country: Grenada
No match found for country: Guinea
No match found for country: Guinea-Bissau
No match found for country: Haiti
No match found for country: Kiribati
No match found for country: Laos
No match found for country: Lesotho
No

In [2]:
# Rebuild the merged statistics table at notebook level for ad-hoc inspection:
# the same two CSV files and the same outer join on 'Country' that
# get_countries_elements() performs.
cost_of_living = pd.read_csv('../data/cost_of_living.csv')
quality_of_life = pd.read_csv('../data/quality_of_life.csv')
df = pd.merge(cost_of_living, quality_of_life, on='Country', how='outer')

In [4]:
# Largest value in the merged table's ' Cost of Living Index_x' column.
# Note that the column label starts with a space.
# NOTE(review): the '_x' suffix presumably comes from the merge above — confirm.
df[' Cost of Living Index_x'].max()

141.1

In [74]:
# Dump the merged table to 'output.csv' in the current working directory,
# without writing the row index.
df.to_csv('output.csv', index=False)