# Country Flags

In [1]:
# Dependencies
import pandas as pd
import requests as rs
from bs4 import BeautifulSoup as bs
from splinter import Browser
from splinter.exceptions import ElementDoesNotExist


In [2]:
# Set up connection with chromedriver for viewing websites
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)


### Wikipedia Flags Images

In [3]:
# Open Wikipedia Website
flags_url = "https://en.m.wikipedia.org/wiki/Gallery_of_sovereign_state_flags"
browser.visit(flags_url)

# Create BeautifulSoup object; parse with 'html.parser'
flags_html = browser.html
flags_soup = bs(flags_html, 'html.parser')


In [4]:
# Retreive all items that contain flag information
items = flags_soup.find_all('li', class_='gallerybox')

# Create empty list for flags urls 
flags_image_urls = []

# Image main_url
main_url = "https://en.m.wikipedia.org"

# Variable for beginning of flag link
start_url = "https:"

# Loop through the items previously stored
for i in items: 
    
    # Store link that leads to full image website
    full_img_url = i.find('a', {'class': 'image', 'href' : True}).get('href')
    
    # Visit the link that contains the full image website 
    browser.visit(main_url + full_img_url)
    
    # HTML Object of individual country flag website 
    partial_img_html = browser.html
    
    # Parse HTML with Beautiful Soup for every individual country flag website 
    hemisphere_soup2 = bs( partial_img_html, 'html.parser')
    
    # Retrieve first image source 
    first_img_url = hemisphere_soup2.find('div', {'class' : 'fullImageLink'})
    
    # Retrieve near full image source 
    img_url = first_img_url.find('a')['href']
    
    # Append the retreived information into a list of dictionaries 
    flags_image_urls.append({start_url + img_url})
    

# Display first 10 results from flags_image_urls
flags_image_urls


[{'https://upload.wikimedia.org/wikipedia/commons/9/9a/Flag_of_Afghanistan.svg'},
 {'https://upload.wikimedia.org/wikipedia/commons/3/36/Flag_of_Albania.svg'},
 {'https://upload.wikimedia.org/wikipedia/commons/7/77/Flag_of_Algeria.svg'},
 {'https://upload.wikimedia.org/wikipedia/commons/1/19/Flag_of_Andorra.svg'},
 {'https://upload.wikimedia.org/wikipedia/commons/9/9d/Flag_of_Angola.svg'},
 {'https://upload.wikimedia.org/wikipedia/commons/8/89/Flag_of_Antigua_and_Barbuda.svg'},
 {'https://upload.wikimedia.org/wikipedia/commons/1/1a/Flag_of_Argentina.svg'},
 {'https://upload.wikimedia.org/wikipedia/commons/2/2f/Flag_of_Armenia.svg'},
 {'https://upload.wikimedia.org/wikipedia/commons/8/88/Flag_of_Australia_%28converted%29.svg'},
 {'https://upload.wikimedia.org/wikipedia/commons/4/41/Flag_of_Austria.svg'},
 {'https://upload.wikimedia.org/wikipedia/commons/d/dd/Flag_of_Azerbaijan.svg'},
 {'https://upload.wikimedia.org/wikipedia/commons/9/93/Flag_of_the_Bahamas.svg'},
 {'https://upload.wiki

In [11]:
# Convert list into a dataframe
flags_df = pd.DataFrame(flags_image_urls, columns =['flags'])
flags_df.head()


Unnamed: 0,flags
0,https://upload.wikimedia.org/wikipedia/commons...
1,https://upload.wikimedia.org/wikipedia/commons...
2,https://upload.wikimedia.org/wikipedia/commons...
3,https://upload.wikimedia.org/wikipedia/commons...
4,https://upload.wikimedia.org/wikipedia/commons...


In [29]:
# Clean DataFrame 
# Copy 'flags' column to edit new column titled 'country'
flags_df['copy_country'] = flags_df['flags']

# Split 'country' column based on 'of_'
flags_df[['unwanted','partial_country']]=flags_df['copy_country'].str.split('of_', expand=True,n=1)

# Split 'partial_country' column based on '.svg'
flags_df[['almost_country','svg']]=flags_df['partial_country'].str.split('.', expand=True,n=1)

# Split 'almost_country' column based on '%'
flags_df[['country','%']]=flags_df['almost_country'].str.split('%', expand=True,n=1)

# Delete 'country' and 'unwanted' columns
del flags_df['copy_country']
del flags_df['unwanted']
del flags_df['partial_country']
del flags_df['svg']
del flags_df['almost_country']
del flags_df['%']

flags_df.head()


Unnamed: 0,flags,country
0,https://upload.wikimedia.org/wikipedia/commons...,Afghanistan
1,https://upload.wikimedia.org/wikipedia/commons...,Albania
2,https://upload.wikimedia.org/wikipedia/commons...,Algeria
3,https://upload.wikimedia.org/wikipedia/commons...,Andorra
4,https://upload.wikimedia.org/wikipedia/commons...,Angola


In [28]:
# Export dataframe to csv
flags_df.to_csv('data/flags_df.csv')


In [8]:
# Quit chromedriver
browser.quit() 
    