In [1]:
# Import the pandas library under its usual alias 
import pandas as pd

# Load the business.csv file as a DataFrame called businesses
businesses = pd.read_csv('datasets/businesses.csv')

# Sort businesses from oldest businesses to youngest
sorted_businesses = businesses.sort_values('year_founded', ascending = False)

# Display the first few lines of sorted_businesses
sorted_businesses.head()

Unnamed: 0,business,year_founded,category_code,country_code
113,Meridian Corporation,1999,CAT13,XK
15,Guinea Ecuatorial Airlines,1996,CAT2,GNQ
42,Ivory Bank,1994,CAT3,SSD
162,European Trust Company,1991,CAT3,VUT
9,Central Bank of the Comoros,1981,CAT3,COM


In [2]:
# Load countries.csv to a DataFrame
countries = pd.read_csv('datasets/countries.csv')

# Merge sorted_businesses with countries
businesses_countries = sorted_businesses.merge(countries, on = 'country_code')

# Filter businesses_countries to include countries in North America only
north_america = businesses_countries[businesses_countries['continent'].isin(['North America'])]
north_america.head()

Unnamed: 0,business,year_founded,category_code,country_code,country,continent
37,1st National Bank of St Lucia,1938,CAT3,LCA,Saint Lucia,North America
46,Cubana de Aviaci贸n,1929,CAT2,CUB,Cuba,North America
51,Corporaci贸n Multi Inversiones,1920,CAT11,GTM,Guatemala,North America
57,The Chronicle (Dominica),1909,CAT13,DMA,Dominica,North America
58,Florida Ice and Farm Company,1908,CAT9,CRI,Costa Rica,North America


In [3]:
# Create continent, which lists only the continent and oldest year_founded

continent = businesses_countries[['continent', 'year_founded']].groupby('year_founded').min()

# Merge continent with businesses_countries
merged_continent = continent.merge(businesses_countries, on='continent')

# Subset continent so that only the four columns of interest are included
subset_merged_continent = merged_continent[['continent', 'country', 'business', 'year_founded']]
subset_merged_continent.head()

Unnamed: 0,continent,country,business,year_founded
0,Asia,Seychelles,Air Seychelles,1977
1,Asia,Yemen,Yemenia Airways,1962
2,Asia,Lao People's Democratic Republic,Electricite du Laos,1959
3,Asia,Bhutan,Tashi Group,1959
4,Asia,Cambodia,National Bank of Cambodia,1954


In [4]:
# Use .merge() to create a DataFrame, all_countries
all_countries = businesses.merge(countries, on='country_code', how = 'right', indicator =True)

# Filter to include only countries without oldest businesses
missing_countries = all_countries[all_countries['_merge'] != 'both']

# Create a series of the country names with missing oldest business data
missing_countries_series = missing_countries["country"]

# Display the series
print(missing_countries_series)

1                                Angola
7                   Antigua and Barbuda
18                              Bahamas
48                   Dominican Republic
50                              Ecuador
57                                 Fiji
59      Micronesia, Federated States of
63                                Ghana
65                               Gambia
69                              Grenada
79            Iran, Islamic Republic of
89                           Kyrgyzstan
91                             Kiribati
92                Saint Kitts and Nevis
107                              Monaco
108                Moldova, Republic of
110                            Maldives
112                    Marshall Islands
131                               Nauru
138                               Palau
139                    Papua New Guinea
143                            Paraguay
144                 Palestine, State of
153                     Solomon Islands
160                            Suriname


In [5]:
# Import new_businesses.csv
new_businesses = pd.read_csv("datasets/new_businesses.csv")

# Add the data in new_businesses to the existing businesses
all_businesses = pd.concat([new_businesses, businesses])

# Merge and filter to find countries with missing business data
new_all_countries = all_businesses.merge(countries, on="country_code", how="outer",  indicator=True)
new_missing_countries = new_all_countries[new_all_countries["_merge"] != "both"]

# Group by continent and create a "count_missing" column
count_missing = new_missing_countries.groupby("continent").agg({"country":"count"})
count_missing.columns = ["count_missing"]
count_missing

FileNotFoundError: [Errno 2] No such file or directory: 'datasets/new_businesses.csv'