## Travel Agency Analysis

In [None]:
# Data retrieval

from utils.config import BASE_URL, ENDPOINT
from utils.api_manager import get_data
from utils.data_extractor import extract_countries_data

def main():
    url = f"{BASE_URL}/{ENDPOINT}"
    countries_data = get_data(url)

    extracted_data = extract_countries_data(countries_data)

    return extracted_data

if __name__ == "__main__":
    extracted_data = main()
    print(len(extracted_data))
    print(extracted_data)

In [3]:
import pandas as pd

In [5]:
df = pd.DataFrame(extracted_data)
df.head()

Unnamed: 0,country_name,independence,united_nation_members,startOfWeek,official_country_name,common_native_name,currency_code,currency_name,currency_symbol,country_code,capital,region,sub_region,languages,area,population,continent
0,South Georgia,False,False,monday,South Georgia and the South Sandwich Islands,South Georgia,SHP,Saint Helena pound,£,500,King Edward Point,Antarctic,,English,3903.0,30,Antarctica
1,Grenada,True,True,monday,Grenada,Grenada,XCD,Eastern Caribbean dollar,$,1473,St. George's,Americas,Caribbean,English,344.0,112519,North America
2,Switzerland,True,True,monday,Swiss Confederation,"Suisse, Schweiz, Svizzera, Svizra",CHF,Swiss franc,Fr.,41,Bern,Europe,Western Europe,"French, Swiss German, Italian, Romansh",41284.0,8654622,Europe
3,Sierra Leone,True,True,monday,Republic of Sierra Leone,Sierra Leone,SLL,Sierra Leonean leone,Le,232,Freetown,Africa,Western Africa,English,71740.0,7976985,Africa
4,Hungary,True,True,monday,Hungary,Magyarország,HUF,Hungarian forint,Ft,36,Budapest,Europe,Central Europe,Hungarian,93028.0,9749763,Europe


In [41]:
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
area,250.0,600584.8,1909805.0,0.44,1194.25,64929.5,372726.0,17098240.0
population,250.0,31110890.0,129667300.0,0.0,221099.5,4912244.0,19025766.75,1402112000.0


### 1. How many countries speak french?

In [9]:
# Filter for countries where 'languages' column contains 'French'

french_speaking_countries = df[df['languages'].fillna('').str.contains('French')]

# Display the results
print("Number of countries that speak French:", len(french_speaking_countries))


Number of countries that speak French: 46


### 2. How many countries speak English?

In [10]:
# Filter for countries where 'languages' column contains 'English'

english_speaking_countries = df[df['languages'].fillna('').str.contains('English')]

# Display the results
print("Number of countries that speak English:", len(english_speaking_countries))

Number of countries that speak English: 91


### 3. How many countries have more than one official language?

In [11]:
# Count countries with more than one official language
more_than_one_offLan = df[df['languages'].fillna('').str.contains(',')]

print("Number of countries with more that one official language:", len(more_than_one_offLan))

Number of countries with more that one official language: 96


### 4. How many countries have the Euro as their official currency?

In [13]:
# Count countries with Euro as official currency
num_countries_with_euro = df[df['currency_name'] == 'Euro'].shape[0]

# Display the results
print("Number of countries with Euro as official currency:", num_countries_with_euro)

Number of countries with Euro as official currency: 36


### 5. How many countries are from West Europe?

In [14]:
# Count countries from west europe
Countries_west_eur = df[df['sub_region'] == 'Western Europe']

print("Number of countries from Western Europe:", len(Countries_west_eur))

Number of countries from Western Europe: 8


### 6. How many countries have not gained Independence?

In [15]:
# Filter countries that have not gained independence
Countries_no_ind = df[df['independence'] == False]
                                                  
print("Number of countries that have not gained independence:", len(Countries_no_ind))

Number of countries that have not gained independence: 55


### 7. How many distinct continent and how many countries from each?

In [16]:
# Aggregate to find number of countries per continent
continent_counts = df.groupby('continent')['country_name'].agg(['count']).reset_index()

continent_counts = continent_counts.rename(columns={'count': 'No of countries'})

# Count distinct continents
num_distinct_continents = len(continent_counts)

# Display results
print("Number of distinct continents:", num_distinct_continents)
print("\nCountries per continent:")

continent_counts

Number of distinct continents: 8

Countries per continent:


Unnamed: 0,continent,No of countries
0,Africa,58
1,Antarctica,5
2,Asia,50
3,Europe,52
4,"Europe, Asia",3
5,North America,41
6,Oceania,27
7,South America,14


### 8. No. of countries where the week does not start on Monday

In [19]:
# Filter countries with startOfWeek not monday
Country_week_start = df[df['startOfWeek'] != 'monday']

print("Number of countries where the week does not start on Monday:", len(Country_week_start))

Number of countries where the week does not start on Monday: 21


### 9. How many countries are not UN members?

In [22]:
# Filter countries not UN members
Country_notUN_members = df[df['united_nation_members'] != True]

print("Number of countries that are not UN members:", len(Country_notUN_members))

Number of countries that are not UN members: 58


### 10. How many countries are UN members?

In [23]:
# Filter countries that are UN members
Country_UN_members = df[df['united_nation_members'] == True]

print("Number of countries that are UN members:", len(Country_UN_members))

Number of countries that are UN members: 192


### 11. List 2 countries with the lowest population for each continent

In [26]:
# Sort DataFrame by population within each continent
df_sorted = df.sort_values(by=['continent', 'population'])

# Group by continent and select top 2 countries with lowest population
top_countries_per_continent = df_sorted.groupby('continent').head(2)

# Display the results
print("Two countries with lowest population for each continent:")
top_countries_per_continent[['continent', 'country_name', 'population']].reset_index(drop = True)

Two countries with lowest population for each continent:


Unnamed: 0,continent,country_name,population
0,Africa,"Saint Helena, Ascension and Tristan da Cunha",53192
1,Africa,Seychelles,98462
2,Antarctica,Bouvet Island,0
3,Antarctica,Heard Island and McDonald Islands,0
4,Asia,Cocos (Keeling) Islands,544
5,Asia,Christmas Island,2072
6,Europe,Vatican City,451
7,Europe,Svalbard and Jan Mayen,2562
8,"Europe, Asia",Azerbaijan,10110116
9,"Europe, Asia",Turkey,84339067


### 12. List 2 countries with the largest Area for each Continent

In [37]:
# Sort DataFrame by population within each continent
df_sorted = df.sort_values(by=['continent', 'area'], ascending= [True, False])

# Group by continent and select 2 countries with Largest Area
countries_per_continent = df_sorted.groupby('continent').head(2)

# Display the results
print("Two countries with largest area for each continent:")
countries_per_continent[['continent', 'country_name', 'area']].reset_index(drop=True)

Two countries with largest area for each continent:


Unnamed: 0,continent,country_name,area
0,Africa,Algeria,2381741.0
1,Africa,DR Congo,2344858.0
2,Antarctica,Antarctica,14000000.0
3,Antarctica,French Southern and Antarctic Lands,7747.0
4,Asia,China,9706961.0
5,Asia,India,3287590.0
6,Europe,Ukraine,603500.0
7,Europe,France,551695.0
8,"Europe, Asia",Russia,17098242.0
9,"Europe, Asia",Turkey,783562.0


### 13. Top 5 countries with the Largest Area

In [35]:
# Sort the DataFrame by 'Area' column in descending order and select top 5
top_5_countries = df.nlargest(5, 'area').reset_index()

# Result
print('Top 5 countries with the largest Area:')
top_5_countries[['country_name', 'area']]

Top 5 countries with the largest Area:


Unnamed: 0,country_name,area
0,Russia,17098242.0
1,Antarctica,14000000.0
2,Canada,9984670.0
3,China,9706961.0
4,United States,9372610.0


### 14. Top 5 countries with the lowest Area

In [34]:
# Sort the DataFrame by 'Area' column in ascending order and select bottom 5
bottom_5_countries = df.nsmallest(5, 'area').reset_index()

print('Top 5 countries with the lowest Area:')
bottom_5_countries[['country_name', 'area']]

Top 5 countries with the lowest Area:


Unnamed: 0,country_name,area
0,Vatican City,0.44
1,Monaco,2.02
2,Gibraltar,6.0
3,Tokelau,12.0
4,Cocos (Keeling) Islands,14.0
