In [21]:
!pip install sqlalchemy



In [22]:
import pandas as pd
from sqlalchemy import create_engine

In [25]:
def create_sqlalchemy_engine():
    # Database connection parameters
    db_username = 'mysqluser'
    db_password = 'mysqluser_pass'
    db_host = 'ovh'
    db_port = '3300'
    db_name = 'world'

    # Create the SQLAlchemy engine
    engine = create_engine(f'mysql+pymysql://{db_username}:{db_password}@{db_host}:{db_port}/{db_name}')

    return engine

In [27]:
engine = create_sqlalchemy_engine()

## ERD

![World Database ERD](world_db_ERD.png)

## Easy Level

### 1. List all countries and their respective continents.

In [29]:
query = 'select name, continent from country order by continent;'
pd.read_sql(query, con=engine)

Unnamed: 0,name,continent
0,Afghanistan,Asia
1,United Arab Emirates,Asia
2,Armenia,Asia
3,Azerbaijan,Asia
4,Bangladesh,Asia
...,...,...
234,Peru,South America
235,Paraguay,South America
236,Suriname,South America
237,Uruguay,South America


### 2. Retrieve all cities and their corresponding countries.

In [32]:
query = '''
select
    city.name as city,
    country.name as country
from city
join country on city.countrycode = country.code
order by country ;'''

pd.read_sql(query, con=engine)

Unnamed: 0,city,country
0,Kabul,Afghanistan
1,Qandahar,Afghanistan
2,Herat,Afghanistan
3,Mazar-e-Sharif,Afghanistan
4,Tirana,Albania
...,...,...
4074,Bulawayo,Zimbabwe
4075,Chitungwiza,Zimbabwe
4076,Mount Darwin,Zimbabwe
4077,Mutare,Zimbabwe


### 3. Find all distinct continents in the database.

In [34]:
query = 'select distinct continent from country order by 1;'
pd.read_sql(query, con=engine)

Unnamed: 0,continent
0,Asia
1,Europe
2,North America
3,Africa
4,Oceania
5,Antarctica
6,South America


### 4. Count the number of countries in the database.

In [35]:
query = 'select count(distinct name) as number_countries from country ;'
pd.read_sql(query, con=engine)

Unnamed: 0,number_countries
0,239


### 5. Retrieve the population of 'Canada'.

In [36]:
query = '''
select name, population
from country
where name = 'Canada' ;'''
pd.read_sql(query, con=engine)

Unnamed: 0,name,population
0,Canada,31147000


### 6. Find the official language spoken in 'Germany'.

In [39]:
query = '''
select language, 'Germany' as country
from countrylanguage
where isofficial = 'T'
and countrycode = (select code from country where name = 'Germany') ;'''
pd.read_sql(query, con=engine)

Unnamed: 0,language,country
0,German,Germany


### 7. List all cities in 'France' along with their populations.


In [40]:
query = '''
select city.name as city, country.population as population
from city
join country on city.countrycode = country.code
where country.name = 'France'
order by population desc ; '''
pd.read_sql(query, con=engine)

Unnamed: 0,city,population
0,Paris,59225700
1,Marseille,59225700
2,Lyon,59225700
3,Toulouse,59225700
4,Nice,59225700
5,Nantes,59225700
6,Strasbourg,59225700
7,Montpellier,59225700
8,Bordeaux,59225700
9,Rennes,59225700


### 8. Retrieve all country codes and their respective names.

In [41]:
query = 'select name as country, code from country order by 2 ;'
pd.read_sql(query, con=engine)

Unnamed: 0,country,code
0,Aruba,ABW
1,Afghanistan,AFG
2,Angola,AGO
3,Anguilla,AIA
4,Albania,ALB
...,...,...
234,Yemen,YEM
235,Yugoslavia,YUG
236,South Africa,ZAF
237,Zambia,ZMB


### 9. Display the total population of all countries combined.


In [42]:
query = 'select sum(population) as total_pop from country ;'
pd.read_sql(query, con=engine)

Unnamed: 0,total_pop
0,6078749000.0


### 10. Find the largest city in 'USA'.

In [43]:
query = '''
select name as city,
population
from city
where countrycode = (
    select code from country
    where name = 'United States'
    )
order by population desc
limit 1 ;'''
pd.read_sql(query, con=engine)

Unnamed: 0,city,population
0,New York,8008278
