In [6]:
import pandas as pd
import sqlite3
import matplotlib.pyplot as plt

#### creating a connection instance

In [7]:
# Open the database through a URI with mode=rw so that a missing or
# misplaced file raises sqlite3.OperationalError right here. A plain
# sqlite3.connect('factbook.db') would silently create an empty database
# and the problem would only surface later as "no such table: facts".
conn = sqlite3.connect('file:factbook.db?mode=rw', uri=True)

#### selecting all tables

In [8]:
# sqlite_master is SQLite's schema catalog; keep only the entries of type 'table'.
query = "select * from sqlite_master where type='table'"
pd.read_sql_query(sql=query, con=conn)

Unnamed: 0,type,name,tbl_name,rootpage,sql
0,table,sqlite_sequence,sqlite_sequence,3,"CREATE TABLE sqlite_sequence(name,seq)"
1,table,facts,facts,47,"CREATE TABLE ""facts"" (""id"" INTEGER PRIMARY KEY..."


#### We see that the database contains two tables: `sqlite_sequence` and `facts`

#### selecting first five rows from facts table

In [9]:
# Peek at the first five rows to see which columns the facts table offers.
facts_query = "select * from facts LIMIT 5"
pd.read_sql_query(sql=facts_query, con=conn)

Unnamed: 0,id,code,name,area,area_land,area_water,population,population_growth,birth_rate,death_rate,migration_rate
0,1,af,Afghanistan,652230,652230,0,32564342,2.32,38.57,13.89,1.51
1,2,al,Albania,28748,27398,1350,3029278,0.3,12.92,6.58,3.3
2,3,ag,Algeria,2381741,2381741,0,39542166,1.84,23.67,4.31,0.92
3,4,an,Andorra,468,468,0,85580,0.12,8.13,6.96,0.0
4,5,ao,Angola,1246700,1246700,0,19625353,2.78,38.78,11.49,0.46


#### minimum population value in facts table

In [10]:
# Smallest value stored in the population column.
min_pop_query = "select MIN(population) from facts"
pd.read_sql_query(sql=min_pop_query, con=conn)

Unnamed: 0,MIN(population)
0,0


#### maximum population value in facts table

In [11]:
# Largest value stored in the population column.
max_pop_query = "select MAX(population) from facts"
pd.read_sql_query(sql=max_pop_query, con=conn)

Unnamed: 0,MAX(population)
0,7256490011


#### minimum population growth value in facts table

In [12]:
# Smallest value stored in the population_growth column.
min_pop_growth_query = "select MIN(population_growth) from facts"
pd.read_sql_query(sql=min_pop_growth_query, con=conn)

Unnamed: 0,MIN(population_growth)
0,0.0


#### maximum population growth value in facts table

In [13]:
# Largest value stored in the population_growth column.
max_pop_growth_query = "select MAX(population_growth) from facts"
pd.read_sql_query(sql=max_pop_growth_query, con=conn)

Unnamed: 0,MAX(population_growth)
0,4.02


#### country(ies) with minimum population

In [14]:
# Look the minimum up inside the query instead of pasting a literal value,
# so the cell stays correct if the underlying data changes.
cont_query_min_pop = (
    "select name from facts "
    "where population = (select MIN(population) from facts)"
)
pd.read_sql_query(cont_query_min_pop, conn)

Unnamed: 0,name
0,Antarctica


#### country(ies) with maximum population

In [15]:
# Look the maximum up inside the query instead of pasting a literal value,
# so the cell stays correct if the underlying data changes.
cont_query_max_pop = (
    "select name from facts "
    "where population = (select MAX(population) from facts)"
)
pd.read_sql_query(cont_query_max_pop, conn)

Unnamed: 0,name
0,World


#### Distribution of population, population growth, birth rate and death rate across countries

In [16]:
def draw_hist(df, n_rows, n_cols):
    """Draw one 10-bin histogram per column of ``df`` on a subplot grid.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are each plotted in their own panel, titled
        "<column> Distribution".
    n_rows, n_cols : int
        Shape of the subplot grid. The grid must provide at least one
        cell per column of ``df``.

    Raises
    ------
    ValueError
        If ``n_rows * n_cols`` is smaller than the number of columns.
    """
    n_plots = len(df.columns)
    # Validate before creating the figure so a grid that is too small is
    # reported clearly instead of failing after some panels were drawn.
    if n_rows * n_cols < n_plots:
        raise ValueError(
            "a %d x %d grid cannot hold %d histograms" % (n_rows, n_cols, n_plots)
        )

    fig = plt.figure()
    for i, col in enumerate(df.columns):
        ax = fig.add_subplot(n_rows, n_cols, i + 1)
        df[col].hist(bins=10, ax=ax)
        # str() keeps non-string column labels (e.g. integers) from
        # raising TypeError when building the title.
        ax.set_title(str(col) + " Distribution")
    fig.tight_layout()  # Keeps titles and tick labels from overlapping.
    plt.show()

In [17]:
# Leave out rows with a population of 0 and the row holding the largest
# population, so they do not distort the histograms. The largest value is
# looked up in the query itself rather than hard-coded, so the filter keeps
# working if the figure in the table changes.
hist_query = (
    "select population, population_growth, birth_rate, death_rate "
    "from facts "
    "where population != 0 "
    "and population != (select MAX(population) from facts)"
)
df = pd.read_sql_query(hist_query, conn)
draw_hist(df, 2, 2)

<matplotlib.figure.Figure at 0x7f7e0dda5630>

#### Which countries have the highest population density?

In [18]:
# Keep the row(s) whose population / land-area ratio equals the table-wide
# maximum of that same ratio; both operands are cast to Float before dividing.
query_ = "select name from facts where CAST(population as Float)/CAST(area_land as Float) = (select MAX(CAST(population as Float)/CAST(area_land as Float)) from facts)"
pd.read_sql_query(sql=query_, con=conn)

Unnamed: 0,name
0,Macau


#### population densities of all countries

In [19]:
# Population divided by land area for every row of facts, exposed as population_density.
query_pop_density = "select name, CAST(population as Float)/CAST(area_land as Float) as population_density from facts"
pd.read_sql_query(sql=query_pop_density, con=conn)

Unnamed: 0,name,population_density
0,Afghanistan,49.927697
1,Albania,110.565662
2,Algeria,16.602211
3,Andorra,182.863248
4,Angola,15.741841
5,Antigua and Barbuda,209.131222
6,Argentina,15.870225
7,Armenia,108.370812
8,Australia,2.961485
9,Austria,105.107041


#### Some countries have a population density of NaN. This can happen when population, area_land, or both are missing (as we can see below)

In [24]:
# Bind the name as a parameter. In SQL, double quotes delimit identifiers;
# SQLite only falls back to reading "Wake Island" as a string literal as a
# legacy behaviour that can be switched off, so the original spelling was fragile.
query_nan = "select name, population, area_land from facts where name = ?"
pd.read_sql_query(query_nan, conn, params=("Wake Island",))

Unnamed: 0,name,population,area_land
0,Wake Island,,6


#### Which countries have the highest ratios of water to land? 

In [31]:
# Water area divided by land area for every row, sorted from the largest ratio down.
query_water_to_land_ratio = 'select name as country, CAST(area_water as Float)/CAST(area_land as Float) as water_to_land_ratio from facts ORDER BY water_to_land_ratio DESC'
pd.read_sql_query(sql=query_water_to_land_ratio, con=conn)

Unnamed: 0,country,water_to_land_ratio
0,British Indian Ocean Territory,905.666667
1,Virgin Islands,4.520231
2,Puerto Rico,0.554791
3,"Bahamas, The",0.386613
4,Guinea-Bissau,0.284673
5,Malawi,0.259396
6,Netherlands,0.225710
7,Uganda,0.222922
8,Eritrea,0.164356
9,Liberia,0.156240
