# Guided Project: Analyzing CIA Factbook Data Using SQL

In [1]:
import pandas as pd
import sqlite3

In [2]:
# Connect to the Database
conn = sqlite3.connect("factbook.db")
c = conn.cursor()

## Get Information About the Database

In [3]:
disc = c.execute(' SELECT * FROM sqlite_master WHERE type="table" ').fetchall()

disc

[('table',
  'sqlite_sequence',
  'sqlite_sequence',
  3,
  'CREATE TABLE sqlite_sequence(name,seq)'),
 ('table',
  'facts',
  'facts',
  47,
  'CREATE TABLE "facts" ("id" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, "code" varchar(255) NOT NULL, "name" varchar(255) NOT NULL, "area" integer, "area_land" integer, "area_water" integer, "population" integer, "population_growth" float, "birth_rate" float, "death_rate" float, "migration_rate" float)')]

In [4]:
# Facts table
header = c.execute('PRAGMA table_info(facts)').fetchall()
rows = c.execute('SELECT * FROM facts').fetchall()

print(header)
print("\n")
print(rows[:5])

[(0, 'id', 'INTEGER', 1, None, 1), (1, 'code', 'varchar(255)', 1, None, 0), (2, 'name', 'varchar(255)', 1, None, 0), (3, 'area', 'integer', 0, None, 0), (4, 'area_land', 'integer', 0, None, 0), (5, 'area_water', 'integer', 0, None, 0), (6, 'population', 'integer', 0, None, 0), (7, 'population_growth', 'float', 0, None, 0), (8, 'birth_rate', 'float', 0, None, 0), (9, 'death_rate', 'float', 0, None, 0), (10, 'migration_rate', 'float', 0, None, 0)]


[(1, 'af', 'Afghanistan', 652230, 652230, 0, 32564342, 2.32, 38.57, 13.89, 1.51), (2, 'al', 'Albania', 28748, 27398, 1350, 3029278, 0.3, 12.92, 6.58, 3.3), (3, 'ag', 'Algeria', 2381741, 2381741, 0, 39542166, 1.84, 23.67, 4.31, 0.92), (4, 'an', 'Andorra', 468, 468, 0, 85580, 0.12, 8.13, 6.96, 0.0), (5, 'ao', 'Angola', 1246700, 1246700, 0, 19625353, 2.78, 38.78, 11.49, 0.46)]


## General Summary Statistics

In [5]:
# to view in a pandas dataframe
df = pd.read_sql_query("SELECT * FROM facts", conn)

In [6]:
min_pop = c.execute("SELECT MIN(population) FROM facts").fetchone()
max_pop = c.execute("SELECT MAX(population) FROM facts").fetchone()
min_grow = c.execute("SELECT MIN(population_growth) FROM facts").fetchone()
max_grow = c.execute("SELECT MAX(population_growth) FROM facts").fetchone()

print("Min population is {} and max population is {}".format(min_pop[0],max_pop[0]))
print("Min growth is {} and max growth is {}".format(min_grow[0],max_grow[0]))

Min population is 0 and max population is 7256490011
Min growth is 0.0 and max growth is 4.02


In [7]:
avg_pop = c.execute("SELECT AVG(population) FROM facts").fetchone()
avg_area = c.execute("SELECT AVG(area) FROM facts").fetchone()

print("Avg population for population is {:.0f}.".format(avg_pop[0]))
print("Avg area is {:.0f} square kilometers.".format(avg_area[0]))

Avg population for population is 62094928.
Avg area is 555094 square kilometers.


Finding countries that are above average in population and below average in area.

In [8]:
rows = c.execute("SELECT * FROM facts \
                  WHERE population > (SELECT AVG(population) FROM facts) \
                  AND area < (SELECT AVG(area) from facts)").fetchall()

In [9]:
for row in rows:
    print("{:>20}    Area (km^2):{:06}   Population: {:9d}".format(row[2],row[4],row[6],))

          Bangladesh    Area (km^2):130170   Population: 168957745
             Germany    Area (km^2):348672   Population:  80854408
               Japan    Area (km^2):364485   Population: 126919659
         Philippines    Area (km^2):298170   Population: 100998376
            Thailand    Area (km^2):510890   Population:  67976405
      United Kingdom    Area (km^2):241930   Population:  64088222
             Vietnam    Area (km^2):310070   Population:  94348835
