Count missing values

In [None]:
-- Total number of rows in fortune500.
-- The alias spells out the name the unaliased aggregate column already has.
SELECT
  COUNT(*) AS count
FROM fortune500;

In [None]:
-- Number of rows whose ticker is NULL, reported as "missing".
-- Counting only the NULL rows gives the same figure as
-- (total rows) - (rows with a non-NULL ticker).
SELECT
  COUNT(*) FILTER (WHERE ticker IS NULL) AS missing
FROM fortune500;

In [None]:
-- Number of rows whose profits_change is NULL, reported as "missing".
-- Equivalent to (total rows) - (rows with a non-NULL profits_change).
SELECT
  COUNT(*) FILTER (WHERE profits_change IS NULL) AS missing
FROM fortune500;

In [None]:
-- Number of rows whose industry is NULL, reported as "missing".
-- Equivalent to (total rows) - (rows with a non-NULL industry).
SELECT
  COUNT(*) FILTER (WHERE industry IS NULL) AS missing
FROM fortune500;

Join tables

In [None]:
-- Names of companies whose ticker also appears in fortune500.
-- The inner join drops companies with no matching ticker.
SELECT c.name
FROM company AS c
INNER JOIN fortune500 AS f
  ON f.ticker = c.ticker;

Read an entity relationship diagram

In [None]:
-- Count the number of tags with each type.
SELECT
  type,
  COUNT(*) AS count
FROM tag_type
-- One output row per distinct type
GROUP BY type
-- Most common tag types first: sort on the per-type count, not on the
-- type name. Ties fall back to the type name so the order is repeatable.
ORDER BY COUNT(*) DESC, type;

In [None]:
-- Company name, tag, and tag type for every company tagged with a
-- tag of type 'cloud'.
SELECT
  company.name,
  tag_type.tag,
  tag_type.type
FROM company
-- tag_company links a company id to each of its tags
INNER JOIN tag_company
  ON company.id = tag_company.company_id
-- tag_type supplies the type of each tag
INNER JOIN tag_type
  ON tag_company.tag = tag_type.tag
-- Filter to the 'cloud' type (described in the original note as the most
-- common type). The column is table-qualified so the filter stays
-- unambiguous if another joined table ever gains a "type" column.
WHERE tag_type.type = 'cloud';

Coalesce

In [None]:
-- Most frequent industry label, where the label is the first non-NULL of
-- industry, sector, and the literal 'Unknown'.
SELECT
  COALESCE(industry, sector, 'Unknown') AS industry2,
  COUNT(*) AS count
FROM fortune500
-- Group on the same expression that defines industry2
GROUP BY COALESCE(industry, sector, 'Unknown')
-- Largest group first
ORDER BY COUNT(*) DESC
-- Keep only the top row
LIMIT 1;

Coalesce with a self-join

In [None]:
-- Match each company to a fortune500 row through its parent's ticker when a
-- parent row exists, and through its own ticker otherwise.
SELECT
  company_original.parent_id,
  company_parent.id,
  company_original.name,
  company_original.ticker AS original_ticker,
  company_parent.ticker AS parent_ticker,
  -- Same expression as the join key below, exposed for inspection
  COALESCE(company_parent.ticker, company_original.ticker) AS coalesce_result,
  fortune500.ticker AS fortune500_ticker,
  fortune500.title,
  fortune500.rank
-- Start with original company information
FROM company AS company_original
-- Join to another copy of company with parent company information;
-- LEFT JOIN keeps companies that have no parent row
LEFT JOIN company AS company_parent
  ON company_original.parent_id = company_parent.id
-- Join to fortune500, only keep rows that match
INNER JOIN fortune500
  -- Use parent ticker if there is one, otherwise original ticker.
  -- COALESCE returns its first non-NULL argument, so the parent ticker
  -- must come first for it to take precedence.
  ON COALESCE(company_parent.ticker,
              company_original.ticker) = fortune500.ticker
-- For clarity, order by rank
ORDER BY fortune500.rank;

Effects of casting

In [None]:
-- Show profits_change next to the same value cast to integer,
-- so the effect of the cast can be compared row by row.
SELECT
  profits_change,
  profits_change::integer AS profits_change_int
FROM fortune500;

In [None]:
-- Compare integer division with numeric division.
SELECT
  -- Both operands are integers, so the result is an integer
  10 / 3,
  -- Casting 10 to numeric first makes the division numeric
  CAST(10 AS numeric) / 3;

In [None]:
-- Cast several text literals to numeric: a decimal, a negative value,
-- exponent notation, and values with leading zeros.
SELECT
  CAST('3.2' AS numeric),
  CAST('-123' AS numeric),
  CAST('1e3' AS numeric),
  CAST('1e-3' AS numeric),
  CAST('02314' AS numeric),
  CAST('0002' AS numeric);

Summarize the distribution of numeric values

In [None]:
-- Frequency of each distinct revenues_change value.
SELECT
  revenues_change,
  COUNT(*) AS count
FROM fortune500
GROUP BY revenues_change
-- Ascending is the default sort direction
ORDER BY revenues_change;

In [None]:
-- Frequency of revenues_change after casting each value to integer.
SELECT
  -- Explicit alias keeps the output column named revenues_change
  CAST(revenues_change AS integer) AS revenues_change,
  COUNT(*) AS count
FROM fortune500
GROUP BY CAST(revenues_change AS integer)
-- Sort on the grouped integer value
ORDER BY CAST(revenues_change AS integer);

In [None]:
-- Count rows 
SELECT COUNT(*)
FROM fortune500
-- Where...
WHERE revenues_change > 0;