# Data analysis with aggregations 

In [None]:
# Summary statistics for DurationSeconds over the whole Incidents table
SELECT
    AVG(DurationSeconds) AS Average,
    MIN(DurationSeconds) AS Minimum,
    MAX(DurationSeconds) AS Maximum
FROM Incidents

In [None]:
# Aggregations can be computed per group with GROUP BY
# (filtering on the aggregated value would need HAVING rather than WHERE)
SELECT
    State,
    MAX(DurationSeconds)
FROM Incidents
GROUP BY State

In [None]:
# Per-Shape average, minimum and maximum of DurationSeconds,
# keeping only the shapes whose minimum DurationSeconds is greater than 1
SELECT
    Shape,
    AVG(DurationSeconds) AS Average,
    MIN(DurationSeconds) AS Minimum,
    MAX(DurationSeconds) AS Maximum
FROM Incidents
GROUP BY Shape
HAVING MIN(DurationSeconds) > 1

# Dealing with missing data
 

In [None]:
# Keep only the rows whose Shape is present (drops the NULLs)
SELECT *
FROM Incidents
WHERE Shape IS NOT NULL

# IncidentState and IncidentDateTime for the rows where IncidentState is present
SELECT
    IncidentState,
    IncidentDateTime
FROM Incidents
WHERE IncidentState IS NOT NULL

In [None]:
# ISNULL swaps a NULL for a replacement value:
# Shape2 shows 'Saucer' wherever Shape is NULL
SELECT
    Shape,
    ISNULL(Shape, 'Saucer') AS Shape2
FROM Incidents

# For the rows that have no IncidentState, fall back to City as the Location
SELECT
    IncidentState,
    ISNULL(IncidentState, City) AS Location
FROM Incidents
WHERE IncidentState IS NULL

In [None]:
# COALESCE returns its first non-NULL argument, so the replacement column can
# itself be backed up: Shape first, then City, then the literal 'Unknown'
SELECT
    Shape,
    City,
    COALESCE(Shape, City, 'Unknown') AS NewShape
FROM Incidents

# For the rows with no Country, use IncidentState as the Location,
# or City when IncidentState is missing as well
SELECT
    Country,
    COALESCE(Country, IncidentState, City) AS Location
FROM Incidents
WHERE Country IS NULL

# Binning Data with CASE

In [None]:
# Bin Country into two groups, exposed as the new column SourceCountry
SELECT
    Country,
    CASE Country
        # 'us' is relabelled 'USA'
        WHEN 'us' THEN 'USA'
        # every other value (a NULL Country included) is 'International'
        ELSE 'International'
    END AS SourceCountry
FROM Incidents

In [None]:
# Bin DurationSeconds into five contiguous groups:
#   1: up to 120          2: over 120 up to 600     3: over 600 up to 1200
#   4: over 1200 up to 5000                         5: everything else
SELECT DurationSeconds,
       CASE WHEN (DurationSeconds <= 120) THEN 1
            WHEN (DurationSeconds > 120 AND DurationSeconds <= 600) THEN 2
# Lower bound is 600 (not 601) so a duration of exactly 601 lands in group 3
            WHEN (DurationSeconds > 600 AND DurationSeconds <= 1200) THEN 3
# Lower bound is 1200 (not 1201) so a duration of exactly 1201 lands in group 4
            WHEN (DurationSeconds > 1200 AND DurationSeconds <= 5000) THEN 4
# Durations above 5000, and NULL durations, fall through to the last group
            ELSE 5
       END AS SecondGroup
FROM Incidents

# Counting and totals
 

In [None]:
# Total Quantity shipped for each MixDesc.
# GROUP BY already yields exactly one row per MixDesc, so DISTINCT is not needed
SELECT MixDesc, SUM(Quantity) AS Total
FROM Shipments
# Group by the relevant column
GROUP BY MixDesc

In [None]:
# Number of Shipments rows for each MixDesc
SELECT
    MixDesc,
    COUNT(*)
FROM Shipments
GROUP BY MixDesc

# Count how many different MixDesc values appear.
# The column needs a source table; Shipments is the table queried just above
SELECT COUNT(DISTINCT MixDesc)
FROM Shipments

# DATE 

In [None]:
# DD for day
# MM for month
# YY (or YYYY) for year
# HH for hour
# DATEADD(datepart, number, date): add or subtract datetime values
# DATEADD(DD, 30, '2020-06-21') finds the date 30 days after 2020-06-21
# DATEDIFF(datepart, startdate, enddate): obtain the difference between 2 date values
# SELECT DATEDIFF(YYYY, DateOne, DateTwo)

In [None]:
# Duration is the number of day boundaries crossed between OrderDate and ShipDate
SELECT
    OrderDate,
    ShipDate,
    DATEDIFF(DAY, OrderDate, ShipDate) AS Duration
FROM Shipments

# Write a query that returns the approximate delivery date as five days after the ShipDate.
# Return the DeliveryDate as 5 days after the ShipDate
SELECT OrderDate,
       DATEADD(DD, 5, ShipDate) AS DeliveryDate
FROM Shipments

# Rounding and Truncating Numbers 

In [None]:
# ROUND(number, length [,function])
#   length > 0 rounds to that many decimal places
#   length < 0 rounds on the left side of the decimal point
#   a non-zero third argument truncates instead of rounding
# Every select-list item must be separated by a comma.
# Table is only a placeholder name; it is bracketed because TABLE is a reserved keyword
SELECT DURATION,
       ROUND(DURATION, 0) AS roundzero,      # turn 121.62 to 122.00
       ROUND(DURATION, 1) AS roundone,       # turn 121.62 to 121.60
       ROUND(DURATION, -1) AS roundtenth,    # turn 121.62 to 120 (nearest ten)
       ROUND(DURATION, -2) AS roundhundred,  # turn 121.62 to 100 (nearest hundred)
       ROUND(DURATION, 0, 1) AS Truncating   # turn 17.91 to 17.00
FROM [Table]

In [None]:
# RoundedCost is Cost rounded to zero decimal places (the nearest whole dollar)
SELECT
    Cost,
    ROUND(Cost, 0) AS RoundedCost
FROM Shipments

In [None]:
# Truncate the values in the Cost column to a whole number:
# the non-zero third argument makes ROUND cut off the decimals instead of rounding.
# The column is spelled Cost in both places so the query also works under a
# case-sensitive collation
SELECT Cost,
       ROUND(Cost, 0, 1) AS TruncateCost
FROM Shipments

# More Math Functions

In [None]:
# absolute value: always returns a non-negative number
ABS(number)
# square root and square
SQRT(9)
SQUARE(9)
# logarithm: natural log unless the optional Base is supplied
LOG(number [,Base])

In [None]:
# AbsoluteValue shows DeliveryWeight with any negative sign removed
SELECT
    DeliveryWeight,
    ABS(DeliveryWeight) AS AbsoluteValue
FROM Shipments

In [None]:
# Calculates the square and the square root of the WeightValue column.
# Reads from Shipments, the table name used by every other shipment query in these notes
SELECT WeightValue,
       SQUARE(WeightValue) AS WeightSquare,
       SQRT(WeightValue) AS WeightSqrt
FROM Shipments

# WHILE loops
 

In [None]:
# Declare a variable, assign it a value, then display it
DECLARE @Snack varchar(128)
SET @Snack = 'Twiz'
SELECT @Snack

In [None]:
# Create an integer variable named @counter
DECLARE @counter INT

# Start it at 20
SET @counter = 20

# Add one to it
SET @counter = @counter + 1

# Display the resulting value
SELECT @counter

In [None]:
# Skeleton of a WHILE loop: the BEGIN ... END block repeats for as long as the condition holds
WHILE some_condition
BEGIN
    -- Perform some operation here
END

In [None]:
# Start the counter at 20
DECLARE @counter INT
SET @counter = 20

# Keep adding 1 for as long as the counter is below 30
WHILE @counter < 30
BEGIN
    SET @counter = @counter + 1
END

# Display the counter once the loop has finished
SELECT @counter

# Derived tables
 

In [None]:
# Attach to every Kidney row the highest BloodGlucoseRandom recorded for its Age
SELECT
    k.RecordId,
    k.Age,
    k.BloodGlucoseRandom,
    m.MaxGlucose
FROM Kidney AS k
# Derived table: one row per Age holding that age's maximum glucose value
INNER JOIN (
    SELECT Age, MAX(BloodGlucoseRandom) AS MaxGlucose
    FROM Kidney
    GROUP BY Age
) AS m
    ON k.Age = m.Age

In [None]:
# Kidney rows whose BloodPressure equals the maximum BloodPressure for their Age
SELECT *
FROM Kidney AS k
# Derived table: one row per Age holding that age's maximum blood pressure
INNER JOIN (
    SELECT Age, MAX(BloodPressure) AS MaxBloodPressure
    FROM Kidney
    GROUP BY Age
) AS m
    ON k.BloodPressure = m.MaxBloodPressure
    AND k.Age = m.Age

# Common Table Expressions
 

In [None]:
# The WITH ... AS keywords create a CTE; this one holds the single highest BloodGlucoseRandom value
WITH BloodGlucoseRandom (MaxGlucose)
AS (
    SELECT MAX(BloodGlucoseRandom) AS MaxGlucose
    FROM Kidney
)

# Ages of the Kidney rows whose glucose reading equals that maximum
SELECT
    k.Age,
    g.MaxGlucose
FROM Kidney AS k
INNER JOIN BloodGlucoseRandom AS g
    ON k.BloodGlucoseRandom = g.MaxGlucose

In [None]:
# CTE holding the single highest BloodPressure value in Kidney
WITH BloodPressure (MaxBloodPressure)
AS (
    SELECT MAX(BloodPressure) AS MaxBloodPressure
    FROM Kidney
)

# Kidney rows whose BloodPressure equals that maximum
SELECT *
FROM Kidney AS k
INNER JOIN BloodPressure AS p
    ON k.BloodPressure = p.MaxBloodPressure

# Window functions in T-SQL
 

In [None]:
# Shape of a window definition: PARTITION BY splits the rows into windows,
# ORDER BY arranges the rows inside each window
OVER(PARTITION BY SalesYear ORDER BY SalesYear)

In [None]:
# Every order row also carries the summed OrderPrice of its TerritoryName partition
SELECT
    OrderID,
    TerritoryName,
    SUM(OrderPrice) OVER (PARTITION BY TerritoryName) AS TotalPrice
FROM Orders

In [None]:
# Every order row also carries the number of rows in its TerritoryName partition
SELECT
    OrderID,
    TerritoryName,
    COUNT(*) OVER (PARTITION BY TerritoryName) AS TotalOrders
FROM Orders

# Common window functions
 

In [None]:
# FirstOrder is the first OrderDate of each TerritoryName partition,
# with the partition's rows arranged by OrderDate
SELECT
    TerritoryName,
    OrderDate,
    FIRST_VALUE(OrderDate) OVER (PARTITION BY TerritoryName ORDER BY OrderDate) AS FirstOrder
FROM Orders

In [None]:
# For each order, show the neighbouring order dates inside its territory
# (rows are arranged by OrderDate within each TerritoryName partition)
SELECT
    TerritoryName,
    OrderDate,
    # OrderDate of the row before this one in the window
    LAG(OrderDate) OVER (PARTITION BY TerritoryName ORDER BY OrderDate) AS PreviousOrder,
    # OrderDate of the row after this one in the window
    LEAD(OrderDate) OVER (PARTITION BY TerritoryName ORDER BY OrderDate) AS NextOrder
FROM Orders


# Increasing window complexity
 

In [None]:
# Running total of OrderPrice inside each territory, accumulated in OrderDate order.
# No frame is specified, so the default window frame applies
SELECT
    TerritoryName,
    OrderDate,
    SUM(OrderPrice) OVER (PARTITION BY TerritoryName ORDER BY OrderDate) AS TerritoryTotal
FROM Orders

In [None]:
# Number the orders of each territory 1, 2, 3, ... following OrderDate
SELECT
    TerritoryName,
    OrderDate,
    ROW_NUMBER() OVER (PARTITION BY TerritoryName ORDER BY OrderDate) AS OrderCount
FROM Orders

# Using windows for statistical functions


In [None]:
# Standard deviation of OrderPrice over a window partitioned by TerritoryName
# and arranged by OrderDate
SELECT
    OrderDate,
    TerritoryName,
    STDEV(OrderPrice) OVER (PARTITION BY TerritoryName ORDER BY OrderDate) AS StdDevPrice
FROM Orders

In [None]:
# CTE ModePrice with two columns: OrderPrice and UnitPriceFrequency,
# where UnitPriceFrequency numbers the rows within each distinct OrderPrice
WITH ModePrice (OrderPrice, UnitPriceFrequency)
AS (
    SELECT
        OrderPrice,
        ROW_NUMBER() OVER (PARTITION BY OrderPrice ORDER BY OrderPrice) AS UnitPriceFrequency
    FROM Orders
)

# Return every row of the CTE
SELECT *
FROM ModePrice

In [None]:
# Same CTE as the previous exercise: rows are numbered within each distinct OrderPrice
WITH ModePrice (OrderPrice, UnitPriceFrequency)
AS (
    SELECT
        OrderPrice,
        ROW_NUMBER() OVER (PARTITION BY OrderPrice ORDER BY OrderPrice) AS UnitPriceFrequency
    FROM Orders
)

# The mode is the OrderPrice whose row numbering reaches the highest
# UnitPriceFrequency found anywhere in the CTE
SELECT OrderPrice AS ModeOrderPrice
FROM ModePrice
WHERE UnitPriceFrequency = (SELECT MAX(UnitPriceFrequency) FROM ModePrice)