-- In [None]: (notebook cell marker, commented out so the file parses as SQL)
  -- Upload sanity check: dump every row of the Covid_Deaths table.
  -- The sort keys are ordinals (3rd and 4th column of the table) because
  -- SELECT * does not spell out the column names.
SELECT *
FROM `lino-portfolio.Covid_Project.Covid_Deaths`
ORDER BY 3, 4;

  -- Upload sanity check: dump every row of the Covid_Vaccination table.
  -- The sort keys are ordinals (3rd and 4th column of the table) because
  -- SELECT * does not spell out the column names.
SELECT *
FROM `lino-portfolio.Covid_Project.Covid_Vaccination`
ORDER BY 3, 4;

  -- Working subset of the Covid_Deaths columns used by the queries below,
  -- listed per location in date order.
SELECT
  location,
  date,
  total_cases,
  new_cases,
  total_deaths,
  population
FROM `lino-portfolio.Covid_Project.Covid_Deaths`
ORDER BY location, date;

  -- Covid-19 death rate in the United States: deaths as a percentage of
  -- recorded cases, one row per date.
  -- SAFE_DIVIDE returns NULL instead of failing the whole query on rows where
  -- total_cases is 0 (the plain / operator raises a division-by-zero error).
SELECT
  location,
  date,
  total_cases,
  total_deaths,
  SAFE_DIVIDE(total_deaths, total_cases) * 100 AS Death_Rate
FROM `lino-portfolio.Covid_Project.Covid_Deaths`
WHERE location = 'United States'
ORDER BY location, date;

  -- Share of the United States population recorded as a Covid-19 case,
  -- expressed as a percentage, one row per date.
SELECT
  location,
  date,
  population,
  total_cases,
  (total_cases / population) * 100 AS Infected_Population_Percentage
FROM `lino-portfolio.Covid_Project.Covid_Deaths`
WHERE location = 'United States'
ORDER BY location, date;

  -- Infection rate per country: the highest recorded total_cases value and the
  -- highest share of the population infected (%), largest share first.
  -- Rows with a NULL continent are skipped so that aggregate locations
  -- (e.g. 'World', 'Europe', income groups) do not appear among the countries.
  -- NOTE(review): presumes aggregate locations carry a NULL continent, as the
  -- other "continent IS NOT NULL" filters in this file imply - confirm.
SELECT
  location,
  population,
  MAX(total_cases) AS Hightest_Infection_Count,
  MAX(total_cases / population) * 100 AS Infected_Population_Percentage
FROM `lino-portfolio.Covid_Project.Covid_Deaths`
WHERE continent IS NOT NULL
GROUP BY
  location,
  population
ORDER BY Infected_Population_Percentage DESC;

  -- Death rate per country: the highest recorded total_deaths value and the
  -- highest share of the population that died (%), largest share first.
  -- Rows with a NULL continent are skipped so that aggregate locations
  -- (e.g. 'World', 'Europe', income groups) do not appear among the countries.
  -- NOTE(review): presumes aggregate locations carry a NULL continent, as the
  -- other "continent IS NOT NULL" filters in this file imply - confirm.
SELECT
  location,
  population,
  MAX(total_deaths) AS Hightest_Death_Count,
  MAX(total_deaths / population) * 100 AS Death_Rate
FROM `lino-portfolio.Covid_Project.Covid_Deaths`
WHERE continent IS NOT NULL
GROUP BY
  location,
  population
ORDER BY Death_Rate DESC;

  -- Highest infection count per country, largest first.
  -- Aggregate locations are removed in two ways:
  --   1. continent IS NOT NULL drops rows that are not tied to a continent
  --      (presumably every aggregate such as 'Africa', 'Oceania' or
  --      'Low income', which the name list below does not mention - confirm);
  --   2. the original explicit name list is kept as a guard, so no row that
  --      was excluded before can reappear.
SELECT
  location,
  population,
  MAX(total_cases) AS Hightest_Infection_Count
FROM `lino-portfolio.Covid_Project.Covid_Deaths`
WHERE
  continent IS NOT NULL
  AND location NOT IN (
    'World',
    'High income',
    'Europe',
    'Asia',
    'European Union',
    'Upper middle income',
    'North America',
    'Lower middle income',
    'South America')
GROUP BY
  location,
  population
ORDER BY Hightest_Infection_Count DESC;

  -- Deaths as a percentage of cases for six hard-coded countries (described
  -- by the author as the six with the highest infection count), lowest rate
  -- first.
  -- GROUP BY location already yields one row per country, so the former
  -- SELECT DISTINCT was redundant and has been dropped.
  -- SAFE_DIVIDE returns NULL rather than raising an error if a country has no
  -- recorded new cases.
SELECT
  location,
  SUM(new_cases) AS total_new_cases,
  SUM(new_deaths) AS total_new_deaths,
  SAFE_DIVIDE(SUM(new_deaths), SUM(new_cases)) * 100 AS top_six_heighest_Death_Rate
FROM `lino-portfolio.Covid_Project.Covid_Deaths`
WHERE location IN (
  'United States',
  'India',
  'Brazil',
  'Germany',
  'France',
  'United Kingdom')
GROUP BY location
ORDER BY top_six_heighest_Death_Rate;

  -- Infection rate per continent, highest percentage first.
  -- Taking MAX(total_cases) directly over all rows of a continent returns the
  -- figure of its single largest country, not a continent-wide number. The
  -- per-country peaks are therefore computed first and then added up.
  -- Infected_Population_Percentage = summed peak cases / summed population.
  -- NOTE(review): countries with no reported cases still contribute their
  -- population to the denominator.
WITH country_peaks AS (
  -- One row per country: its highest cumulative case count and its population.
  SELECT
    continent,
    location,
    MAX(total_cases) AS peak_cases,
    MAX(population) AS population
  FROM `lino-portfolio.Covid_Project.Covid_Deaths`
  WHERE continent IS NOT NULL
  GROUP BY
    continent,
    location
)
SELECT
  continent,
  SUM(peak_cases) AS Hightest_Infection_Count,
  SAFE_DIVIDE(SUM(peak_cases), SUM(population)) * 100 AS Infected_Population_Percentage
FROM country_peaks
GROUP BY continent
ORDER BY Infected_Population_Percentage DESC;

  -- Death rate per continent, highest percentage first.
  -- Taking MAX(total_deaths) directly over all rows of a continent returns the
  -- figure of its single largest country, not a continent-wide number. The
  -- per-country peaks are therefore computed first and then added up.
  -- Death_Rate = summed peak deaths / summed population, as a percentage.
  -- NOTE(review): countries with no reported deaths still contribute their
  -- population to the denominator.
WITH country_peaks AS (
  -- One row per country: its highest cumulative death count and its population.
  SELECT
    continent,
    location,
    MAX(total_deaths) AS peak_deaths,
    MAX(population) AS population
  FROM `lino-portfolio.Covid_Project.Covid_Deaths`
  WHERE continent IS NOT NULL
  GROUP BY
    continent,
    location
)
SELECT
  continent,
  SUM(peak_deaths) AS Hightest_Death_Count,
  SAFE_DIVIDE(SUM(peak_deaths), SUM(population)) * 100 AS Death_Rate
FROM country_peaks
GROUP BY continent
ORDER BY Death_Rate DESC;

  -- Deaths per 100,000 inhabitants for six hard-coded countries (described by
  -- the author as the six with the highest infection count), lowest first.
  -- The previous formula, SUM(new_deaths)/SUM(new_cases)*100, was a copy of
  -- the deaths-per-case percentage query and did not produce a per-100,000
  -- figure. The rate is now total deaths divided by population, scaled to
  -- 100,000, matching the per-100,000 queries for the US in this file.
  -- MAX(population) picks the country's population (one value per location is
  -- assumed - TODO confirm).
  -- GROUP BY location already yields one row per country, so DISTINCT is not
  -- needed.
SELECT
  location,
  SUM(new_cases) AS total_new_cases,
  SUM(new_deaths) AS total_new_deaths,
  SAFE_DIVIDE(SUM(new_deaths), MAX(population)) * 100000 AS Global_Death_Rate
FROM `lino-portfolio.Covid_Project.Covid_Deaths`
WHERE location IN (
  'United States',
  'India',
  'Brazil',
  'Germany',
  'France',
  'United Kingdom')
GROUP BY location
ORDER BY Global_Death_Rate;

  -- Infection rate per 100,000 inhabitants in the United States for calendar
  -- year 2020 (both boundary dates included), in date order.
  -- Rows are grouped by date and location only. The former extra grouping key
  -- new_cases was not selected and would have split a date into several
  -- output rows if more than one new_cases value existed for it.
SELECT
  date,
  location,
  SUM(total_cases) AS cases,
  SUM(Total_cases / population) * 100000 AS Infected_Population_Per_100000
FROM `lino-portfolio.Covid_Project.Covid_Deaths`
WHERE
  date BETWEEN "2020-01-01" AND "2020-12-31"
  AND location = 'United States'
GROUP BY
  date,
  location
ORDER BY date ASC;

  -- Deaths per 100,000 inhabitants in the United States for calendar year
  -- 2020 (both boundary dates included), in date order.
SELECT
  date,
  location,
  SUM(total_deaths) AS Total_Death_Count,
  SUM(total_deaths / population) * 100000 AS Deaths_Per_100000
FROM `lino-portfolio.Covid_Project.Covid_Deaths`
WHERE
  location = 'United States'
  AND date BETWEEN "2020-01-01" AND "2020-12-31"
GROUP BY
  date,
  location
ORDER BY date;

  -- Vaccinations administered per day and country, with a running total
  -- (vaccination_tally) that accumulates new_vaccinations per location in
  -- date order.
  -- Deaths and vaccination rows are matched on location and date; rows with a
  -- NULL continent are left out.
  -- The result is sorted by location and date. The former "ORDER BY 3, 4"
  -- resolved to location, population, which left the day order within a
  -- country undefined and made the running total hard to read.
SELECT
  dea.date,
  dea.continent,
  dea.location,
  dea.population,
  vac.new_vaccinations,
  -- The partition already fixes the location, so date is the only sort key
  -- needed inside the window.
  SUM(vac.new_vaccinations) OVER (PARTITION BY dea.location ORDER BY dea.date) AS vaccination_tally
FROM `lino-portfolio.Covid_Project.Covid_Deaths` AS dea
INNER JOIN `lino-portfolio.Covid_Project.Covid_Vaccination` AS vac
  ON dea.location = vac.location
  AND dea.date = vac.date
WHERE dea.continent IS NOT NULL
ORDER BY
  dea.location,
  dea.date;
  
  --Vaccination rate for the countries with the highest infection count
  -- Per-location vaccination summary for six hard-coded countries, read from
  -- the Covid_Vaccination table: the sum of new_vaccinations, the maximum
  -- people_fully_vaccinated and people_vaccinated values, and the maximum
  -- people_vaccinated/population ratio as a percentage.
  -- NOTE(review): population is read from Covid_Vaccination here, whereas the
  -- other queries in this file read it from Covid_Deaths - confirm the column
  -- exists in this table.
SELECT
  location,
  population,
  SUM(new_vaccinations) AS total_vaccination_administered,
  MAX(people_fully_vaccinated) AS people_fully_vaccinated,
  MAX(people_vaccinated) AS people_vaccinated,
  MAX(people_vaccinated/population)*100 AS Total_vaccination_percentage
FROM
  `lino-portfolio.Covid_Project.Covid_Vaccination`
WHERE
  location IN ('United States',
    'India',
    'Brazil',
    'Germany',
    'France',
    'United Kingdom')
GROUP BY
  location,
  population
  -- Ordinal 6 is the Total_vaccination_percentage column.
ORDER BY
  6  