In [None]:
# Happiness Project

## Dataset Cleanup and Exploratory Analysis 

- Basic cleanup of all datasets in Excel (find and replace NULL values, standardize country names and values, remove duplicates, drop data irrelevant to the analysis)
- Export all datasets as CSV and upload them to PostgreSQL for further cleanup, manipulation, and exploration

### Happiness Analysis and Country Ranking

- General exploratory analysis of the highest- and lowest-ranking countries for each measure
- Find the 20 happiest and the 20 least happy countries and use these as the ranked countries
- Drop data irrelevant to the analysis

In [None]:
-- Per-country, per-year happiness measures (populated from the CSV export
-- described in the notes at the top of this notebook).
-- NOTE(review): no primary key or NOT NULL constraints are declared; presumably
-- (countryname, year) is unique after the Excel de-duplication -- confirm.
CREATE TABLE happiness (
	countryname VARCHAR(100),
	year INT,
	lifeladder FLOAT,               -- averaged as the "happiness" score by the queries in this file
	gdp FLOAT,
	socialsupport FLOAT,
	life_expectancy_birth FLOAT,
	perception_corruption FLOAT
);

-- Drop observations before 2012; the analysis in this file covers 2012-2022.
-- Rows whose year is NULL do not satisfy the predicate and are left in place.
DELETE FROM happiness
WHERE year < 2012;

-- Top 20 countries by average life-ladder (happiness) score over all years
-- present in the table (expected to be 2012-2022).
-- For the 20 lowest-scoring countries, flip the sort to ASC (keep NULLS LAST).
SELECT
    countryname,
    ROUND(AVG(lifeladder)::numeric, 2) AS happiness_avg
FROM happiness
GROUP BY countryname                -- already one row per country; DISTINCT is not needed
ORDER BY
    happiness_avg DESC NULLS LAST,  -- PostgreSQL puts NULLs first on DESC by default
    countryname                     -- deterministic tie-break at the LIMIT cut-off
LIMIT 20;

-- Top 10 countries by average social-support score over all years present in
-- the table. For the 10 lowest, flip the sort to ASC (keep NULLS LAST).
SELECT
    countryname,
    ROUND(AVG(socialsupport)::numeric, 2) AS support_avg
FROM happiness
GROUP BY countryname              -- already one row per country; DISTINCT is not needed
ORDER BY
    support_avg DESC NULLS LAST,  -- PostgreSQL puts NULLs first on DESC by default
    countryname                   -- deterministic tie-break at the LIMIT cut-off
LIMIT 10;

-- Ranked-country lookup table: gives each ranked country a surrogate countryid
-- and stores its average happiness score.
-- The table must be named happiness_rank: both the INSERT below and the
-- happiness_rank_years join read/write it under that name.
CREATE TABLE happiness_rank (
	countryid SERIAL PRIMARY KEY,          -- PRIMARY KEY already implies NOT NULL
	countryname VARCHAR(100) NOT NULL,
	avg_happiness_score FLOAT
);

-- Load the 20 countries with the highest average life-ladder score.
-- countryid is assigned by the SERIAL default.
-- NOTE(review): the notes mention top 20 AND lowest 20, but only the top 20
-- are inserted here -- confirm whether a second INSERT (ASC order) is intended.
INSERT INTO happiness_rank (countryname, avg_happiness_score)
SELECT
    countryname,
    ROUND(AVG(lifeladder)::numeric, 2) AS happiness_avg
FROM happiness
GROUP BY countryname
ORDER BY
    happiness_avg DESC NULLS LAST,  -- keep countries with no score out of the top 20
    countryname                     -- deterministic tie-break at the LIMIT cut-off
LIMIT 20;

-- Yearly happiness score for each ranked country, 2012-2022 (input for the
-- correlation analysis). Every row of happiness_rank is kept; year and
-- lifeladder are NULL for a ranked country with no matching happiness rows.
CREATE TABLE happiness_rank_years AS
SELECT
    r.countryid,
    r.countryname,   -- taken from the preserved side so it is never NULL
    h.year,
    h.lifeladder
FROM happiness_rank AS r
LEFT JOIN happiness AS h
    ON h.countryname = r.countryname
-- Load order only; later reads still need their own ORDER BY.
ORDER BY r.countryid, h.year;