In [None]:
import os
import csv
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import linregress
import requests
import json

In [None]:
# Location of the cleaned, merged CSV (relative to the notebook's working directory)
project_df_path = "Data/merge.csv"

# Read the CSV into a DataFrame and preview its first rows as the cell output
project_df = pd.read_csv(filepath_or_buffer=project_df_path)
project_df.head()

In [None]:
#see dataframe columns as a list
#project_df.columns.tolist()

In [None]:
# Silence every warning category for the remainder of the kernel session.
# NOTE(review): this blanket filter also hides any pandas deprecation or
# copy warnings that later cells might emit -- confirm that is intended.
import warnings
warnings.simplefilter(action="ignore")

In [None]:
#Get ranking and totals data
#Built as a single chain so totals_df is a fresh DataFrame on every run:
#  1. select the year, country, rank and score columns from project_df
#  2. rename the year and country columns to short names
#  3. cast the SPI ranking to int so it has no decimal part
#Chaining (rather than rename(inplace=True) plus column assignment on the
#selection) avoids writing into a frame derived from project_df, which is the
#pattern pandas flags with SettingWithCopyWarning, and keeps the cell idempotent.
#Note: astype(int) raises if "SPI Rank_SP" contains missing values.
totals_df = (
    project_df[
        [
            "Year_HS",
            "Country_SP",
            "Overall rank_HS",
            "Score_HS",
            "SPI Rank_SP",
            "Social Progress Index_SP",
        ]
    ]
    .rename(columns={"Year_HS": "Year", "Country_SP": "Country"})
    .astype({"SPI Rank_SP": int})
)

In [None]:
#Top Rankings of Happiness Score
#ranking_HS = totals_df.sort_values(["Year","Overall rank_HS"])
#ranking_HS

In [None]:
#Top Rankings of Social Progress Index
#ranking_SP = totals_df.sort_values(["Year","SPI Rank_SP"])
#ranking_SP

In [None]:
# Rows ranked in the top 10 on BOTH the Social Progress Index and the happiness ranking
ranking_both_top10 = totals_df.loc[
    (totals_df["SPI Rank_SP"] <= 10) & (totals_df["Overall rank_HS"] <= 10)
]
ranking_both_top10

In [None]:
# Rows ranked in the top 25 on BOTH the Social Progress Index and the happiness ranking
ranking_both_top25 = totals_df.loc[
    (totals_df["SPI Rank_SP"] <= 25) & (totals_df["Overall rank_HS"] <= 25)
]
ranking_both_top25

In [None]:
#Scatterplot all years: happiness score (x) against Social Progress Index (y)
x_values = totals_df["Score_HS"]
y_values = totals_df["Social Progress Index_SP"]

#Least-squares fit over every row of totals_df
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = slope * x_values + intercept

#Equation label for the fitted line. The intercept carries an explicit sign so
#a positive intercept is shown as "y = ax + b" instead of the ambiguous "y = ax b"
line_eq = f"y = {round(slope, 2)}x {'+' if intercept >= 0 else '-'} {abs(round(intercept, 2))}"

#Titles, axis limits and grid
plt.title("Overall Social Progress Index and Happiness Score (2015-2019)")
plt.ylabel(" Social Progress Index")
plt.xlabel("Happiness Score")
plt.ylim(20,100)
plt.xlim(2,10)
plt.grid(True, linestyle="-", which="major", color="gray", alpha=0.25)

#Place the equation at data coordinates (3, 30), inside the fixed axis limits
plt.annotate(line_eq, (3,30), fontsize=12, color="red")

#Observations in black, fitted regression line in red
plt.scatter(x_values, y_values, color="black", marker="o")
plt.plot(x_values, regress_values, "r-")
plt.show()

#print r squared
print(f"R squared: {rvalue**2}")

In [None]:
# Bucket the rows of totals_df by country (the groupby object is reused by later cells)
grouped_totals_df = totals_df.groupby(by=["Country"])

# Show the per-country mean of every remaining column as the cell output
grouped_totals_df.mean()

In [None]:
#Per-country averages of the rank and score columns, indexed by Country.
#The columns are selected with a list: selecting groupby columns with a bare
#tuple ("a", "b", ...) was deprecated in pandas 1.x and raises in pandas 2.0+.
#.mean() on the selection already returns a DataFrame, so no pd.DataFrame(...)
#wrapper is needed.
averages_df = grouped_totals_df[
    ["Overall rank_HS", "Score_HS", "SPI Rank_SP", "Social Progress Index_SP"]
].mean()

In [None]:
#Scatterplot averaged data: per-country mean happiness score (x) against
#per-country mean Social Progress Index (y)
x_values = averages_df["Score_HS"]
y_values = averages_df["Social Progress Index_SP"]

#Least-squares fit over every row of averages_df
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = slope * x_values + intercept

#Equation label for the fitted line. The intercept carries an explicit sign so
#a positive intercept is shown as "y = ax + b" instead of the ambiguous "y = ax b"
line_eq = f"y = {round(slope, 2)}x {'+' if intercept >= 0 else '-'} {abs(round(intercept, 2))}"

#Titles, axis limits and grid
plt.title("Five-year Average Social Progress Index and Happiness Score")
plt.ylabel(" Social Progress Index")
plt.xlabel("Happiness Score")
plt.ylim(20,100)
plt.xlim(2,10)
plt.grid(True, linestyle="-", which="major", color="gray", alpha=0.25)

#Place the equation at data coordinates (3, 30), inside the fixed axis limits
plt.annotate(line_eq, (3,30), fontsize=12, color="red")

#Observations in black, fitted regression line in red
plt.scatter(x_values, y_values, color="black", marker="o")
plt.plot(x_values, regress_values, "r-")
plt.show()

#print r squared
print(f"R squared: {rvalue**2}")