In [None]:
# Dependencies and Setup
import os
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as st
from scipy.stats import linregress

In [None]:
# Path to the CSV data file
happiness_2019_2020_file_path = "../data/TOTAL_2019_2020_clean.csv"

# Load the CSV contents with pandas
happiness_2019_2020 = pd.read_csv(filepath_or_buffer=happiness_2019_2020_file_path)

In [None]:
# Wrap the loaded data in a single data frame and print it
happiness_df = pd.DataFrame(data=happiness_2019_2020)
print(happiness_df)


In [None]:
# Drop the columns that are not used by the plots and regressions below
data_df = happiness_df.drop(columns=["Country", "Region", "Year", "Happiness Rank"])
data_df.head()

In [None]:
# Every column of data_df except the happiness score itself
column_names = list(data_df.columns)
column_names.remove("Happiness Score")
print(column_names)

In [None]:
# Scatter plots for happiness score correlation: one figure per column in
# column_names, plotted against the happiness score and saved as a PNG.

# Make sure the folder the figures are written to exists; savefig fails otherwise
os.makedirs("outputs", exist_ok=True)

# The loop variable is named `factor` rather than `type` so the builtin type()
# is not shadowed for the rest of the notebook session.
for factor in column_names:
    # Set figure size
    fig = plt.figure(figsize=(18, 10))
    # Make scatter plot
    plt.scatter(data_df["Happiness Score"], data_df[factor])
    # Set title
    plt.title(f"Relationship between Happiness Score and Score {factor}", fontsize=20)
    # Set x-axis label
    plt.xlabel("Happiness Score", fontsize=16)
    # Set y-axis label
    plt.ylabel(factor, fontsize=16)
    # Add grid to plot
    plt.grid(color="gray", linestyle="--", linewidth=0.5)
    # Set text to be added with plt.figtext
    txt = f"Relationship between [Happiness Score] and [{factor}] in 2019 and 2020."
    plt.figtext(0.5, 0.01, txt, wrap=True, horizontalalignment='center', fontsize=14)
    # Save figure
    plt.savefig(f"outputs/{factor}_scatter.png", dpi=600, transparent=False)
    # Display plot
    plt.show()
    # Release the figure so figures do not pile up across loop iterations
    plt.close(fig)


In [None]:
# Regression evaluation: for each column in column_names, fit a linear
# regression against the happiness score, draw the scatter plot with the
# fitted line and r-squared, and save the figure as a PNG.

# Make sure the folder the figures are written to exists; savefig fails otherwise
os.makedirs("outputs", exist_ok=True)

# The loop variable is named `factor` rather than `type` so the builtin type()
# is not shadowed for the rest of the notebook session.
for factor in column_names:
    # Fit the line: factor = slope * happiness score + intercept
    (slope, intercept, rvalue, pvalue, stderr) = linregress(data_df["Happiness Score"], data_df[factor])
    regress_values = data_df["Happiness Score"] * slope + intercept
    line_equation = f"y= {round(slope,2)}x + {round(intercept,2)}"
    # Set fig, ax to encompass plot
    fig, ax = plt.subplots(figsize=(20, 10))
    # Make scatter plot
    ax.scatter(data_df["Happiness Score"], data_df[factor])
    # Set title of plot
    ax.set_title(f"Relationship between Happiness Score and Score {factor}", fontsize=20)
    # Set x-axis label
    ax.set_xlabel("Happiness Score", fontsize=20)
    # Set y-axis label
    ax.set_ylabel(factor, fontsize=20)
    # Add a grid to the plot and style it
    ax.grid(color="gray", linestyle="--", linewidth=0.5)
    # Add regression line to the scatter plot
    ax.plot(data_df["Happiness Score"], regress_values, "r-")
    # Display the line equation and r-squared on plot
    ax.text(0.1, 0.925, (f"{line_equation} \n r-squared= {round(rvalue**2,2)}    "), horizontalalignment='center', verticalalignment='center', transform=ax.transAxes, fontsize=14, fontweight='bold')
    # Set texts to be displayed in the figtext
    txt_1 = f"Correlation between Happiness Score and Score {factor} from 2019-2020 surveys."
    txt_2 = ("\n For Social Science: High association: r-squared greater than 0.67 | Moderate predictive association: r-squared between 0.33 and 0.67 \n Low association: r-squared between 0.19 and 0.33 | No association: r-squared below 0.19")
    plt.figtext(0.5, 0.01, txt_1, wrap=True, horizontalalignment='center', fontsize=16)
    plt.figtext(0.5, -0.05, txt_2, wrap=True, fontsize=12, va="bottom", ha="center")
    # Save the figure; the second footnote sits below the figure canvas
    # (y = -0.05), so a tight bounding box is needed to keep it in the file
    plt.savefig(f"outputs/{factor}_regression.png", dpi=600, transparent=False, bbox_inches="tight")
    # Display the figure
    plt.show()
    # Release the figure so figures do not pile up across loop iterations
    plt.close(fig)


In [None]:
# Display the full data frame (all years) before it is split by year below
happiness_df

In [None]:
# Keep only the rows whose Year is 2019
is_2019 = happiness_df["Year"] == 2019
happiness_2019_df = happiness_df[is_2019]

In [None]:
# Keep only the rows whose Year is 2020
happiness_2020_df = happiness_df[happiness_df["Year"] == 2020]


In [None]:
# Print the row labels of the two yearly frames.
# The labels are read from happiness_2019_df / happiness_2020_df because
# data_2019_df / data_2020_df are only created in a later cell and would raise
# NameError on a fresh kernel. Dropping columns does not change the index, so
# the printed output is the same.
print(happiness_2019_df.index)
print(happiness_2020_df.index)

In [None]:
# Country names recorded in each year
countries_2019 = happiness_2019_df['Country'].tolist()
countries_2020 = happiness_2020_df['Country'].tolist()

# Sets give fast membership checks; the lists above keep the original order
seen_2019 = set(countries_2019)
seen_2020 = set(countries_2020)

# Countries that appear in only one of the two years. Each year must be
# checked against the OTHER year: comparing a list with itself never finds
# anything and always leaves drop_countries empty.
drop_countries = [country for country in countries_2020 if country not in seen_2019]
drop_countries += [country for country in countries_2019 if country not in seen_2020]
drop_countries

In [None]:
# Change from 2019 to 2020 (2020 minus 2019) for every remaining column.
# The yearly frames are row slices of one frame, so their row labels never
# overlap; subtracting them directly aligns nothing and yields only NaN.
# Index both years by country so matching countries line up.
# NOTE(review): assumes each country appears at most once per year - confirm.
identifier_columns = ["Region", "Year", "Happiness Rank"]
scores_2019 = happiness_2019_df.set_index("Country").drop(columns=identifier_columns)
scores_2020 = happiness_2020_df.set_index("Country").drop(columns=identifier_columns)

# Only countries present in both years have a difference to compute
shared_countries = scores_2020.index.intersection(scores_2019.index)
delta_data_df = scores_2020.loc[shared_countries].subtract(scores_2019.loc[shared_countries])
delta_data_df

In [None]:

# Columns removed from both yearly frames
columns_to_drop = ["Country", "Region", "Year", "Happiness Rank"]

# 2019 rows without the dropped columns
data_2019_df = happiness_2019_df.drop(columns=columns_to_drop)
data_2019_df.head()

# 2020 rows without the dropped columns
data_2020_df = happiness_2020_df.drop(columns=columns_to_drop)
data_2020_df.head()
