## DEPENDENCIES

In [1]:
import pandas as pd
from pathlib import Path
from sqlalchemy import create_engine
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

## IMPORT DATA

In [2]:
# Create the SQLite engine used by the queries below
happiness_path = Path('Resources/HappinessIndexScore.sqlite')

# SQLite creates a brand-new, empty database when the file is missing, which would
# only surface later as a "no such table" error - fail fast with a clear message.
if not happiness_path.is_file():
    raise FileNotFoundError(f'SQLite database not found: {happiness_path}')

engine = create_engine(f'sqlite:///{happiness_path}')

# Open and immediately release one connection as a connectivity check. The context
# manager closes it even if an error occurs; `conn` stays bound (closed) afterwards.
with engine.connect() as conn:
    pass

In [3]:
# Load every row of the final_output table into a DataFrame
happiness_df = pd.read_sql(sql='SELECT * FROM final_output', con=engine)

# Preview the first five rows
happiness_df.head(5)

Unnamed: 0,country,region,ladder_score,logged_GPD_per_capita,social_support,healthy_life_expectancy,freedom_life_choices,generosity,perceptions_corruption,population_density,unemployment_rate,median_age,gini_coefficient,avg_temperature,lt_alcohol_per_capita
0,Finland,Europe,7.804,10.792,0.969,71.15,0.961,-0.019,0.182,16.6,7.16,43.2,27.7,3.24,8.23
1,Denmark,Europe,7.586,10.962,0.954,71.25,0.934,0.134,0.196,138.0,5.14,42.2,27.7,9.77,9.16
2,Iceland,Europe,7.53,10.896,0.983,72.05,0.936,0.211,0.668,3.5,3.56,37.8,26.1,2.11,7.72
3,Israel,Middle East,7.473,10.639,0.943,72.697,0.809,-0.023,0.708,412.24,3.39,30.1,38.6,20.23,3.07
4,Netherlands,Europe,7.403,10.942,0.93,71.55,0.887,0.213,0.379,420.38,3.56,42.2,29.2,11.72,8.23


## LOGISTIC REGRESSION MODEL

In [4]:
# Midpoint of the observed ladder-score range: halfway between the lowest and highest score
ladder_scores = happiness_df['ladder_score']
mid_point = (ladder_scores.min() + ladder_scores.max()) * 0.5
mid_point

4.8315

In [5]:
# Binary target: 1 when the ladder score reaches the midpoint, otherwise 0
happiness_df['happiness'] = (happiness_df['ladder_score'] >= mid_point).astype('int64')

In [6]:
# Drop columns that must not be used as features: the identifiers (country, region)
# and ladder_score, which the happiness target was derived from
happiness_df = happiness_df.drop(columns=['country', 'region', 'ladder_score'])

In [7]:
# Features X: a copy of the frame with the target column removed.
# Target y: the 'happiness' column popped out of that copy.
X = happiness_df.copy()
y = X.pop('happiness')

In [8]:
# Preview the first five target labels
y.head(5)

0    1
1    1
2    1
3    1
4    1
Name: happiness, dtype: int64

In [9]:
# Preview the first five feature rows
X.head(5)

Unnamed: 0,logged_GPD_per_capita,social_support,healthy_life_expectancy,freedom_life_choices,generosity,perceptions_corruption,population_density,unemployment_rate,median_age,gini_coefficient,avg_temperature,lt_alcohol_per_capita
0,10.792,0.969,71.15,0.961,-0.019,0.182,16.6,7.16,43.2,27.7,3.24,8.23
1,10.962,0.954,71.25,0.934,0.134,0.196,138.0,5.14,42.2,27.7,9.77,9.16
2,10.896,0.983,72.05,0.936,0.211,0.668,3.5,3.56,37.8,26.1,2.11,7.72
3,10.639,0.943,72.697,0.809,-0.023,0.708,412.24,3.39,30.1,38.6,20.23,3.07
4,10.942,0.93,71.55,0.887,0.213,0.379,420.38,3.56,42.2,29.2,11.72,8.23


In [10]:
# Hold out a test set using train_test_split's default proportions;
# random_state=1 keeps the split reproducible between runs
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [11]:
# Learn the scaling parameters from the training split only and scale it in one step
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

# fit() returns the scaler itself, so X_scaler is kept as an alias of the fitted scaler
X_scaler = scaler

# Apply the training-set scaling to the held-out test split
X_test_scaled = X_scaler.transform(X_test)

In [12]:
# Logistic regression using the lbfgs solver, seeded with random_state=1
classifier = LogisticRegression(random_state=1, solver='lbfgs')

# Train on the scaled training split (the fitted estimator is this cell's output)
classifier.fit(X_train_scaled, y_train)

In [13]:
# Mean accuracy of the fitted model on the scaled training split
training_score = classifier.score(X_train_scaled, y_train)
print(f"Training Data Score: {training_score}")

Training Data Score: 0.8913043478260869


In [14]:
# Predict labels for the scaled test split
predictions = classifier.predict(X_test_scaled)

# Show predicted and actual labels side by side for the first five test rows
pd.DataFrame(dict(Prediction=predictions, Actual=y_test)).head(5)

Unnamed: 0,Prediction,Actual
48,1,1
113,0,0
73,1,1
105,0,0
45,1,1


In [15]:
# Accuracy on the test split. Arguments follow scikit-learn's (y_true, y_pred) order,
# matching the confusion_matrix and classification_report calls in this notebook.
accuracy_score(y_test, predictions)

0.8709677419354839

In [16]:
# Confusion matrix of actual vs. predicted labels on the test split
confusion_matrix(y_true=y_test, y_pred=predictions)

array([[ 9,  1],
       [ 3, 18]])

In [17]:
# Per-class precision, recall and F1, with readable names for labels 0 and 1
target_names = ['Unhappy', 'Happy']
print(
    classification_report(
        y_true=y_test,
        y_pred=predictions,
        target_names=target_names,
    )
)

              precision    recall  f1-score   support

     Unhappy       0.75      0.90      0.82        10
       Happy       0.95      0.86      0.90        21

    accuracy                           0.87        31
   macro avg       0.85      0.88      0.86        31
weighted avg       0.88      0.87      0.87        31



In [18]:
# Pair each model coefficient with its feature name and rank the pairs
# by absolute coefficient size, largest first
importances = sorted(
    zip(classifier.coef_[0], X.columns),
    key=lambda pair: abs(pair[0]),
    reverse=True,
)
importances

[(1.3250554021066367, 'social_support'),
 (0.6536921962235471, 'median_age'),
 (-0.6163119985675591, 'avg_temperature'),
 (-0.4601930312790386, 'unemployment_rate'),
 (0.28613899427013445, 'logged_GPD_per_capita'),
 (-0.18211546397024958, 'generosity'),
 (-0.1269678916919012, 'population_density'),
 (-0.12633405966510808, 'perceptions_corruption'),
 (0.09488463024883967, 'freedom_life_choices'),
 (0.04458411859407358, 'healthy_life_expectancy'),
 (0.031556682549600694, 'gini_coefficient'),
 (-0.028831415368151148, 'lt_alcohol_per_capita')]

In [19]:
import dash
from dash import html
from dash import dcc
from dash.dependencies import Input, Output, State
import math

# Create the Dash app
app = dash.Dash(__name__)

# Font shared by every text element of the page
_FONT_FAMILY = 'Arial, sans-serif'

# Inline style shared by the three side-by-side columns
_COLUMN_STYLE = {
    'width': '30%',
    'display': 'inline-block',
    'vertical-align': 'top',
    'padding': '20px',
}

# One entry per form field, in the order the callback receives them:
# (component id, prompt, unit hint, default value, range caption).
# Each default is the lower bound quoted in its range caption.
_FEATURE_INPUTS = [
    ('input1', '1. Enter a value for GDP per capita', '(Dollars per year)',
     251.389, '(range in data USD 251.389 - USD 115,844.030)'),
    ('input2', '2. Enter a value for social support', '(Range from 0 to 1)',
     0.341, '(range in data 0.341 - 0.983)'),
    ('input3', '3. Enter a value for healthy life expectancy', '(Age)',
     51.53, '(range in data 51.53 - 74.349)'),
    ('input4', '4. Enter a value for freedom of life choices', '(Range from 0 to 1)',
     0.382, '(range in data 0.382 - 0.961)'),
    ('input5', '5. Enter a value for generosity', '(Range from -1 to 1)',
     -0.254, '(range in data -0.254 - 0.531)'),
    ('input6', '6. Enter a value for perception of corruption', '(Range from 0 to 1)',
     0.146, '(range in data 0.146 - 0.929)'),
    ('input7', '7. Enter a value for population density', '(Population per Sq. Km.)',
     2.08, '(range in data 2.08 - 8,310.69)'),
    ('input8', '8. Enter a value for unemployment rate', '(Percentage)',
     0.24, '(range in data 0.24 - 27.99)'),
    ('input9', '9. Enter a value for median age', '(Age)',
     15.1, '(range in data 15.1 - 49.5)'),
    ('input10', '10. Enter a value for gini coefficient', '(Range from 0 to 1)',
     0.232, '(range in data 0.232 - 0.63)'),
    ('input11', '11. Enter a value for average temperature', '(Degree Celsius)',
     -4.22, '(range in data -4.22 - 29.28)'),
    ('input12', '12. Enter a value for alcohol per capita', '(Liters)',
     0, '(range in data 0 - 12.9)'),
]


def _text_style(font_size):
    """Return a fresh inline style dict using the page font at the given CSS size."""
    return {'font-family': _FONT_FAMILY, 'font-size': font_size}


def _feature_section(input_id, prompt, unit_hint, default, range_caption):
    """Build the prompt heading, numeric input and range caption for one form field."""
    return [
        html.H3([prompt, html.Br(), unit_hint], style=_text_style('18px')),
        dcc.Input(
            id=input_id,
            type='number',
            value=default,
            step=0.001,
            style=_text_style('18px'),
        ),
        html.H2(range_caption, style=_text_style('15px')),
    ]


def _feature_column(features):
    """Build one column Div holding the given field sections, separated by line breaks."""
    children = []
    for position, feature in enumerate(features):
        if position:
            children.append(html.Br())
        children.extend(_feature_section(*feature))
    return html.Div(children, style=dict(_COLUMN_STYLE))


# Define the layout of the app: a title, then two columns of six inputs each
# and a third column with the submit button and the prediction output
app.layout = html.Div(
    [
        html.H1(
            'Predict happiness using our Logistic Regression model',
            style={
                'font-size': '35px',
                'font-family': _FONT_FAMILY,
                'font-weight': 'bold',
            },
        ),
        html.Br(), html.Br(), html.Br(),
        html.Div(
            [
                _feature_column(_FEATURE_INPUTS[:6]),
                _feature_column(_FEATURE_INPUTS[6:]),
                html.Div(
                    [
                        html.Button(
                            'Predict Happiness',
                            id='submit-button',
                            n_clicks=0,
                            style={
                                'font-family': _FONT_FAMILY,
                                'font-size': '18px',
                                'padding': '10px 20px',
                                'background-color': '#4CAF50',
                                'color': 'white',
                                'border': 'none',
                                'border-radius': '4px',
                                'cursor': 'pointer',
                            },
                        ),
                        html.Br(), html.Br(),
                        # Filled in by the callback registered for the submit button
                        html.Div(id='output', style=_text_style('30px')),
                    ],
                    style=dict(_COLUMN_STYLE),
                ),
            ]
        ),
    ]
)

@app.callback(
    Output('output', 'children'),
    Input('submit-button', 'n_clicks'),
    State('input1', 'value'),
    State('input2', 'value'),
    State('input3', 'value'),
    State('input4', 'value'),
    State('input5', 'value'),
    State('input6', 'value'),
    State('input7', 'value'),
    State('input8', 'value'),
    State('input9', 'value'),
    State('input10', 'value'),
    State('input11', 'value'),
    State('input12', 'value'),
)
def update_output(n_clicks, input1, input2, input3, input4, input5, input6, input7, input8, input9, input10, input11, input12):
    """Classify the entered country profile as HAPPY or UNHAPPY when the button is clicked.

    Args:
        n_clicks: Click count of the submit button; None is treated as 0.
        input1: GDP per capita; its natural logarithm is what gets passed to the model.
        input2-input9, input11, input12: Form values passed to the model unchanged,
            in that order.
        input10: Gini coefficient as entered on the form; multiplied by 100 before
            prediction.

    Returns:
        The text to display in the 'output' Div: a prompt or validation message, or
        'The country is: HAPPY' / 'The country is: UNHAPPY'.
    """
    prompt = 'Please provide values for all inputs.'

    # Nothing to predict until the button has been clicked at least once
    if n_clicks is None or n_clicks <= 0:
        return prompt

    # An empty or invalid number field arrives as None
    values = [input1, input2, input3, input4, input5, input6,
              input7, input8, input9, input10, input11, input12]
    if any(value is None for value in values):
        return prompt

    # math.log raises ValueError for zero or negative numbers, so reject them up front
    if input1 <= 0:
        return 'GDP per capita must be greater than 0.'

    # Convert the form values to the units of the training data: the GDP feature is
    # logged, and the gini column holds values such as 27.7 while the form asks for 0-1
    gdp = math.log(input1)
    gini_coefficient = input10 * 100

    # Single row, in the same column order as the training features
    row = [[gdp, input2, input3, input4, input5, input6, input7, input8, input9, gini_coefficient, input11, input12]]

    # The scaler was fitted on a DataFrame; reuse its recorded column names (when the
    # installed scikit-learn exposes them) so transform() does not warn on every click
    data = pd.DataFrame(row, columns=getattr(scaler, 'feature_names_in_', None))

    # Scale with the already-fitted scaler (no refitting), then classify the single row
    data_scaled = scaler.transform(data)
    result = classifier.predict(data_scaled)[0]

    labels = {0: 'UNHAPPY', 1: 'HAPPY'}
    happiness = labels.get(int(result), 'error')

    return f'The country is: {happiness}'


if __name__ == '__main__':
    # Prefer Dash.run, the current entry point; run_server is deprecated in newer Dash
    # releases, so it is only looked up when the installed version has no run().
    run_app = getattr(app, 'run', None) or app.run_server
    run_app(debug=True)
