In [1]:
import csv
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

In [2]:
def loadData(fileName, inputVariabName, outputVariabName):
    data = []
    dataNames = []
    with open(fileName) as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        line_count = 0
        for row in csv_reader:
            if line_count == 0:
                dataNames = row
            else:
                data.append(row)
            line_count += 1
    selectedVariable = dataNames.index(inputVariabName)
    inputs = [float(data[i][selectedVariable]) for i in range(len(data)) if data[i][selectedVariable] != '' and data[i][selectedVariable] != 0]
    selectedOutput = dataNames.index(outputVariabName)
    outputs = [float(data[i][selectedOutput]) for i in range(len(data)) if data[i][selectedVariable] != '' and data[i][selectedVariable] != 0]

    return inputs, outputs

def load2Data(fileName, firstVariabName, secondVariableName, outputVariabName):
    data = []
    dataNames = []
    with open(fileName) as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        line_count = 0
        for row in csv_reader:
            if line_count == 0:
                dataNames = row
            else:
                data.append(row)
            line_count += 1
    selectedOutput = dataNames.index(outputVariabName)
    selectedVariable1 = dataNames.index(firstVariabName)
    selectedVariable2 = dataNames.index(secondVariableName)

    inputs1 = []
    inputs2 = []
    outputs = []

    for i in range(len(data)):
        i1 = data[i][selectedVariable1]
        i2 = data[i][selectedVariable2]
        out = data[i][selectedOutput]

        if i1 != '' and i1 != 0 and i2 != '' and i2 != 0:
            inputs1.append(float(i1))
            inputs2.append(float(i2))
            outputs.append(float(out))

    return inputs1, inputs2, outputs

In [3]:
inputsF, outsF = loadData('data/v2_world-happiness-report-2017.csv','Family','Happiness.Score')
inputsPib, inputsFreedom, outsPF = load2Data('data/v2_world-happiness-report-2017.csv','Economy..GDP.per.Capita.','Freedom','Happiness.Score')

In [4]:
# Familie
def train_and_evaluate_model(inputsF, outsF, example_input):
    df = pd.DataFrame({'Family': inputsF, 'Happiness': outsF})

    # Split data
    X_train, X_test, y_train, y_test = train_test_split(
        df[['Family']], df['Happiness'], test_size=0.2, random_state=42
    )

    # Train model
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Predict and evaluate
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print("Mean Squared Error:", mse)

    # Predict example
    example_df = pd.DataFrame({'Family': example_input})
    predicted = model.predict(example_df)
    print("Predicted Happiness:", predicted)

    return model

model_family = train_and_evaluate_model(inputsF, outsF, [1.7])

Mean Squared Error: 0.6577421455835356
Predicted Happiness: [6.88166476]


In [6]:
# Pib & Libertate
def train_and_evaluate_model(features, target, example_input, feature_names):
    df = pd.DataFrame({**{name: col for name, col in zip(feature_names, features)}, 'Happiness': target})

    # Split data
    X = df[feature_names]
    y = df['Happiness']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train model
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Predict and evaluate
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print("Mean Squared Error:", mse)

    # Predict example
    example_df = pd.DataFrame([example_input], columns=feature_names)
    predicted = model.predict(example_df)
    print("Predicted Happiness:", predicted)

    return model

# Example call
model_pib_freedom = train_and_evaluate_model(
    features=[inputsPib, inputsFreedom],
    target=outsPF,
    example_input=[1.61, 0.61],
    feature_names=['Pib', 'Freedom']
)

Mean Squared Error: 0.3683180155898609
Predicted Happiness: [9162834.91566559]
