# Project - Predict the wellbeing of shanghainese communities

# MACHINE LEARNING

In [11]:
import pandas as pd
import numpy as np

In [16]:
df_predict = pd.read_pickle("df_prediction.pkl")

In [17]:
df_predict.head()

Unnamed: 0,nb_transportation_scaled,nb_shopping_scaled,nb_restaurant_scaled,nb_scenicSpot_scaled,nb_stadiumAndGym_scaled,nb_mobike_scaled,green_space_scaled,happiness_equalCoff,happiness_clean,happiness_smell,happiness_noise,happiness_perso,happiness_other
0,-0.757151,-0.598362,-0.721806,-0.405934,-0.747588,-0.808407,0.265348,1.666667,2.428571,1.285714,1.285714,1.6,1.444444
1,-0.838055,-0.682104,-0.801698,-0.404075,-0.792245,-0.808407,0.265348,1.333333,2.095238,0.952381,0.952381,1.266667,1.111111
2,-0.905242,-0.69263,-0.810939,-0.427699,-0.807475,-0.808395,0.265348,1.666667,2.428571,1.285714,1.285714,1.6,1.444444
3,-0.911484,-0.694507,-0.783498,-0.381704,-0.775861,-0.808336,0.265348,1.333333,1.714286,1.142857,1.142857,1.3,1.222222
4,-0.905943,-0.6884,-0.805164,-0.421503,-0.800932,-0.80837,0.265348,1.0,1.571429,1.0,0.428571,1.1,0.666667


# 1. Classification

## a. Happiness when clean, smell, noise have the same coefficient

In [21]:
X = df_predict[['nb_transportation_scaled', 'nb_shopping_scaled', 'nb_restaurant_scaled', 'nb_scenicSpot_scaled', 'nb_stadiumAndGym_scaled', 'nb_mobike_scaled', 'green_space_scaled']] #features
Y_coeff = df_predict['happiness_equalCoff'] #target

In [23]:
Z_coeff = pd.DataFrame.copy(Y_coeff)
Z_coeff[Y_coeff <= np.quantile(Y_coeff,0.25)] = "Very unhappy" # could also use class 1,2,3,4
Z_coeff[(Y_coeff > np.quantile(Y_coeff, 0.25)) & (Y_coeff <= np.quantile(Y_coeff, 0.5))] = "Unhappy"
Z_coeff[(Y_coeff > np.quantile(Y_coeff, 0.5)) & (Y_coeff <= np.quantile(Y_coeff, 0.75))] = "Happy"
Z_coeff[Y_coeff> np.quantile(Y_coeff,0.75)] = "Very happy"

In [26]:
df_predict.insert(8, "class_samecoeff", Z_coeff)

In [27]:
df_predict.groupby('class_samecoeff').size()

class_samecoeff
Happy           33
Unhappy          6
Very happy      31
Very unhappy    59
dtype: int64

## b. Happiness when CLEAN has more importance than noise, smell

In [28]:
Y_clean = df_predict['happiness_clean'] #target

In [34]:
Z_clean = pd.DataFrame.copy(Y_clean)
Z_clean[Y_clean <= np.quantile(Y_clean,0.25)] = "Very unhappy" # could also use class 1,2,3,4
Z_clean[(Y_clean > np.quantile(Y_clean, 0.25)) & (Y_clean <= np.quantile(Y_clean, 0.5))] = "Unhappy"
Z_clean[(Y_clean > np.quantile(Y_clean, 0.5)) & (Y_clean <= np.quantile(Y_clean, 0.75))] = "Happy"
Z_clean[Y_clean> np.quantile(Y_clean,0.75)] = "Very happy"

In [36]:
df_predict.insert(10, "class_clean", Z_clean)

In [40]:
df_predict.groupby('class_clean').size()

class_clean
Happy           31
Unhappy         35
Very happy      30
Very unhappy    33
dtype: int64

## c. Happiness when SMELL has more importance than clean, noise

In [42]:
Y_smell = df_predict['happiness_smell'] #target

In [43]:
Z_smell = pd.DataFrame.copy(Y_smell)
Z_smell[Y_clean <= np.quantile(Y_smell,0.25)] = "Very unhappy" # could also use class 1,2,3,4
Z_smell[(Y_clean > np.quantile(Y_smell, 0.25)) & (Y_smell <= np.quantile(Y_smell, 0.5))] = "Unhappy"
Z_smell[(Y_clean > np.quantile(Y_smell, 0.5)) & (Y_smell <= np.quantile(Y_smell, 0.75))] = "Happy"
Z_smell[Y_clean> np.quantile(Y_smell,0.75)] = "Very happy"

In [46]:
df_predict.insert(12, "class_smell", Z_smell)

In [47]:
df_predict.groupby('class_smell').size()

class_smell
Very happy    129
dtype: int64

Just one class so it is better to have only 2 classes for this target (same thing for the next one noise):

In [69]:
Z_smell = pd.DataFrame.copy(Y_smell)
Z_smell[Y_smell <= np.median(Y_smell)] = "Unhappy"
Z_smell[Y_smell > np.median(Y_smell)] = "Happy"

In [71]:
df_predict.insert(12, "class_smell", Z_smell)

In [72]:
df_predict.groupby('class_smell').size()

class_smell
Happy      44
Unhappy    85
dtype: int64

## d. Happiness when NOISE has more importance than clean, smell

In [None]:
Y_noise = df_predict['happiness_noise'] #target

In [73]:
Z_noise = pd.DataFrame.copy(Y_noise)
Z_noise[Y_noise <= np.median(Y_noise)] = "Unhappy"
Z_noise[Y_noise > np.median(Y_noise)] = "Happy"

In [75]:
df_predict.insert(14, "class_noise", Z_noise)

In [77]:
df_predict.groupby('class_noise').size()

class_noise
Happy      64
Unhappy    65
dtype: int64

## e. Happiness - personnal opinion

In [53]:
Y_perso = df_predict['happiness_perso'] #target

In [57]:
Z_perso = pd.DataFrame.copy(Y_perso)
Z_perso[Y_perso <= np.quantile(Y_perso,0.25)] = "Very unhappy" # could also use class 1,2,3,4
Z_perso[(Y_perso > np.quantile(Y_perso, 0.25)) & (Y_perso <= np.quantile(Y_perso, 0.5))] = "Unhappy"
Z_perso[(Y_perso > np.quantile(Y_perso, 0.5)) & (Y_perso <= np.quantile(Y_perso, 0.75))] = "Happy"
Z_perso[Y_perso> np.quantile(Y_perso,0.75)] = "Very happy"

In [60]:
df_predict.insert(16, "class_perso", Z_perso)

In [63]:
df_predict.groupby('class_perso').size()

class_perso
Happy           31
Unhappy         31
Very happy      32
Very unhappy    35
dtype: int64

## f. Happiness - arbitary choice

In [65]:
Y_other = df_predict['happiness_other'] #target

In [66]:
Z_other = pd.DataFrame.copy(Y_other)
Z_other[Y_perso <= np.quantile(Y_other,0.25)] = "Very unhappy" # could also use class 1,2,3,4
Z_other[(Y_perso > np.quantile(Y_other, 0.25)) & (Y_other <= np.quantile(Y_other, 0.5))] = "Unhappy"
Z_other[(Y_perso > np.quantile(Y_other, 0.5)) & (Y_other <= np.quantile(Y_other, 0.75))] = "Happy"
Z_other[Y_perso> np.quantile(Y_other,0.75)] = "Very happy"

In [67]:
df_predict.insert(18, "class_other", Z_other)

In [68]:
df_predict.groupby('class_other').size()

class_other
1.2592592592592593     1
Happy                 32
Unhappy               20
Very happy            69
Very unhappy           7
dtype: int64

In [None]:
## Split data into training and test sets