# Local Satisfaction data preparation  
This notebook shows the process of getting data from **wide format** (for each observation, as many columns as questions) to **long format** (a column for the question, and a column for the answer, for all the data)

In [1]:
import pandas as pd 
import numpy as np

## 1) Load data  
Load the original data from the google sheet

In [2]:
sheet_id = '1iXFCOE7iAhpajY9v2GjtZM21GDDTjWFRzeO8Q_InP7E'
sheet_name = 'simplified_data'
url = f'https://docs.google.com/spreadsheets/d/{sheet_id}/gviz/tq?tqx=out:csv&sheet={sheet_name}'

original_data = pd.read_csv(url)
original_data['id'] = original_data.index
original_data.columns = original_data.columns.str.strip()
original_data.head()

Unnamed: 0,Satisfaction with life,Sense of belonging in community,Satisfaction with tourism,Satisfaction with tourism .1,Tourism,Jobs with tourism,Entrepreneurship with tourism,Local culture from tourism,Production of local productions with tourism,Views on policies on tourism,Satisfaction with the quality of basic educational services,Satisfaction with the quality of basic healh services,Satisfaction with access to recreation,Satisfaction with access to cultural activities,Satisfaction with safety,Sense of a healthy environment,Demographic,id
0,80.0,75.0,70.0,Increase,,,100.0,100.0,100.0,100.0,100.0,,75.0,100.0,50.0,75.0,Central Okanagan,0
1,60.0,0.0,,,,,,,,,,,75.0,50.0,100.0,75.0,,1
2,80.0,100.0,,Increase,,,100.0,100.0,100.0,100.0,100.0,,100.0,0.0,100.0,75.0,Central Okanagan,2
3,70.0,50.0,,,,,,,,,,,50.0,75.0,75.0,50.0,,3
4,70.0,100.0,90.0,Increase,Nearby BC communities | All of BC | Ot...,British Columbia Visitors | Canadian V...,75.0,75.0,75.0,75.0,75.0,Yes,75.0,75.0,50.0,50.0,Central Okanagan,4


In [3]:
original_data.columns

Index(['Satisfaction with life', 'Sense of belonging in community',
       'Satisfaction with tourism', 'Satisfaction with tourism .1', 'Tourism',
       'Jobs with tourism', 'Entrepreneurship with tourism',
       'Local culture from tourism',
       'Production of local productions with tourism',
       'Views on policies on tourism',
       'Satisfaction with the quality of basic educational services',
       'Satisfaction with the quality of basic healh services',
       'Satisfaction with access to recreation',
       'Satisfaction with access to cultural activities',
       'Satisfaction with safety', 'Sense of a healthy environment',
       'Demographic', 'id'],
      dtype='object')

In [4]:
demographic_dict = {'Centr':'Central Okanagan',
                    'Central ':'Central Okanagan',
                    'North Okanaga':'North Okanagan',
                    'North Thompson':'North Okanagan',
                    'South Thompson':'South Okanagan'}

In [5]:
original_data = original_data.replace({'Demographic': demographic_dict})
original_data.groupby('Demographic')['Demographic'].count()

Demographic
Boundary Country                             8
Central Okanagan                           767
Gold Country                                 2
I do not live in the Thompson Okanagan.      2
North Okanagan                             127
Shuswap                                     21
Similkameen Valley                           5
South Okanagan                             115
Unknown                                      2
V0E 2W1                                      1
V4t1e8                                       1
Name: Demographic, dtype: int64

In [6]:
original_data.loc[~original_data['Demographic'].isin(['Boundary Country','Central Okanagan', 'Gold Country','North Okanagan', 'Shuswap',
                            'Similkameen Valley', 'South Okanagan']), 'Demographic'] = np.nan

In [7]:
original_data.head()

Unnamed: 0,Satisfaction with life,Sense of belonging in community,Satisfaction with tourism,Satisfaction with tourism .1,Tourism,Jobs with tourism,Entrepreneurship with tourism,Local culture from tourism,Production of local productions with tourism,Views on policies on tourism,Satisfaction with the quality of basic educational services,Satisfaction with the quality of basic healh services,Satisfaction with access to recreation,Satisfaction with access to cultural activities,Satisfaction with safety,Sense of a healthy environment,Demographic,id
0,80.0,75.0,70.0,Increase,,,100.0,100.0,100.0,100.0,100.0,,75.0,100.0,50.0,75.0,Central Okanagan,0
1,60.0,0.0,,,,,,,,,,,75.0,50.0,100.0,75.0,,1
2,80.0,100.0,,Increase,,,100.0,100.0,100.0,100.0,100.0,,100.0,0.0,100.0,75.0,Central Okanagan,2
3,70.0,50.0,,,,,,,,,,,50.0,75.0,75.0,50.0,,3
4,70.0,100.0,90.0,Increase,Nearby BC communities | All of BC | Ot...,British Columbia Visitors | Canadian V...,75.0,75.0,75.0,75.0,75.0,Yes,75.0,75.0,50.0,50.0,Central Okanagan,4


In [8]:
original_data.Demographic.value_counts()

Central Okanagan      767
North Okanagan        127
South Okanagan        115
Shuswap                21
Boundary Country        8
Similkameen Valley      5
Gold Country            2
Name: Demographic, dtype: int64

In [9]:
demographic_df = original_data[['id','Demographic']].copy()

## Indicator 1  
Sub-regional averages of `Satisfaction with life`

In [10]:
satisfaction_avg = pd.DataFrame(original_data.groupby('Demographic')['Satisfaction with life'].mean())
satisfaction_avg.reset_index(inplace=True)
satisfaction_avg.rename(columns={'Satisfaction with life':'value'}, inplace=True)
satisfaction_avg

Unnamed: 0,Demographic,value
0,Boundary Country,43.75
1,Central Okanagan,60.573664
2,Gold Country,85.0
3,North Okanagan,62.460317
4,Shuswap,57.619048
5,Similkameen Valley,74.0
6,South Okanagan,65.130435


In [11]:
sat_avg_total = pd.DataFrame({'Demographic':['All Thompson Okanagan'], 'value':[original_data['Satisfaction with life'].mean()]})
sat_avg_total

Unnamed: 0,Demographic,value
0,All Thompson Okanagan,59.992674


In [12]:
satisfaction_avg = pd.concat([satisfaction_avg, sat_avg_total]).reset_index(drop=True)
satisfaction_avg

Unnamed: 0,Demographic,value
0,Boundary Country,43.75
1,Central Okanagan,60.573664
2,Gold Country,85.0
3,North Okanagan,62.460317
4,Shuswap,57.619048
5,Similkameen Valley,74.0
6,South Okanagan,65.130435
7,All Thompson Okanagan,59.992674


In [91]:
indicator_1 = satisfaction_avg.rename(columns={'Demographic':'category_1'})
indicator_1['indicator'] = 'satisfaction_with_life'
indicator_1['region'] = 'Thompson Okanagan'
indicator_1['date'] = 2022
indicator_1['category_2'] = ''
indicator_1 = indicator_1[['indicator','region','date','category_1','category_2','value']]
indicator_1


Unnamed: 0,indicator,region,date,category_1,category_2,value
0,satisfaction_with_life,Thompson Okanagan,2022,Boundary Country,,43.75
1,satisfaction_with_life,Thompson Okanagan,2022,Central Okanagan,,60.573664
2,satisfaction_with_life,Thompson Okanagan,2022,Gold Country,,85.0
3,satisfaction_with_life,Thompson Okanagan,2022,North Okanagan,,62.460317
4,satisfaction_with_life,Thompson Okanagan,2022,Shuswap,,57.619048
5,satisfaction_with_life,Thompson Okanagan,2022,Similkameen Valley,,74.0
6,satisfaction_with_life,Thompson Okanagan,2022,South Okanagan,,65.130435
7,satisfaction_with_life,Thompson Okanagan,2022,All Thompson Okanagan,,59.992674


## Indicator 2  
Satisfaction with different elements:  
1) **Sense of belonging in community**: How would you describe your feeling of belonging to your local community?  
2) **Satisfaction with access to recreation**: Your access to sports and recreational activities?  
3) **Satisfaction with access to cultural activities**: Your access to artistic and cultural activities?  
4) **Sense of a healthy environment**: How healthy is your physical environment?  
5) **Satisfaction with safety**: How satisfied are you with your personal safety in your city or town?  

In [16]:
columns_sel = ['id', 'Sense of belonging in community', 'Satisfaction with access to recreation',
         'Satisfaction with access to cultural activities', 'Sense of a healthy environment', 'Satisfaction with safety']
satisfaction_q = original_data[columns_sel].copy()
satisfaction_q.columns = ['id', 'q1', 'q2', 'q3', 'q4', 'q5']
satisfaction_q.head()

Unnamed: 0,id,q1,q2,q3,q4,q5
0,0,75.0,75.0,100.0,75.0,50.0
1,1,0.0,75.0,50.0,75.0,100.0
2,2,100.0,100.0,0.0,75.0,100.0
3,3,50.0,50.0,75.0,50.0,75.0
4,4,100.0,75.0,75.0,50.0,50.0


In [18]:
satisfaction_q_long = satisfaction_q.melt(id_vars=['id'], value_vars=['q1', 'q2', 'q3', 'q4', 'q5'], var_name='question', value_name='value')
satisfaction_q_long.dropna(inplace=True)
satisfaction_q_long.head(10)

Unnamed: 0,id,question,value
0,0,q1,75.0
1,1,q1,0.0
2,2,q1,100.0
3,3,q1,50.0
4,4,q1,100.0
5,5,q1,25.0
7,7,q1,50.0
8,8,q1,75.0
9,9,q1,25.0
11,11,q1,50.0


In [19]:
satisfaction_q_long.question.value_counts()

q2    1199
q3    1196
q1    1165
q5    1153
q4    1148
Name: question, dtype: int64

In [48]:
satisfaction_q_values = pd.DataFrame(satisfaction_q_long.groupby(['question', 'value'])['value'].count())
satisfaction_q_values.rename(columns={'value':'result'}, inplace=True)

satisfaction_q_values.reset_index(inplace=True)
satisfaction_q_values.rename(columns={'value':'category'}, inplace=True)

satisfaction_q_values

Unnamed: 0,question,category,result
0,q1,0.0,188
1,q1,25.0,242
2,q1,50.0,368
3,q1,75.0,264
4,q1,100.0,103
5,q2,0.0,107
6,q2,25.0,178
7,q2,50.0,304
8,q2,75.0,467
9,q2,100.0,143


In [41]:
#Check individual value
len(satisfaction_q_long[(satisfaction_q_long.question == 'q1') & (satisfaction_q_long.value == 0)])

188

In [46]:
satisfaction_q_avg = pd.DataFrame(satisfaction_q_long.groupby(['question'])['value'].mean())
satisfaction_q_avg.rename(columns={'value':'result'}, inplace=True)
satisfaction_q_avg['category'] = 'avg'
satisfaction_q_avg.reset_index(inplace=True)

satisfaction_q_total = pd.DataFrame(satisfaction_q_long.groupby(['question'])['value'].count())
satisfaction_q_total.rename(columns={'value':'result'}, inplace=True)
satisfaction_q_total['category'] = 'total'
satisfaction_q_total.reset_index(inplace=True)

satisfaction_q_stats = pd.concat([satisfaction_q_avg, satisfaction_q_total]).reset_index(drop=True)
satisfaction_q_stats

Unnamed: 0,question,result,category
0,q1,46.824034,avg
1,q2,57.527106,avg
2,q3,51.672241,avg
3,q4,64.851916,avg
4,q5,50.520382,avg
5,q1,1165.0,total
6,q2,1199.0,total
7,q3,1196.0,total
8,q4,1148.0,total
9,q5,1153.0,total


In [49]:
satisfaction_q_combined = pd.concat([satisfaction_q_values, satisfaction_q_stats]).reset_index(drop=True)
satisfaction_q_combined

Unnamed: 0,question,category,result
0,q1,0.0,188.0
1,q1,25.0,242.0
2,q1,50.0,368.0
3,q1,75.0,264.0
4,q1,100.0,103.0
5,q2,0.0,107.0
6,q2,25.0,178.0
7,q2,50.0,304.0
8,q2,75.0,467.0
9,q2,100.0,143.0


In [50]:
indicator_2 = satisfaction_q_combined.rename(columns={'question':'category_1', 'category':'category_2', 'result':'value'})
indicator_2['indicator'] = 'satisfaction_with_elements'
indicator_2['region'] = 'Thompson Okanagan'
indicator_2['date'] = 2022
indicator_2 = indicator_2[['indicator','region','date','category_1','category_2','value']]
indicator_2

Unnamed: 0,indicator,region,date,category_1,category_2,value
0,satisfaction_with_elements,Thompson Okanagan,2022,q1,0.0,188.0
1,satisfaction_with_elements,Thompson Okanagan,2022,q1,25.0,242.0
2,satisfaction_with_elements,Thompson Okanagan,2022,q1,50.0,368.0
3,satisfaction_with_elements,Thompson Okanagan,2022,q1,75.0,264.0
4,satisfaction_with_elements,Thompson Okanagan,2022,q1,100.0,103.0
5,satisfaction_with_elements,Thompson Okanagan,2022,q2,0.0,107.0
6,satisfaction_with_elements,Thompson Okanagan,2022,q2,25.0,178.0
7,satisfaction_with_elements,Thompson Okanagan,2022,q2,50.0,304.0
8,satisfaction_with_elements,Thompson Okanagan,2022,q2,75.0,467.0
9,satisfaction_with_elements,Thompson Okanagan,2022,q2,100.0,143.0


## Indicator 3  
Satisfaction with the state of tourism in your location

In [61]:
satisfaction_tourism = original_data[['id', 'Satisfaction with tourism']].dropna().copy()
satisfaction_tourism['satisfaction'] = np.where(satisfaction_tourism['Satisfaction with tourism'] < 20, 'Very Low Satisfaction', 
                                                np.where(satisfaction_tourism['Satisfaction with tourism'] < 40, 'Low Satisfaction',
                                                np.where(satisfaction_tourism['Satisfaction with tourism'] < 60, 'Not bad, Not Good',
                                                np.where(satisfaction_tourism['Satisfaction with tourism'] < 80, 'Satisfied',
                                                'Very Satisfied'))))

satisfaction_tourism.head()

Unnamed: 0,id,Satisfaction with tourism,satisfaction
0,0,70.0,Satisfied
4,4,90.0,Very Satisfied
5,5,80.0,Very Satisfied
6,6,70.0,Satisfied
7,7,50.0,"Not bad, Not Good"


In [62]:
satisfaction_tourism_val = pd.DataFrame(satisfaction_tourism.groupby('satisfaction')['id'].count())
satisfaction_tourism_val.reset_index(inplace=True)
satisfaction_tourism_val.rename(columns={'id':'value', 'satisfaction':'category'}, inplace=True)
satisfaction_tourism_val

Unnamed: 0,category,value
0,Low Satisfaction,96
1,"Not bad, Not Good",292
2,Satisfied,273
3,Very Low Satisfaction,83
4,Very Satisfied,246


In [65]:
ind3_avg = satisfaction_tourism['Satisfaction with tourism'].mean()
satisfaction_tourism_avg = pd.DataFrame({'category':['avg'], 'value':[ind3_avg]})
satisfaction_tourism_combined = pd.concat([satisfaction_tourism_val, satisfaction_tourism_avg]).reset_index(drop=True)
satisfaction_tourism_combined

Unnamed: 0,category,value
0,Low Satisfaction,96.0
1,"Not bad, Not Good",292.0
2,Satisfied,273.0
3,Very Low Satisfaction,83.0
4,Very Satisfied,246.0
5,avg,56.707071


In [67]:
indicator_3 = satisfaction_tourism_combined.rename(columns={'category':'category_1'})
indicator_3['indicator'] = 'satisfaction_with_tourism'
indicator_3['region'] = 'Thompson Okanagan'
indicator_3['date'] = 2022
indicator_3['category_2'] = ''
indicator_3 = indicator_3[['indicator','region','date','category_1','category_2','value']]
indicator_3

Unnamed: 0,indicator,region,date,category_1,category_2,value
0,satisfaction_with_tourism,Thompson Okanagan,2022,Low Satisfaction,,96.0
1,satisfaction_with_tourism,Thompson Okanagan,2022,"Not bad, Not Good",,292.0
2,satisfaction_with_tourism,Thompson Okanagan,2022,Satisfied,,273.0
3,satisfaction_with_tourism,Thompson Okanagan,2022,Very Low Satisfaction,,83.0
4,satisfaction_with_tourism,Thompson Okanagan,2022,Very Satisfied,,246.0
5,satisfaction_with_tourism,Thompson Okanagan,2022,avg,,56.707071


## Indicator 4
Satisfaction with different aspects of tourism at your location.  
1) Satisfaction with tourism .1: Overall, the number of tourists to my site should:
2) Tourism:	I would welcome visitors from: (Check all that appy)
3) Jobs with tourism: Where to advertise your region (Check all that apply)



In [69]:
ind4 = original_data[['id', 'Satisfaction with tourism .1', 'Tourism', 'Jobs with tourism']].dropna().copy()
ind4.head()

Unnamed: 0,id,Satisfaction with tourism .1,Tourism,Jobs with tourism
4,4,Increase,Nearby BC communities | All of BC | Ot...,British Columbia Visitors | Canadian V...
5,5,Stay the same,Nearby BC communities | All of BC | Ot...,British Columbia Visitors | Canadian V...
7,7,Increase,Nearby BC communities | All of BC | Ot...,British Columbia Visitors | Canadian V...
8,8,Increase,Nearby BC communities | All of BC | Ot...,British Columbia Visitors | Canadian V...
9,9,I do not have an opinion.,Nearby BC communities | All of BC,British Columbia Visitors


In [70]:
ind4_long = ind4.melt(id_vars=['id'], value_vars=['Satisfaction with tourism .1', 'Tourism', 'Jobs with tourism'], var_name='question', value_name='value')
ind4_long.head(10)

Unnamed: 0,id,question,value
0,4,Satisfaction with tourism .1,Increase
1,5,Satisfaction with tourism .1,Stay the same
2,7,Satisfaction with tourism .1,Increase
3,8,Satisfaction with tourism .1,Increase
4,9,Satisfaction with tourism .1,I do not have an opinion.
5,11,Satisfaction with tourism .1,Stay the same
6,12,Satisfaction with tourism .1,Increase
7,13,Satisfaction with tourism .1,Increase
8,15,Satisfaction with tourism .1,Increase
9,16,Satisfaction with tourism .1,Increase


In [76]:
jobs = ind4_long.loc[ind4_long['question'] == 'Jobs with tourism'].copy()

jobs_exp = jobs['value'].dropna().str.split('|', expand=True)
jobs_exp['id'] = jobs_exp.index

jobs_long = pd.melt(jobs_exp, id_vars=['id'], var_name='option', value_name='value')
jobs_long['question'] = 'Jobs with tourism'
jobs_long = jobs_long[~jobs_long['value'].isna()]
jobs_long['value'] = jobs_long['value'].str.strip()
jobs_long

Unnamed: 0,id,option,value,question
0,1970,0,British Columbia Visitors,Jobs with tourism
1,1971,0,British Columbia Visitors,Jobs with tourism
2,1972,0,British Columbia Visitors,Jobs with tourism
3,1973,0,British Columbia Visitors,Jobs with tourism
4,1974,0,British Columbia Visitors,Jobs with tourism
...,...,...,...,...
3929,2944,3,Other countries,Jobs with tourism
3932,2947,3,Other countries,Jobs with tourism
3933,2948,3,Other countries,Jobs with tourism
3934,2949,3,Other countries,Jobs with tourism


In [83]:
jobs_ind = jobs_long.groupby('value').count().reset_index()
jobs_ind.drop(columns=['option', 'question'], inplace=True)
jobs_ind.rename(columns={'value':'category','id':'value'}, inplace=True)
jobs_ind['question'] = 'q2'
jobs_ind

Unnamed: 0,category,value,question
0,British Columbia Visitors,908,q2
1,Canadian Visitors,828,q2
2,Other countries,694,q2
3,United States of America,656,q2


In [84]:
tourism = ind4_long.loc[ind4_long['question'] == 'Tourism'].copy()

tourism_exp = tourism['value'].dropna().str.split('|', expand=True)
tourism_exp['id'] = tourism_exp.index

tourism_long = pd.melt(tourism_exp, id_vars=['id'], var_name='option', value_name='value')
tourism_long['question'] = 'Tourism'
tourism_long = tourism_long[~tourism_long['value'].isna()]
tourism_long['value'] = tourism_long['value'].str.strip()

tourism_ind = tourism_long.groupby('value').count().reset_index()
tourism_ind.drop(columns=['option', 'question'], inplace=True)
tourism_ind.rename(columns={'value':'category','id':'value'}, inplace=True)
tourism_ind['question'] = 'q3'
tourism_ind

Unnamed: 0,category,value,question
0,All of BC,883,q3
1,Nearby BC communities,792,q3
2,Other Canadian provinces,820,q3
3,Other countries,758,q3
4,United States of America,662,q3


In [86]:
satisfaction = ind4_long.loc[(ind4_long['question'] == 'Satisfaction with tourism .1') & (ind4_long['value']\
    .isin(['Increase ', 'Stay the same ', 'I do not have an opinion.','Decrease']))].copy()
satisfaction

Unnamed: 0,id,question,value
0,4,Satisfaction with tourism .1,Increase
1,5,Satisfaction with tourism .1,Stay the same
2,7,Satisfaction with tourism .1,Increase
3,8,Satisfaction with tourism .1,Increase
4,9,Satisfaction with tourism .1,I do not have an opinion.
...,...,...,...
980,1360,Satisfaction with tourism .1,Stay the same
981,1361,Satisfaction with tourism .1,Increase
982,1363,Satisfaction with tourism .1,Stay the same
983,1364,Satisfaction with tourism .1,I do not have an opinion.


In [88]:
satisfaction_ind = satisfaction.groupby('value').count().reset_index()

satisfaction_ind.drop(columns=['question'], inplace=True)
satisfaction_ind.rename(columns={'value':'category','id':'value'}, inplace=True)
satisfaction_ind['question'] = 'q1'
satisfaction_ind


Unnamed: 0,category,value,question
0,Decrease,184,q1
1,I do not have an opinion.,204,q1
2,Increase,214,q1
3,Stay the same,381,q1


In [90]:
indicator_4 = pd.concat([jobs_ind, tourism_ind, satisfaction_ind], ignore_index=True)
indicator_4 = indicator_4.rename(columns={'question':'category_1', 'category':'category_2'})
indicator_4['indicator'] = 'satisfaction_with_tourism_2'
indicator_4['region'] = 'Thompson Okanagan'
indicator_4['date'] = 2022
indicator_4 = indicator_4[['indicator','region','date','category_1','category_2','value']]
indicator_4

Unnamed: 0,indicator,region,date,category_1,category_2,value
0,satisfaction_with_tourism_2,Thompson Okanagan,2022,q2,British Columbia Visitors,908
1,satisfaction_with_tourism_2,Thompson Okanagan,2022,q2,Canadian Visitors,828
2,satisfaction_with_tourism_2,Thompson Okanagan,2022,q2,Other countries,694
3,satisfaction_with_tourism_2,Thompson Okanagan,2022,q2,United States of America,656
4,satisfaction_with_tourism_2,Thompson Okanagan,2022,q3,All of BC,883
5,satisfaction_with_tourism_2,Thompson Okanagan,2022,q3,Nearby BC communities,792
6,satisfaction_with_tourism_2,Thompson Okanagan,2022,q3,Other Canadian provinces,820
7,satisfaction_with_tourism_2,Thompson Okanagan,2022,q3,Other countries,758
8,satisfaction_with_tourism_2,Thompson Okanagan,2022,q3,United States of America,662
9,satisfaction_with_tourism_2,Thompson Okanagan,2022,q1,Decrease,184


## final checks and combine

In [103]:
indicator_1.head(2)

Unnamed: 0,indicator,region,date,category_1,category_2,value
0,satisfaction_with_life,Thompson Okanagan,2022,Boundary Country,,43.75
1,satisfaction_with_life,Thompson Okanagan,2022,Central Okanagan,,60.573664


In [111]:
indicator_2.tail(5)

Unnamed: 0,indicator,region,date,category_1,category_2,value
30,satisfaction_with_elements,Thompson Okanagan,2022,q1,total,1165.0
31,satisfaction_with_elements,Thompson Okanagan,2022,q2,total,1199.0
32,satisfaction_with_elements,Thompson Okanagan,2022,q3,total,1196.0
33,satisfaction_with_elements,Thompson Okanagan,2022,q4,total,1148.0
34,satisfaction_with_elements,Thompson Okanagan,2022,q5,total,1153.0


In [106]:
indicator_3

Unnamed: 0,indicator,region,date,category_1,category_2,value
0,satisfaction_with_tourism,Thompson Okanagan,2022,Low Satisfaction,,96.0
1,satisfaction_with_tourism,Thompson Okanagan,2022,"Not bad, Not Good",,292.0
2,satisfaction_with_tourism,Thompson Okanagan,2022,Satisfied,,273.0
3,satisfaction_with_tourism,Thompson Okanagan,2022,Very Low Satisfaction,,83.0
4,satisfaction_with_tourism,Thompson Okanagan,2022,Very Satisfied,,246.0
5,satisfaction_with_tourism,Thompson Okanagan,2022,avg,,56.707071


In [112]:
indicator_4

Unnamed: 0,indicator,region,date,category_1,category_2,value
0,satisfaction_with_tourism_2,Thompson Okanagan,2022,q2,British Columbia Visitors,908
1,satisfaction_with_tourism_2,Thompson Okanagan,2022,q2,Canadian Visitors,828
2,satisfaction_with_tourism_2,Thompson Okanagan,2022,q2,Other countries,694
3,satisfaction_with_tourism_2,Thompson Okanagan,2022,q2,United States of America,656
4,satisfaction_with_tourism_2,Thompson Okanagan,2022,q3,All of BC,883
5,satisfaction_with_tourism_2,Thompson Okanagan,2022,q3,Nearby BC communities,792
6,satisfaction_with_tourism_2,Thompson Okanagan,2022,q3,Other Canadian provinces,820
7,satisfaction_with_tourism_2,Thompson Okanagan,2022,q3,Other countries,758
8,satisfaction_with_tourism_2,Thompson Okanagan,2022,q3,United States of America,662
9,satisfaction_with_tourism_2,Thompson Okanagan,2022,q1,Decrease,184


In [92]:
df_to_save = pd.concat([indicator_1, indicator_2, indicator_3, indicator_4], ignore_index=True)
df_to_save

Unnamed: 0,indicator,region,date,category_1,category_2,value
0,satisfaction_with_life,Thompson Okanagan,2022,Boundary Country,,43.750000
1,satisfaction_with_life,Thompson Okanagan,2022,Central Okanagan,,60.573664
2,satisfaction_with_life,Thompson Okanagan,2022,Gold Country,,85.000000
3,satisfaction_with_life,Thompson Okanagan,2022,North Okanagan,,62.460317
4,satisfaction_with_life,Thompson Okanagan,2022,Shuswap,,57.619048
...,...,...,...,...,...,...
57,satisfaction_with_tourism_2,Thompson Okanagan,2022,q3,United States of America,662.000000
58,satisfaction_with_tourism_2,Thompson Okanagan,2022,q1,Decrease,184.000000
59,satisfaction_with_tourism_2,Thompson Okanagan,2022,q1,I do not have an opinion.,204.000000
60,satisfaction_with_tourism_2,Thompson Okanagan,2022,q1,Increase,214.000000


In [95]:
df_to_save.to_csv('../data/local_satisfaction_EXPORT_CSV.csv', index=False)

## Transform to long format

In [97]:
df_long = pd.melt(original_data, id_vars=['id'], var_name='Topic', value_name='value')
df_long = df_long[~df_long['id'].isna()]
df_long

Unnamed: 0,id,Topic,value
0,0,Satisfaction with life,80.0
1,1,Satisfaction with life,60.0
2,2,Satisfaction with life,80.0
3,3,Satisfaction with life,70.0
4,4,Satisfaction with life,70.0
...,...,...,...
23217,1361,Demographic,Similkameen Valley
23218,1362,Demographic,
23219,1363,Demographic,Central Okanagan
23220,1364,Demographic,North Okanagan


In [99]:
df_long = df_long[df_long['Topic'] != 'Demographic']

In [100]:
df_long_complete = pd.merge(df_long, demographic_df, on='id', how='left')
df_long_complete

Unnamed: 0,id,Topic,value,Demographic
0,0,Satisfaction with life,80.0,Central Okanagan
1,1,Satisfaction with life,60.0,
2,2,Satisfaction with life,80.0,Central Okanagan
3,3,Satisfaction with life,70.0,
4,4,Satisfaction with life,70.0,Central Okanagan
...,...,...,...,...
21851,1361,Sense of a healthy environment,75.0,Similkameen Valley
21852,1362,Sense of a healthy environment,,
21853,1363,Sense of a healthy environment,75.0,Central Okanagan
21854,1364,Sense of a healthy environment,50.0,North Okanagan


## 2) Modify / group data  
The following categories (questions/topics) need cleaning, separating multiple choice answers and/or removing invalid answers  
- Jobs with tourism  
- Tourism  
- Satisfaction with tourism .1  

The process will be to get the subset of data, modify it, and then remove and replace on the initial data table.

### Jobs with tourism 
**Question:**  
_If you saw an advertisement promoting your region as a place for tourists to visit would you agree it was appropriate for the following locations?  (Check all that apply)_

In [115]:
jobs = df_long_complete.loc[df_long_complete['Topic'] == 'Jobs with tourism'].copy()
jobs

Unnamed: 0,id,Topic,value,Demographic
6830,0,Jobs with tourism,,Central Okanagan
6831,1,Jobs with tourism,,
6832,2,Jobs with tourism,,Central Okanagan
6833,3,Jobs with tourism,,
6834,4,Jobs with tourism,British Columbia Visitors | Canadian V...,Central Okanagan
...,...,...,...,...
8191,1361,Jobs with tourism,Canadian Visitors | United States of America ...,Similkameen Valley
8192,1362,Jobs with tourism,,
8193,1363,Jobs with tourism,British Columbia Visitors | Canadian V...,Central Okanagan
8194,1364,Jobs with tourism,British Columbia Visitors,North Okanagan


In [116]:
jobs.value.unique()

array([nan,
       'British Columbia Visitors         | Canadian Visitors | United States of America        | Other countries',
       'British Columbia Visitors        ',
       'British Columbia Visitors         | Canadian Visitors | United States of America       ',
       'Canadian Visitors',
       'British Columbia Visitors         | Canadian Visitors | Other countries',
       'Other countries',
       'British Columbia Visitors         | Canadian Visitors',
       'United States of America       ',
       'British Columbia Visitors         | United States of America        | Other countries',
       'Canadian Visitors | United States of America       ',
       'Canadian Visitors | Other countries',
       'Canadian Visitors | United States of America        | Other countries',
       'British Columbia Visitors         | Other countries',
       'British Columbia Visitors         | United States of America       ',
       'United States of America        | Other countries'], dty

Get choices in different columns and then in long format

In [117]:
jobs_exp = jobs['value'].dropna().str.split('|', expand=True)
jobs_exp['id'] = jobs_exp.index
jobs_exp

Unnamed: 0,0,1,2,3,id
6834,British Columbia Visitors,Canadian Visitors,United States of America,Other countries,6834
6835,British Columbia Visitors,Canadian Visitors,United States of America,Other countries,6835
6836,British Columbia Visitors,Canadian Visitors,United States of America,Other countries,6836
6837,British Columbia Visitors,Canadian Visitors,United States of America,Other countries,6837
6838,British Columbia Visitors,Canadian Visitors,United States of America,Other countries,6838
...,...,...,...,...,...
8190,British Columbia Visitors,Canadian Visitors,United States of America,Other countries,8190
8191,Canadian Visitors,United States of America,Other countries,,8191
8193,British Columbia Visitors,Canadian Visitors,,,8193
8194,British Columbia Visitors,,,,8194


In [118]:
jobs_long = pd.melt(jobs_exp, id_vars=['id'], var_name='Topic', value_name='value')
jobs_long['Topic'] = 'Jobs with tourism'
jobs_long = jobs_long[~jobs_long['value'].isna()]
jobs_long['value'] = jobs_long['value'].str.strip()
jobs_long

Unnamed: 0,id,Topic,value
0,6834,Jobs with tourism,British Columbia Visitors
1,6835,Jobs with tourism,British Columbia Visitors
2,6836,Jobs with tourism,British Columbia Visitors
3,6837,Jobs with tourism,British Columbia Visitors
4,6838,Jobs with tourism,British Columbia Visitors
...,...,...,...
4097,8179,Jobs with tourism,Other countries
4100,8184,Jobs with tourism,Other countries
4101,8186,Jobs with tourism,Other countries
4102,8187,Jobs with tourism,Other countries


### Tourism  
**Question:**  
_I would welcome visitors from: (Check all that appy)_

Same process as before

In [108]:
tourism = df_long_complete.loc[df_long_complete['Topic'] == 'Tourism'].copy()
tourism_exp = tourism['value'].dropna().str.split('|', expand=True)
tourism_exp['id'] = tourism_exp.index
tourism_exp.head()



Unnamed: 0,0,1,2,3,4,id
5468,Nearby BC communities,All of BC,Other Canadian provinces,United States of America,Other countries,5468
5469,Nearby BC communities,All of BC,Other Canadian provinces,United States of America,Other countries,5469
5470,Nearby BC communities,All of BC,Other Canadian provinces,United States of America,Other countries,5470
5471,Nearby BC communities,All of BC,Other Canadian provinces,United States of America,Other countries,5471
5472,Nearby BC communities,All of BC,Other Canadian provinces,United States of America,Other countries,5472


In [109]:
tourism_long = pd.melt(tourism_exp, id_vars=['id'], var_name='Topic', value_name='value')
tourism_long['Topic'] = 'Tourism'
tourism_long = tourism_long[~tourism_long['value'].isna()]
tourism_long['value'] = tourism_long['value'].str.strip()
tourism_long

Unnamed: 0,id,Topic,value
0,5468,Tourism,Nearby BC communities
1,5469,Tourism,Nearby BC communities
2,5470,Tourism,Nearby BC communities
3,5471,Tourism,Nearby BC communities
4,5472,Tourism,Nearby BC communities
...,...,...,...
5189,6821,Tourism,Other countries
5190,6824,Tourism,Other countries
5192,6827,Tourism,Other countries
5193,6828,Tourism,Other countries


### Satisfaction with tourism .1

In [111]:
df_long_complete[df_long_complete['Topic'] == 'Satisfaction with tourism .1']['value'].unique()

array(['Increase ', nan, 'Stay the same ', 'I do not have an opinion.',
       'Decrease', '特に意見はない', 'Meningkat'], dtype=object)

In [112]:
satisfaction = df_long_complete.loc[(df_long_complete['Topic'] == 'Satisfaction with tourism .1') & (df_long_complete['value']\
    .isin(['Increase ', 'Stay the same ', 'I do not have an opinion.','Decrease']))].copy()
satisfaction

Unnamed: 0,id,Topic,value,Demographic
4098,0,Satisfaction with tourism .1,Increase,Central Okanagan
4100,2,Satisfaction with tourism .1,Increase,Central Okanagan
4102,4,Satisfaction with tourism .1,Increase,Central Okanagan
4103,5,Satisfaction with tourism .1,Stay the same,Central Okanagan
4105,7,Satisfaction with tourism .1,Increase,Central Okanagan
...,...,...,...,...
5458,1360,Satisfaction with tourism .1,Stay the same,North Okanagan
5459,1361,Satisfaction with tourism .1,Increase,Similkameen Valley
5461,1363,Satisfaction with tourism .1,Stay the same,Central Okanagan
5462,1364,Satisfaction with tourism .1,I do not have an opinion.,North Okanagan


### Combine with dataset  

In [123]:
df_long_updated = df_long_complete.loc[~(df_long_complete['Topic'].isin(['Satisfaction with tourism .1', 'Tourism', 'Jobs with tourism']))]
df_long_updated = df_long_updated.append([satisfaction, jobs_long, tourism_long])

In [124]:
df_long_updated[df_long_updated['Topic'] == 'Jobs with tourism']['value'].value_counts()

British Columbia Visitors    941
Canadian Visitors            858
Other countries              719
United States of America     678
Name: value, dtype: int64

In [129]:
df_long_updated['Topic'] = df_long_updated['Topic'].str.strip()
df_long_updated['Topic'] = df_long_updated['Topic'].str.lower().str.replace(' ', '_')
df_long_updated['region'] = 'Thompson Okanagan'
df_long_updated['date'] = 2022
df_long_updated['category_1'] = ''
df_long_updated.rename(columns={'Topic':'indicator', 'Demographic':'category_2'}, inplace=True)

In [130]:
df_long_updated

Unnamed: 0,id,indicator,value,category_2,region,date,category_1
0,0,satisfaction_with_life,80.0,Central Okanagan,Thompson Okanagan,2022,
1,1,satisfaction_with_life,60.0,,Thompson Okanagan,2022,
2,2,satisfaction_with_life,80.0,Central Okanagan,Thompson Okanagan,2022,
3,3,satisfaction_with_life,70.0,,Thompson Okanagan,2022,
4,4,satisfaction_with_life,70.0,Central Okanagan,Thompson Okanagan,2022,
...,...,...,...,...,...,...,...
5189,6821,tourism,Other countries,,Thompson Okanagan,2022,
5190,6824,tourism,Other countries,,Thompson Okanagan,2022,
5192,6827,tourism,Other countries,,Thompson Okanagan,2022,
5193,6828,tourism,Other countries,,Thompson Okanagan,2022,


In [131]:
indicator_csv = df_long_updated[['indicator','region','date','category_1','category_2','value']]
indicator_csv

Unnamed: 0,indicator,region,date,category_1,category_2,value
0,satisfaction_with_life,Thompson Okanagan,2022,,Central Okanagan,80.0
1,satisfaction_with_life,Thompson Okanagan,2022,,,60.0
2,satisfaction_with_life,Thompson Okanagan,2022,,Central Okanagan,80.0
3,satisfaction_with_life,Thompson Okanagan,2022,,,70.0
4,satisfaction_with_life,Thompson Okanagan,2022,,Central Okanagan,70.0
...,...,...,...,...,...,...
5189,tourism,Thompson Okanagan,2022,,,Other countries
5190,tourism,Thompson Okanagan,2022,,,Other countries
5192,tourism,Thompson Okanagan,2022,,,Other countries
5193,tourism,Thompson Okanagan,2022,,,Other countries


## 3) Add questions to dataset  
Load corresponding sheet from workbook, join by topic

In [45]:
sheet_name = 'questions_table'
url = f'https://docs.google.com/spreadsheets/d/{sheet_id}/gviz/tq?tqx=out:csv&sheet={sheet_name}'

question_data = pd.read_csv(url)
question_data['Topic'] = question_data['Topic'].str.strip()
question_data


Unnamed: 0,Topic,Question
0,Satisfaction with life,"Overall, how satisfied are you with your life ..."
1,Sense of belonging in community,How would you describe your feeling of belongi...
2,Satisfaction with tourism,How satisfied are you with the state of touris...
3,Satisfaction with tourism,"Overall, the number of tourists to my site sho..."
4,Tourism,I would welcome visitors from: (Check all that...
5,Jobs with tourism,If you saw an advertisement promoting your reg...
6,Entrepreneurship with tourism,Tourism creates jobs for local people at my site.
7,Local culture from tourism,Tourism promotes local entrepreneurship at my ...
8,Production of local productions with tourism,Tourism promotes the local culture at my site.
9,Views on policies on tourism,Tourism promotes production of local products ...


In [47]:
df_long_updated.Topic.unique()

array(['Satisfaction with life', 'Sense of belonging in community',
       'Satisfaction with tourism', 'Entrepreneurship with tourism',
       'Local culture from tourism',
       'Production of local productions with tourism',
       'Views on policies on tourism',
       'Satisfaction with the quality of basic educational services',
       'Satisfaction with the quality of basic healh services',
       'Satisfaction with access to recreation',
       'Satisfaction with access to cultural activities',
       'Satisfaction with safety', 'Sense of a healthy environment',
       'Satisfaction with tourism .1', 'Jobs with tourism', 'Tourism'],
      dtype=object)

In [48]:
question_data.iloc[3]['Topic'] = 'Satisfaction with tourism .1'
question_data

Unnamed: 0,Topic,Question
0,Satisfaction with life,"Overall, how satisfied are you with your life ..."
1,Sense of belonging in community,How would you describe your feeling of belongi...
2,Satisfaction with tourism,How satisfied are you with the state of touris...
3,Satisfaction with tourism .1,"Overall, the number of tourists to my site sho..."
4,Tourism,I would welcome visitors from: (Check all that...
5,Jobs with tourism,If you saw an advertisement promoting your reg...
6,Entrepreneurship with tourism,Tourism creates jobs for local people at my site.
7,Local culture from tourism,Tourism promotes local entrepreneurship at my ...
8,Production of local productions with tourism,Tourism promotes the local culture at my site.
9,Views on policies on tourism,Tourism promotes production of local products ...


In [49]:
df_long = pd.merge(df_long_updated, question_data, on='Topic', how='left')
df_long

Unnamed: 0,id,Topic,Answer,Question
0,0,Satisfaction with life,80,"Overall, how satisfied are you with your life ..."
1,1,Satisfaction with life,60,"Overall, how satisfied are you with your life ..."
2,2,Satisfaction with life,80,"Overall, how satisfied are you with your life ..."
3,3,Satisfaction with life,70,"Overall, how satisfied are you with your life ..."
4,4,Satisfaction with life,70,"Overall, how satisfied are you with your life ..."
...,...,...,...,...
26051,6816,Tourism,Other countries,I would welcome visitors from: (Check all that...
26052,6819,Tourism,Other countries,I would welcome visitors from: (Check all that...
26053,6822,Tourism,Other countries,I would welcome visitors from: (Check all that...
26054,6823,Tourism,Other countries,I would welcome visitors from: (Check all that...


## 4) Save data  
The dataset is ready for use in visualizations (Tableau or Python) or for further formatting for the API when the data model is ready

In [50]:
df_long.to_csv('../data/local_satisfaction_long_data.csv', index=False)