In [1]:
import pandas as pd
import numpy as np

import plotly.express as px
from matplotlib import pyplot as plt

import seaborn as sns

In [2]:
#Adding Google Drive as an accessible path
from google.colab import drive
drive.mount('/content/drive')

%cd /content/drive/My Drive/Colab Notebooks/SMT project

Mounted at /content/drive
/content/drive/My Drive/Colab Notebooks/SMT project


In [3]:
df = pd.read_csv('cleaned_survey.csv')

df.head()

Unnamed: 0,Q1.1,Q1.1_3_TEXT,Q1.2,Q1.3,Q1.4,Q1.4_4_TEXT,Q1.5,Q1.6,Q1.7,Q2.1,...,Q5.4_7_TEXT,Q6.1_1,Q6.1_2,Q6.1_3,Q6.1_4,Q6.1_5,Q6.2,cleaned_Q2.12,cleaned_Q4.8,cleaned_Q6.2
0,Singapore Citizen,,16-19 years old,Female,Chinese,,Full-time Student,Polytechnic,Singapore Polytechnic,I do not smoke at all / I am a non-smoker,...,,No,Yes,Yes,Yes,Yes,"teach better coping mechanisms, educate the yo...",,,teach better coping mechanism educate youth va...
1,Singapore Citizen,,16-19 years old,Female,Chinese,,Full-time Student,Polytechnic,Singapore Polytechnic,I do not smoke at all / I am a non-smoker,...,,No,No,Yes,Yes,Yes,It hasn't been studied fully yet and the liqui...,,,studied fully yet liquid used vape laced drug
2,Singapore Citizen,,16-19 years old,Female,Chinese,,Full-time Student,Polytechnic,Singapore Polytechnic,I do not smoke at all / I am a non-smoker,...,,No,No,Yes,Yes,Yes,Talk about it instead of acting like it is taboo.,,,talk instead acting like taboo
3,Permanent Resident,,16-19 years old,Female,"Others, please specify",Filipino,Full-time Student,Polytechnic,Singapore Polytechnic,I do not smoke at all / I am a non-smoker,...,,No,No,Yes,Yes,I don't know,I'm not sure tbh,,,sure tbh
4,"Others, please specify",International student,16-19 years old,Female,"Others, please specify",Korean,Full-time Student,Polytechnic,Singapore Polytechnic,I do not smoke at all / I am a non-smoker,...,,,,,,,,,,


# General Demographics of surveyees

Gender distribution based on whether surveyees vape

In [4]:
demo_df = df[df['Q2.4'].notna()]
gender = px.histogram(demo_df['Q1.3'],color = demo_df['Q2.4'], color_discrete_map={'No':'yellowgreen','Yes':'indianred'}, 
                      labels={'value':'gender','color':'Vape?'}, text_auto = True, 
                      title = 'Gender distribution on whether students vape', width=500)
gender.show()

Ethnic distribution on whether surveyees vape

In [5]:
ethnic = px.histogram(demo_df['Q1.4'],color = demo_df['Q2.4'], color_discrete_map={'No':'yellowgreen','Yes':'indianred'}, 
                      labels={'value':'ethnic'}, text_auto = True, 
                      title = 'Ethnic distribution on whether students vape', width=500)
ethnic.show()

School distribution on whether surveyees vape

In [6]:
schools = px.histogram(demo_df['Q1.7'],color = demo_df['Q2.4'], color_discrete_map={'No':'yellowgreen','Yes':'indianred'},
                       labels = {'value':'Schools'}, text_auto=True, 
                       title = 'Count of surveys from each school', width=500)
schools.show()

# Vapers

1. Did friends stop you from vaping?

In [7]:
# Filter the required columns
new_df = df.filter(['Q2.9'])
new_df.dropna(inplace=True)

In [8]:
interfered_vapers = list(new_df['Q2.9'].values)
new_dict = {}

for i in interfered_vapers:
  if new_dict.get(i) is not None:
    new_dict[i] += 1
  else:
    new_dict[i] = 1
          
print(new_dict)

{'No': 7, 'Yes': 12}


In [9]:
values = new_dict.values()
labels = new_dict.keys()

friend_interfere_vapers = px.pie(new_df, values=values, names=labels, color = labels,
                                 color_discrete_map={'No':'indianred','Yes':'yellowgreen'}, width = 500, height = 500)
friend_interfere_vapers.update_layout(title='Do friends advise vapers to quit vaping?')
friend_interfere_vapers.show()

2. How did your friend advice you against vaping?

In [10]:
df['Q2.10']= df['Q2.10'].str.replace("[\(\[].*?[\)\]]", "")

df.rename(columns={'Q2.10':'advicetype'}, inplace=True)

#separate each answer with more than one type of advice selected into rows (e.g. if Health, Legal reasons --> Legal reason will have new row)
new_df = df.assign(advicetype=df['advicetype'].str.split(' ,')).explode('advicetype')


The default value of regex will change from True to False in a future version.



In [11]:
new_df1 = new_df['advicetype'].str.strip()

In [12]:
advise = px.histogram(new_df1, text_auto = True, color_discrete_sequence= ['salmon'], labels = {'value':'reasons'},
                      title='How did friends advise vapers to quit vaping?', width=500)
advise.show()

In [13]:
df = pd.read_csv('cleaned_survey.csv')

In [14]:
others_details = df[df["Q2.10"] == "Others (please specify)"]
others_details = others_details[["Q2.10", "Q2.10_4_TEXT"]]
others_details

Unnamed: 0,Q2.10,Q2.10_4_TEXT
31,Others (please specify),tried


3. Do vapers like their friend to interfere?

In [15]:
interfered = list(df['Q2.11'].values)
new_dict = {}

for i in interfered:
  if str(i)!='nan':
    if new_dict.get(i) is not None:
      new_dict[i] += 1
    else:
      new_dict[i] = 1
          
print(new_dict)

{'Yes': 11, 'No': 1}


In [16]:
values = new_dict.values()
labels = new_dict.keys()

interfered = px.pie(new_df, values=values, names=labels, color = labels, 
                    color_discrete_map={'Yes':'yellowgreen','No':'indianred'}, width = 600, height = 500)
interfered.update_layout(title='Do vapers like their friend to advise them to quit vaping?')
interfered.show()

In [17]:
new_df = df.filter(['Q1.3','Q1.4','Q2.11'])

new_df = new_df.loc[new_df["Q2.11"]=="Yes"]

Demographic of those who like their friend to interfere (gender)

In [18]:
like_advise_gender = px.histogram(x=new_df['Q1.3'],color = new_df['Q1.3'], color_discrete_map={'Female':'orchid','Male':'deepskyblue'}, 
                                  labels = {'x':'gender'},text_auto = True, 
                                  title="Gender Distribution of those who liked their friends' interference", width = 600, height = 500)
like_advise_gender.show()

Demographic of those who like their friend to interfere (race)

In [19]:
like_advise_ethnic = px.histogram(x=new_df['Q1.4'], text_auto = True, color=new_df['Q1.4'], color_discrete_sequence= ['salmon'], width = 600, height = 500,
                   title="Ethnic Distribution of those who liked their friends' interference")
like_advise_ethnic.show()

# Non-vapers

1. Did you stop your friend from vaping?

In [20]:
# Filter the required columns
new_df = df.filter(['Q4.5'])
new_df.dropna(inplace=True)

In [21]:
interfered_friend = list(new_df['Q4.5'].values)
new_dict = {}

for i in interfered_friend:
  if new_dict.get(i) is not None:
    new_dict[i] += 1
  else:
    new_dict[i] = 1
          
print(new_dict)

{'I would like to help but not know where to start.': 3, 'I did not attempt to stop my friend from vaping.': 12, 'I tried to advise/advised my friends to stop vaping.': 7}


In [22]:
values = new_dict.values()
labels = new_dict.keys()

friend_interfere_vapers = px.pie(new_df, values=values, names=labels, color = labels,
                                 color_discrete_map={'I did not attempt to stop my friend from vaping.':'indianred',
                                                     'I tried to advise/advised my friends to stop vaping.':'yellowgreen',
                                                     'I would like to help but not know where to start.': 'lime'},
                                 width = 1000, height = 500)
friend_interfere_vapers.update_layout(title='Do non-vapers advise their friends to quit vaping?')
friend_interfere_vapers.show()

In [23]:
# General demographics of vapers
updated_dict = {'Yes':0,'No':0}
for i in new_dict.keys():
  if i== 'I did not attempt to stop my friend from vaping.':
      updated_dict['No'] += new_dict[i]
  else:
      updated_dict['Yes'] += new_dict[i]

updated_dict

{'Yes': 10, 'No': 12}

In [24]:
values = updated_dict.values()
labels = updated_dict.keys()

friend_interfere = px.pie(new_df, values=values, names=labels, color = labels,  
                          color_discrete_map={'Yes':'yellowgreen','No':'indianred'}, width = 600, height = 500)
friend_interfere.update_layout(title='Do non-vapers have the intention to stop their friends from vaping?')
friend_interfere.show()

Demographics of who advised and not advised their friends to quit vaping

In [25]:
friends_df = df[df['Q4.5'].notna()]

In [26]:
tried_advice = list(friends_df['Q4.5'].values)
new_list = []

for i in tried_advice:
  if i == "I did not attempt to stop my friend from vaping.":
    new_list.append('Yes')
  else:
    new_list.append('No')

new_list

['No',
 'Yes',
 'No',
 'Yes',
 'No',
 'Yes',
 'No',
 'No',
 'No',
 'Yes',
 'Yes',
 'Yes',
 'No',
 'Yes',
 'Yes',
 'Yes',
 'Yes',
 'Yes',
 'Yes',
 'No',
 'No',
 'No']

In [27]:
friends_df['will_stop_friend'] = new_list



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Gender distribution

In [28]:
gender_friend = px.histogram(friends_df['Q1.3'],color = friends_df['will_stop_friend'], 
                             color_discrete_map={'Yes':'yellowgreen','No':'indianred'}, 
                             labels={'value':'gender','color':'Did they stop their friends?'}, 
                             text_auto = True, title = 'Gender distribution on whether friends gave advise to vapers', 
                             width = 600, height = 500)
gender_friend.show()

Ethnic distribution

In [29]:
ethnic_friend = px.histogram(friends_df['Q1.4'],color = friends_df['will_stop_friend'], 
                             color_discrete_map={'Yes':'yellowgreen','No':'indianred'}, labels={'value':'gender'}, 
                             text_auto = True, title = 'Ethnic distribution on whether friends gave advise to vapers', 
                             width = 600, height = 500)
ethnic_friend.show()

How long they know each other

In [30]:
friends_df = friends_df[friends_df['Q4.1'].apply(lambda x: x.isnumeric())]
friends_df = friends_df[friends_df['Q4.2'].apply(lambda x: x.isnumeric())]

friends_df['Q4.1'] = friends_df['Q4.1'].astype('int')
friends_df['Q4.2'] = friends_df['Q4.2'].astype('int')

In [31]:
years_known = px.histogram(x = friends_df['Q4.1'], color = friends_df['will_stop_friend'], 
                           color_discrete_map={'Yes':'yellowgreen','No':'indianred'},
                    labels={'x':'years'}, nbins=5, text_auto=True, width = 600, height = 500,
                    title = 'How long they have known their vaping friend')
years_known.show()

How much time they spend with each other?

In [53]:
time_spent = px.histogram(x = friends_df['Q4.2'], color = friends_df['will_stop_friend'], 
                          color_discrete_map={'Yes':'yellowgreen','No':'indianred'},
                    labels={'x':'hours'}, nbins=5, text_auto=True,width = 600, height = 500,
                    title = 'How much time they spend with their vaping friend')
time_spent.show()

How they spend their time with each other?

In [54]:
how_spent = px.histogram(x = friends_df['Q4.3'], color = friends_df['will_stop_friend'], 
                         color_discrete_map={'Yes':'yellowgreen','No':'indianred'},
                    labels={'x':'activities'}, text_auto=True, width = 800, height = 500,
                    title = 'Actvities that they do with their friend who vapes')
how_spent.show()

In [34]:
others_details = friends_df[friends_df["Q4.3"] == "Others (please specify)"]
others_details = others_details[["Q4.3", "Q4.3_4_TEXT"]]
others_details

Unnamed: 0,Q4.3,Q4.3_4_TEXT
34,Others (please specify),Gaming online
64,Others (please specify),


How did they advise their friends to stop vaping?

In [35]:
df['Q4.6']= df['Q4.6'].str.replace("[\(\[].*?[\)\]]", "")
df.rename(columns={'Q4.6':'f_advicetype'}, inplace=True)


The default value of regex will change from True to False in a future version.



In [36]:
df1 = df.assign(f_advicetype=df['f_advicetype'].str.split(' ,')).explode('f_advicetype')
df1 = df.assign(f_advicetype=df['f_advicetype'].str.split(',')).explode('f_advicetype')

In [37]:
df2 = df1['f_advicetype'].str.strip()

In [38]:
f_advicetype = px.histogram(x=df2, text_auto = True, color_discrete_sequence= ['salmon'], 
                            labels={'x':'Advice methods'},
                      title='How did friends advise vapers to quit vaping?', 
                      width = 600, height = 600)
f_advicetype.show()

Why did they not advise?

In [39]:
not_advice = list(friends_df['Q4.7'].values)
not_advice_dict = {}

for i in not_advice:
  split_arr = str(i).split(",")
  for w in split_arr:
    if w!='nan':
      if not_advice_dict.get(w) is not None:
        not_advice_dict[w] += 1
      else:
        not_advice_dict[w] = 1
          
print(not_advice_dict)

{'Others (Please specify)': 2, 'I am not comfortable to speak about such topics': 4, 'We do not know each other very well.': 2, 'He/She/They are too stubborn to listen.': 1, 'There is no reason to stop him/her/them.': 1}


In [40]:
not_advice_df = pd.Series(not_advice_dict)

not_advice_df

Others (Please specify)                            2
I am not comfortable to speak about such topics    4
We do not know each other very well.               2
He/She/They are too stubborn to listen.            1
There is no reason to stop him/her/them.           1
dtype: int64

In [41]:
f_advise = px.bar(not_advice_df, text_auto = True, color_discrete_sequence= ['salmon'], 
                  labels = {'index':'reasons','value':'count'},
                  title='Why did friends not advise vapers to quit vaping?',width = 800, height = 800)
f_advise.update_layout(showlegend=False)
f_advise.show()

# Additional Insights

Why do students vape?

In [42]:
reasons_vapes = list(df['Q2.6'].values)
reasons_dict = {}

for i in reasons_vapes:
  if str(i)!='nan':
    split_arr = i.split(",")
    for w in split_arr:
      if w!=' please specify:':
        if reasons_dict.get(w) is not None:
          reasons_dict[w] += 1
        else:
          reasons_dict[w] = 1
          
print(reasons_dict)

{'Out of curiosity': 13, 'Helps me relax / relieve stress / cope with problems': 9, 'Vapes may be less harmful compared to regular cigarettes': 3, 'Vapes are better for the environment compared to regular cigarettes': 2, 'Vapes help me save money compared to regular cigarettes': 3, 'Vapes help me project a better image (i.e. makes me look cool/mature/attractive)': 2, 'Peer pressure': 3, 'I thought it was alright to vape as a family member /friend were also doing it': 2, 'Vapes helps me quit smoking': 1, 'Others': 1}


In [43]:
new_df = pd.Series(reasons_dict)

new_df

Out of curiosity                                                                    13
Helps me relax / relieve stress / cope with problems                                 9
Vapes may be less harmful compared to regular cigarettes                             3
Vapes are better for the environment compared to regular cigarettes                  2
Vapes help me save money compared to regular cigarettes                              3
Vapes help me project a better image (i.e. makes me look cool/mature/attractive)     2
Peer pressure                                                                        3
I thought it was alright to vape as a family member /friend were also doing it       2
Vapes helps me quit smoking                                                          1
Others                                                                               1
dtype: int64

In [44]:
why_students_vape= px.bar(new_df,color_discrete_sequence=['salmon'], labels = {'index': 'Reasons','value':'Count'}, 
                          text_auto = True, title = 'Why students vape')
why_students_vape.update_layout(showlegend=False)
why_students_vape.update_xaxes(categoryorder="total descending")
why_students_vape.show()

Knowledge on vaping

In [45]:
new_df = df.filter(['Q6.1_1','Q6.1_2','Q6.1_3','Q6.1_4','Q6.1_5'])
new_df.dropna(inplace=True)

new_df

Unnamed: 0,Q6.1_1,Q6.1_2,Q6.1_3,Q6.1_4,Q6.1_5
0,No,Yes,Yes,Yes,Yes
1,No,No,Yes,Yes,Yes
2,No,No,Yes,Yes,Yes
3,No,No,Yes,Yes,I don't know
5,No,Yes,I don't know,Yes,I don't know
...,...,...,...,...,...
73,No,No,Yes,Yes,I don't know
74,I don't know,No,Yes,Yes,I don't know
75,I don't know,No,Yes,Yes,I don't know
76,No,No,Yes,Yes,Yes


In [46]:
list1 = list(new_df['Q6.1_1'].values)
new_dict = {}

for i in list1:
  if new_dict.get(i) is not None:
    new_dict[i] += 1
  else:
    new_dict[i] = 1
          
new_df1 = pd.DataFrame(new_dict, index=['Vapes are legal in Singapore'])

new_df1

Unnamed: 0,No,I don't know,Yes
Vapes are legal in Singapore,55,8,7


In [47]:
list2 = list(new_df['Q6.1_2'].values)
new_dict = {}

for i in list2:
  if new_dict.get(i) is not None:
    new_dict[i] += 1
  else:
    new_dict[i] = 1
          
new_df2 = pd.DataFrame(new_dict, index=['Vapes are healthier than smoking'])

new_df2

Unnamed: 0,Yes,No,I don't know
Vapes are healthier than smoking,11,50,9


In [48]:
list3 = list(new_df['Q6.1_3'].values)
new_dict = {}

for i in list3:
  if new_dict.get(i) is not None:
    new_dict[i] += 1
  else:
    new_dict[i] = 1
          
new_df3 = pd.DataFrame(new_dict, index=['E-cigarettes contains nicotine'])

new_df3

Unnamed: 0,Yes,I don't know,No
E-cigarettes contains nicotine,54,13,3


In [49]:
list4 = list(new_df['Q6.1_4'].values)
new_dict = {}

for i in list4:
  if new_dict.get(i) is not None:
    new_dict[i] += 1
  else:
    new_dict[i] = 1
          
new_df4 = pd.DataFrame(new_dict, index=['Can get addicted to e-cigarettes'])

new_df4

Unnamed: 0,Yes,I don't know
Can get addicted to e-cigarettes,66,4


In [50]:
list5 = list(new_df['Q6.1_5'].values)
new_dict = {}

for i in list5:
  if new_dict.get(i) is not None:
    new_dict[i] += 1
  else:
    new_dict[i] = 1
          
new_df5 = pd.DataFrame(new_dict, index=['Vaping causes second-hand smoke'])

new_df5

Unnamed: 0,Yes,I don't know,No
Vaping causes second-hand smoke,34,25,11


In [51]:
knowledge_df = pd.concat([new_df1, new_df2, new_df3, new_df4, new_df5], axis=0)

In [52]:
knowledge_graph = px.bar(knowledge_df, text_auto=True ,labels = {'variable': 'Legend', 'index':'Questions asked', 'value':'count'}, 
                         title = "Knowledge on Vaping", height = 500, width = 600)
knowledge_graph.show()