# Analyse data
From https://github.com/alex-d-bondarev/neu-capstone

## Data preparation

### Load libraries
Libraries need to be imported **only once** each time the Jupyter notebook is launched.

In [None]:
import pandas

from ipyfilechooser import FileChooser

import sys 
import os

# Put the parent of the current working directory first on the import path
# so that the `src.*` imports below can be resolved from this notebook.
abs_path = os.path.abspath(os.path.join('..'))
sys.path.insert(0, abs_path)

from src.common_helpers import make_text_pandas_header_compatible
from src.extended_pandas_series import ExtendedSeries
from src.plot_helpers import plot_bar_chart, plot_1_10_hist_chart, plot_text_answer, plot_multichoice_with_other


### Read the file

In [None]:
# show a file chooser widget; pick the survey excel file in it before
# running the next cell, which reads `raw_data.selected`
raw_data = FileChooser()
display(raw_data)

In [None]:
# Imported here (not in the "Load libraries" cell) so this cell stays
# runnable on its own.
import warnings

# Path of the excel file picked in the FileChooser widget (`raw_data`).
file_path = raw_data.selected
if file_path is None:
    # `selected` is None until a file has been chosen in the widget; stop
    # here with an actionable message instead of an opaque pandas error.
    raise ValueError(
        'No file selected: pick the survey excel file in the file chooser '
        'and run this cell again'
    )

# open excel file as pandas dataframe
raw_df = pandas.read_excel(io=file_path, sheet_name='Form1')

# remove illegal characters from column names
headers_list = [
    make_text_pandas_header_compatible(header)
    for header in raw_df.columns.tolist()
]
raw_df.columns = headers_list

# Cleaned-up headers of the survey questions analysed in this notebook.
necessary_columns = [
    'Have you hired Big Sky Franchise Team',
    'How likely would you recommend Big Sky Franchise Team to a friend or colleague 1 is not at all likely and 10 is extremelylikely',
    'If you were to do it all over would you hire Big Sky again',
    'If no could you please tell why',
    'What year did you start franchising',
    'How many total franchises have you sold since you started franchisingif not applicable enter NA',
    'How many franchises did you sell in your first yearif not applicable enter NA',
    'How many franchises did you sell in your second year if not applicable enter NA',
    'How many franchises did you sell in your third yearif not applicable enter NA',
    'What were your biggest challenges or obstacles to launching your franchise program',
    'What were your biggest challenges to selling franchises',
    'What have been your biggest challenges in supporting your franchisees select all that apply2',
    'What areas do you need help with Select all that apply',
    'Would you like to participate in an optional interview to share more insights',
    'Please share your email so that we may contact you for an optional interview Leave empty if you prefer to be anonymous',
    'Business Name Leave empty if you prefer to be anonymous',
]

# DataFrame.filter silently skips labels that are absent, which would only
# surface later as a KeyError in a plotting cell; report them right away.
available_columns = set(headers_list)
missing_columns = [
    name for name in necessary_columns if name not in available_columns
]
if missing_columns:
    warnings.warn(
        'Columns missing from the survey sheet: ' + ', '.join(missing_columns)
    )

# Keep only the analysed columns (absent ones are skipped, as before).
df = raw_df.filter(items=necessary_columns, axis=1)


## General survey analysis

### 1. Have you hired Big Sky Franchise Team?

In [None]:
# Question 1, all respondents (`df`): hand the answers to the project's
# bar-chart helper.
column_name = 'Have you hired Big Sky Franchise Team'
plot_bar_chart(df[column_name])

### 2. How likely would you recommend Big Sky Franchise Team to a friend or colleague? 

In [None]:
# Question 2, all respondents (`df`): recommendation score, drawn with the
# project's 1-10 histogram helper.
column_name = 'How likely would you recommend Big Sky Franchise Team to a friend or colleague 1 is not at all likely and 10 is extremelylikely'
plot_1_10_hist_chart(df[column_name])

### 3. If you were to do it all over, would you hire Big Sky again?

In [None]:
# Question 3, all respondents (`df`). The column is wrapped in the project's
# ExtendedSeries and passed through filter_out_nan() before plotting --
# presumably this drops unanswered rows; confirm in src.extended_pandas_series.
column_name = 'If you were to do it all over would you hire Big Sky again'
plot_bar_chart(ExtendedSeries(df[column_name]).filter_out_nan())

### 4. If no, could you please tell why?

In [None]:
# Question 4 (free text), all respondents (`df`). `top=3` is forwarded to
# plot_text_answer -- presumably the number of most frequent items shown;
# confirm in src.plot_helpers.
column_name = 'If no could you please tell why'
plot_text_answer(df[column_name], top=3)

### 5. What year did you start franchising?

In [None]:
# Question 5, all respondents (`df`): hand the answers to the project's
# bar-chart helper.
column_name = 'What year did you start franchising'
plot_bar_chart(df[column_name])

### 6. How many total franchises have you sold since you started franchising?

In [None]:
# Question 6, all respondents (`df`): hand the answers to the project's
# bar-chart helper.
column_name = 'How many total franchises have you sold since you started franchisingif not applicable enter NA'
plot_bar_chart(df[column_name])

### 7. How many franchises did you sell in your first year?

In [None]:
# Question 7, all respondents (`df`): hand the answers to the project's
# bar-chart helper.
column_name = 'How many franchises did you sell in your first yearif not applicable enter NA'
plot_bar_chart(df[column_name])

### 8. How many franchises did you sell in your second year?

In [None]:
# Question 8, all respondents (`df`): hand the answers to the project's
# bar-chart helper.
column_name = 'How many franchises did you sell in your second year if not applicable enter NA'
plot_bar_chart(df[column_name])

### 9. How many franchises did you sell in your third year?

In [None]:
# Question 9, all respondents (`df`): hand the answers to the project's
# bar-chart helper.
column_name = 'How many franchises did you sell in your third yearif not applicable enter NA'
plot_bar_chart(df[column_name])

### 10. What were your biggest challenges or obstacles to launching your franchise program?

In [None]:
# Question 10 (free text), all respondents (`df`). `top=3` is forwarded to
# plot_text_answer -- presumably the number of most frequent items shown;
# confirm in src.plot_helpers.
column_name = 'What were your biggest challenges or obstacles to launching your franchise program'
plot_text_answer(df[column_name], top=3)

### 11. What were your biggest challenges to selling franchises?

In [None]:
# Question 11 (free text), all respondents (`df`). `top=3` is forwarded to
# plot_text_answer -- presumably the number of most frequent items shown;
# confirm in src.plot_helpers.
column_name = 'What were your biggest challenges to selling franchises'
plot_text_answer(df[column_name], top=3)

### 12. What have been your biggest challenges in supporting your franchisees?

In [None]:
# Question 12 (multiple choice), all respondents (`df`).
column_name = 'What have been your biggest challenges in supporting your franchisees select all that apply2'
# Answer options handed to the helper as `main_values`; presumably any other
# answer is treated as "other" and `top` limits how many of those are
# shown -- confirm in src.plot_helpers.
expected_values = [
    'None',
    'Their motivation',
    'They did not follow system',
    'Their finances and financial management',
    'Lack of marketing spend by franchisee',
    'Quality issues',
    'My (or franchisor) coaching skills',
    'Miscommunication',
]
plot_multichoice_with_other(
    series=df[column_name],
    main_values=expected_values,
    top=5,
)

### 13. What areas do you need help with?

In [None]:
# Question 13 (multiple choice), all respondents (`df`).
column_name = 'What areas do you need help with Select all that apply'
# Answer options handed to the helper as `main_values`; presumably any other
# answer is treated as "other" and `top` limits how many of those are
# shown -- confirm in src.plot_helpers.
expected_values = [
    'None',
    'Whole franchising process',
    'Business plan',
    'Financial processes',
    'Competitors analysis',
    'Legal documentation',
    'Licensing documentation',
    'Marketing',
    'Quality Assurance',
    'Franchisee training processes',
    'Technical support',
]
plot_multichoice_with_other(
    series=df[column_name],
    main_values=expected_values,
    top=2,
)

### 14/15/16. Private data

## Analyse non-clients

In [None]:
# Subset for this section: rows whose answer to question 1 is exactly
# 'Have not hired'.
column_name = 'Have you hired Big Sky Franchise Team'
non_df = df[df[column_name] == 'Have not hired']

### 13. What areas do you need help with?

In [None]:
# Question 13 (multiple choice), non-clients only (`non_df`).
column_name = 'What areas do you need help with Select all that apply'
# Answer options handed to the helper as `main_values`; presumably any other
# answer is treated as "other" and `top` limits how many of those are
# shown -- confirm in src.plot_helpers.
expected_values = [
    'None',
    'Whole franchising process',
    'Business plan',
    'Financial processes',
    'Competitors analysis',
    'Legal documentation',
    'Licensing documentation',
    'Marketing',
    'Quality Assurance',
    'Franchisee training processes',
    'Technical support',
]
plot_multichoice_with_other(
    series=non_df[column_name],
    main_values=expected_values,
    top=2,
)

## Analyse past clients

In [None]:
# Subset for this section: rows whose answer to question 1 is exactly
# 'Previously hired'.
column_name = 'Have you hired Big Sky Franchise Team'
past_df = df[df[column_name] == 'Previously hired']

### 2. How likely would you recommend Big Sky Franchise Team to a friend or colleague? 

In [None]:
# Question 2, past clients only (`past_df`): recommendation score, drawn with
# the project's 1-10 histogram helper.
column_name = 'How likely would you recommend Big Sky Franchise Team to a friend or colleague 1 is not at all likely and 10 is extremelylikely'
plot_1_10_hist_chart(past_df[column_name])

### 3. If you were to do it all over, would you hire Big Sky again?

In [None]:
# Question 3, past clients only (`past_df`). filter_out_nan() is applied
# before plotting -- presumably this drops unanswered rows; confirm in
# src.extended_pandas_series.
column_name = 'If you were to do it all over would you hire Big Sky again'
plot_bar_chart(ExtendedSeries(past_df[column_name]).filter_out_nan())

### 4. If no, could you please tell why?

In [None]:
# Question 4 (free text), past clients only (`past_df`). `top=3` is forwarded
# to plot_text_answer -- presumably the number of most frequent items shown;
# confirm in src.plot_helpers.
column_name = 'If no could you please tell why'
plot_text_answer(past_df[column_name], top=3)

### 5. What year did you start franchising?

In [None]:
# Question 5, past clients only (`past_df`): hand the answers to the
# project's bar-chart helper.
column_name = 'What year did you start franchising'
plot_bar_chart(past_df[column_name])

### 6. How many total franchises have you sold since you started franchising?

In [None]:
# Question 6, past clients only (`past_df`): hand the answers to the
# project's bar-chart helper.
column_name = 'How many total franchises have you sold since you started franchisingif not applicable enter NA'
plot_bar_chart(past_df[column_name])

### 10. What were your biggest challenges or obstacles to launching your franchise program?

In [None]:
# Question 10 (free text), past clients only (`past_df`). `top=3` is
# forwarded to plot_text_answer -- presumably the number of most frequent
# items shown; confirm in src.plot_helpers.
column_name = 'What were your biggest challenges or obstacles to launching your franchise program'
plot_text_answer(past_df[column_name], top=3)

### 11. What were your biggest challenges to selling franchises?

In [None]:
# Question 11 (free text), past clients only (`past_df`). `top=3` is
# forwarded to plot_text_answer -- presumably the number of most frequent
# items shown; confirm in src.plot_helpers.
column_name = 'What were your biggest challenges to selling franchises'
plot_text_answer(past_df[column_name], top=3)

### 12. What have been your biggest challenges in supporting your franchisees?

In [None]:
# Question 12 (multiple choice), past clients only (`past_df`).
column_name = 'What have been your biggest challenges in supporting your franchisees select all that apply2'
# Answer options handed to the helper as `main_values`; presumably any other
# answer is treated as "other" and `top` limits how many of those are
# shown -- confirm in src.plot_helpers.
expected_values = [
    'None',
    'Their motivation',
    'They did not follow system',
    'Their finances and financial management',
    'Lack of marketing spend by franchisee',
    'Quality issues',
    'My (or franchisor) coaching skills',
    'Miscommunication',
]
plot_multichoice_with_other(
    series=past_df[column_name],
    main_values=expected_values,
    top=5,
)

### 13. What areas do you need help with?

In [None]:
# Question 13 (multiple choice), past clients only (`past_df`).
column_name = 'What areas do you need help with Select all that apply'
# Answer options handed to the helper as `main_values`; presumably any other
# answer is treated as "other" and `top` limits how many of those are
# shown -- confirm in src.plot_helpers.
expected_values = [
    'None',
    'Whole franchising process',
    'Business plan',
    'Financial processes',
    'Competitors analysis',
    'Legal documentation',
    'Licensing documentation',
    'Marketing',
    'Quality Assurance',
    'Franchisee training processes',
    'Technical support',
]
plot_multichoice_with_other(
    series=past_df[column_name],
    main_values=expected_values,
    top=2,
)

## Analyse current clients

In [None]:
# Subset for this section: rows whose answer to question 1 is exactly
# 'Currently hired'.
column_name = 'Have you hired Big Sky Franchise Team'
now_df = df[df[column_name] == 'Currently hired']

### 2. How likely would you recommend Big Sky Franchise Team to a friend or colleague? 

In [None]:
# Question 2, current clients only (`now_df`): recommendation score, drawn
# with the project's 1-10 histogram helper.
column_name = 'How likely would you recommend Big Sky Franchise Team to a friend or colleague 1 is not at all likely and 10 is extremelylikely'
plot_1_10_hist_chart(now_df[column_name])

### 3. If you were to do it all over, would you hire Big Sky again?

In [None]:
# Question 3, current clients only (`now_df`). filter_out_nan() is applied
# before plotting -- presumably this drops unanswered rows; confirm in
# src.extended_pandas_series.
column_name = 'If you were to do it all over would you hire Big Sky again'
plot_bar_chart(ExtendedSeries(now_df[column_name]).filter_out_nan())

### 4. If no, could you please tell why?

In [None]:
# Question 4 (free text), current clients only (`now_df`). `top=3` is
# forwarded to plot_text_answer -- presumably the number of most frequent
# items shown; confirm in src.plot_helpers.
column_name = 'If no could you please tell why'
plot_text_answer(now_df[column_name], top=3)

### 5. What year did you start franchising?

In [None]:
# Question 5, current clients only (`now_df`): hand the answers to the
# project's bar-chart helper.
column_name = 'What year did you start franchising'
plot_bar_chart(now_df[column_name])

### 6. How many total franchises have you sold since you started franchising?

In [None]:
# Question 6, current clients only (`now_df`): hand the answers to the
# project's bar-chart helper.
column_name = 'How many total franchises have you sold since you started franchisingif not applicable enter NA'
plot_bar_chart(now_df[column_name])

### 10. What were your biggest challenges or obstacles to launching your franchise program?

In [None]:
# Question 10 (free text), current clients only (`now_df`). `top=3` is
# forwarded to plot_text_answer -- presumably the number of most frequent
# items shown; confirm in src.plot_helpers.
column_name = 'What were your biggest challenges or obstacles to launching your franchise program'
plot_text_answer(now_df[column_name], top=3)

### 11. What were your biggest challenges to selling franchises?

In [None]:
# Question 11 (free text), current clients only (`now_df`). `top=3` is
# forwarded to plot_text_answer -- presumably the number of most frequent
# items shown; confirm in src.plot_helpers.
column_name = 'What were your biggest challenges to selling franchises'
plot_text_answer(now_df[column_name], top=3)

### 12. What have been your biggest challenges in supporting your franchisees?

In [None]:
# Question 12 (multiple choice), current clients only (`now_df`).
column_name = 'What have been your biggest challenges in supporting your franchisees select all that apply2'
# Answer options handed to the helper as `main_values`; presumably any other
# answer is treated as "other" and `top` limits how many of those are
# shown -- confirm in src.plot_helpers.
expected_values = [
    'None',
    'Their motivation',
    'They did not follow system',
    'Their finances and financial management',
    'Lack of marketing spend by franchisee',
    'Quality issues',
    'My (or franchisor) coaching skills',
    'Miscommunication',
]
plot_multichoice_with_other(
    series=now_df[column_name],
    main_values=expected_values,
    top=5,
)

### 13. What areas do you need help with?

In [None]:
# Question 13 (multiple choice), current clients only (`now_df`).
column_name = 'What areas do you need help with Select all that apply'
# Answer options handed to the helper as `main_values`; presumably any other
# answer is treated as "other" and `top` limits how many of those are
# shown -- confirm in src.plot_helpers.
expected_values = [
    'None',
    'Whole franchising process',
    'Business plan',
    'Financial processes',
    'Competitors analysis',
    'Legal documentation',
    'Licensing documentation',
    'Marketing',
    'Quality Assurance',
    'Franchisee training processes',
    'Technical support',
]
plot_multichoice_with_other(
    series=now_df[column_name],
    main_values=expected_values,
    top=2,
)