# Google forms

## Import libraries

In [1]:
import os

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns

sns.set_style("darkgrid")

In [2]:
# Font setting

import matplotlib.font_manager as font_manager

font_path = '../../Minion3/Minion3-Regular.ttf'  # Your font path goes here
font_manager.fontManager.addfont(font_path)
prop = font_manager.FontProperties(fname=font_path)

plt.rcParams['font.family'] = 'serif'
plt.rcParams['font.serif'] = prop.get_name()

# TODO Constant values for all plots

In [3]:
# # Loads algae names
# with open('../../fixed_values-pickles/pickle_files/algae_names.pkl', 'rb') as fp:
# 	algae_names = pickle.load(fp)

# # Loads content types (ashes, moisture, ...)
# with open('../../fixed_values-pickles/pickle_files/content_types.pkl', 'rb') as fp:
# 	content_types = pickle.load(fp)

# control_color = '#FFFFFF' # white
# xlabel = 'Burger type'
# ylabel = 'Percentage of '

# def get_contenttype(in_ct):
# 	content_type = content_types[in_ct]
# 	return content_type

# def set_filename(alga, ct):
# 	filename = alga + '_' + ct
# 	return filename

## Define functions

In [4]:
def csv_to_dic(csv_file):
	'''Funzione che inserisce i dati dei csv delle buy responses in un dizionario 
		{burger_type: { buy_response : occurrences }, ... }
	formattando il tutto'''

	# Given a series/column, makes a dic {answer : counts}
	def occurrences_dic(column):
		return column.value_counts().to_dict()

	# Load the CSV file into a DataFrame
	df = pd.read_csv(csv_file)

	# Counts the columns
	n_columns = len(df.columns)

	# Initialize the dictionary
	dic = {}

	# Iterates for every column of the csv
	for column in range(n_columns):

		# Selects only the current column
		sel_column = df.iloc[:, column]
		
		# If the csv is a "buy type" (the ones with more than one column)
		if n_columns != 1:

			# Get and format the header
			header = sel_column.name
			header = header[-3:-1] # Write [-2:-1] for only the letter

			# Makes dic {answer : counts}
			occurrences = occurrences_dic(sel_column)
			dic[header] = occurrences
		
		# If the csv is a "prefer type" (the ones with only one column)
		else:
			dic = occurrences_dic(sel_column)

	return dic

# Diets (personal info)

In [5]:
diet_color_1 = '#A8E6CF' # 
diet_color_2 = '#D4F0CA' # 
diet_colors = [diet_color_1, diet_color_2]

# Burger samples (sensory analysis)

## Constant values and functions for burger samples (sensory analysis)

In [6]:
preferences_colors = {
	'2' : { # Spirulina
			'color_1' : '#C5E1A5',
			'color_2' : '#7FB871',
			'color_3' : '#388E3C'
			},
	'3' : { # Chlorella
			'color_1' : '#FFF9C4',
			'color_2' : '#FDDD79',
			'color_3' : '#FBC02D'
			},
	'1' : { # Palmaria
			'color_1' : '#F7CBDA',
			'color_2' : '#F7AFC8'
			},
}

In [7]:
buy_color_1 = '#FFF9C4' # 
buy_color_2 = '#D4F0CA' # 
buy_color_3 = '#A8E6CF' # 
buy_color_4 = '#004225' # 
buy_color_5 = '#800020' #
buy_colors = [
	buy_color_1, buy_color_2, buy_color_3, \
	# buy_color_4, buy_color_5
] 

In [8]:
buy_categories = {
	'Not buy': ['Certainly would not buy', 'Probably would not buy'],
	'Maybe buy': ['Maybe would buy'],
	'Buy': ['Probably would buy', 'Certainly would buy']
}

## Declare functions

In [9]:
def gather_buy_responses(dic, buy_categories):
	'''
	Funzione che a partire da un dizionario
		{ buy_response : occurrences, ... }
	e un dizionario
		{ nome_categoria : [buy_response_1, ...] }
	restituisce un dizionario con:
		chiavi -> le buy_responses raggruppate per categoria
		valori -> i conteggi di quella categoria di buy_responses
	'''
	sr = pd.Series(dic)

	gathered_categories = {}

	# Iterate for every category
	for category in buy_categories:

		# Compute occurrences of every category
		sum = sr.loc[buy_categories[category]].sum()
		gathered_categories[category] = sum

	return gathered_categories

In [10]:
def barplot(data, filename, buy_categories, colors):

	for burger_type in data:
		# burger_type is like 1A, 1B, 2A, 2B, etc...
		
		# Gathers responses into positive, negative, etc...
		data_gathered = gather_buy_responses(data[burger_type], buy_categories)
		print(data_gathered)

		# x = data_gathered.keys()
		# y = data_gathered.values()

		# barplot = sns.barplot(x=x, y=y, errorbar=None)

		# for bar, color in zip(barplot.patches, colors):
		# 	bar.set_color(color)
		# 	bar.set_edgecolor('#000000')
		# 	bar.set_linewidth(1)

		# plt.ylabel('Number of reponses')

## Loop that creates the plots

In [11]:
data_path = 'data-google_forms/structured/'

for csv in os.listdir(data_path):
	
	dic = csv_to_dic(data_path + csv)
	
	if csv == 'diet.csv':
		print('ok')
	
	elif csv.endswith('-prefer.csv'):
		alga = csv[-12]
		current_colors = list(preferences_colors[alga].values())
		print('ok')
	
	elif csv.endswith('-buy.csv'):
		print(csv[:-4])
		# barplot(dic, csv[:-4], buy_categories, buy_colors)

sample_3-buy
ok
sample_1-buy
ok
sample_2-buy
ok
ok
