In [1]:
import os

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns

# Select seaborn's "darkgrid" style once, before any figure is created
sns.set_style("darkgrid")

# Fixed values and functions for all plots

In [2]:
# # Loads algae names
# with open('../../fixed_values-pickles/pickle_files/algae_names.pkl', 'rb') as fp:
# 	algae_names = pickle.load(fp)

# # Loads content types (ashes, moisture, ...)
# with open('../../fixed_values-pickles/pickle_files/content_types.pkl', 'rb') as fp:
# 	content_types = pickle.load(fp)

# control_color = '#FFFFFF' # white
# xlabel = 'Burger type'
# ylabel = 'Percentage of '

# def get_contenttype(in_ct):
# 	content_type = content_types[in_ct]
# 	return content_type

# def set_filename(alga, ct):
# 	filename = alga + '_' + ct
# 	return filename

In [3]:
def csv_to_dic(csv_file):
	'''Read a CSV of buy responses and count how often each answer occurs.

	Returns:
		- for a CSV with exactly one column (a "prefer type" file), a flat dict
			{ answer : occurrences }
		- for any other CSV (a "buy type" file), one entry per column
			{ burger_type : { buy_response : occurrences }, ... }
		  where burger_type is header[-3:-1], i.e. the two characters that
		  precede the last character of the column header.
	'''
	answers = pd.read_csv(csv_file)

	# "Prefer type": the single column's counts are the whole result
	if len(answers.columns) == 1:
		return answers.iloc[:, 0].value_counts().to_dict()

	# "Buy type": one counts-dict per column, keyed by the tag sliced out of
	# the header (write [-2:-1] for only the letter)
	return {
		header[-3:-1]: answers_column.value_counts().to_dict()
		for header, answers_column in answers.items()
	}

In [4]:
def pieplot(data, filename, colors):
	'''Draw a pie chart of answer counts and save it as a PDF.

	Args:
		data: dict { label : count }; one slice is drawn per entry.
		filename: output name without extension; the chart is written to
			'google_forms-outs/<filename>.pdf'.
		colors: either a flat sequence of slice colors, or a nested dict
			{ alga_key : { name : color, ... } } holding one palette per alga.

	For a nested dict the palette is looked up with the part of `filename`
	that precedes the first '-'. If that prefix is not a key of `colors`,
	the last palette is used; if `colors` is an empty dict, matplotlib's
	default colors are used.
	'''

	# Materialise the dict views once; the values are also needed for the total
	labels = list(data.keys())
	values = list(data.values())
	total = sum(values)

	def my_autopct(pct):
		# matplotlib hands over the slice percentage: turn it back into a count
		val = int(round(pct*total/100.0))
		return '{p:.1f}%  ({v:d})'.format(p=pct,v=val)

	# Colors
	if isinstance(colors, dict):
		# NOTE(review): assumes files are named '<alga key>-...' - confirm
		alga = filename.split('-')[0]
		if alga not in colors:
			# Unknown prefix: fall back to the last palette in the dict
			alga = next(reversed(list(colors)), None)
		current_colors = list(colors[alga].values()) if alga in colors else None
	else:
		current_colors = colors

	# Draw on an explicit figure so nothing leaks from or into other plots
	fig, ax = plt.subplots()
	ax.pie(values, labels=labels, colors=current_colors, autopct=my_autopct)

	# savefig does not create missing directories
	os.makedirs('google_forms-outs', exist_ok=True)
	fig.savefig('google_forms-outs/' + filename + '.pdf', format="pdf", bbox_inches='tight')
	plt.close(fig)

# Diets (personal info)

In [5]:
# Slice colors for the diet pie chart: pale yellow, light yellow-green
diet_color_1, diet_color_2 = '#FFF9C4', '#DDEB9E'
diet_colors = [diet_color_1, diet_color_2]

# Burger samples (sensory analysis)

## Fixed values and functions for burger samples (sensory analysis)

In [7]:
# One palette per alga for the preference pie charts: the outer key is the
# alga identifier, the inner values are the slice colors (all light pink).
preferences_colors = {
	'1': dict(color_1='#F7CBDA', color_2='#F7CBDA', color_3='#F7CBDA'),  # Spirulina
	'2': dict(color_1='#F7CBDA', color_2='#F7CBDA', color_3='#F7CBDA'),  # Chlorella
	'3': dict(color_1='#F7CBDA', color_2='#F7CBDA'),  # Palmaria
}

{'1': {'color_1': '#F7CBDA', 'color_2': '#F7CBDA', 'color_3': '#F7CBDA'},
 '2': {'color_1': '#F7CBDA', 'color_2': '#F7CBDA', 'color_3': '#F7CBDA'},
 '3': {'color_1': '#F7CBDA', 'color_2': '#F7CBDA'}}

In [8]:
# Candidate bar colors, in order: pale yellow, light yellow-green,
# light green, dark green, burgundy
(buy_color_1, buy_color_2, buy_color_3, buy_color_4, buy_color_5) = (
	'#FFF9C4', '#DDEB9E', '#BBDD77', '#004225', '#800020',
)

# Only the first three are in use; buy_color_4 and buy_color_5 are left out
buy_colors = [buy_color_1, buy_color_2, buy_color_3]

In [9]:
# Groups the individual buy responses into the categories that are plotted
buy_categories = dict(
	cat_1=['Certainly would not buy', 'Probably would not buy'],
	cat_2=['Maybe would buy'],
	cat_3=['Probably would buy', 'Certainly would buy'],
)

In [10]:
def gather_buy_responses(dic, buy_categories):
	'''Aggregate per-answer counts into per-category counts.

	Args:
		dic: { buy_response : occurrences, ... }
		buy_categories: { category_name : [buy_response_1, ...] }

	Returns:
		A dict { category_name : occurrences } whose value is the sum of the
		occurrences of that category's buy_responses, in the iteration order
		of `buy_categories`.

	A buy_response that is listed in a category but absent from `dic`
	(nobody gave that answer) counts as 0 instead of raising KeyError.
	'''
	return {
		category: sum(dic.get(response, 0) for response in responses)
		for category, responses in buy_categories.items()
	}

In [11]:
def barplot(data, filename, buy_categories, colors):
	'''Draw one bar chart of grouped buy responses per burger type.

	Args:
		data: { burger_type : { buy_response : occurrences } }, where
			burger_type is like 1A, 1B, 2A, 2B, etc.
		filename: base output name without extension.
		buy_categories: { category_name : [buy_response, ...] }, forwarded
			to gather_buy_responses to group the answers.
		colors: sequence of bar colors, paired with the bars in order.

	Each burger type is written to its own file,
	'google_forms-outs/<filename>_<burger_type>.pdf', so the charts of one
	CSV do not overwrite each other.
	'''

	# savefig does not create missing directories
	os.makedirs('google_forms-outs', exist_ok=True)

	for burger_type in data:

		# Gathers responses into the categories of buy_categories
		data_gathered = gather_buy_responses(data[burger_type], buy_categories)

		x = list(data_gathered.keys())
		y = list(data_gathered.values())

		# Explicit figure/axes: one fresh figure per burger type
		fig, ax = plt.subplots()
		sns.barplot(x=x, y=y, errorbar=None, ax=ax)

		# zip stops at the shorter sequence, so surplus bars keep their default color
		for bar, color in zip(ax.patches, colors):
			bar.set_color(color)
			bar.set_edgecolor('#000000')
			bar.set_linewidth(1)

		ax.set_ylabel('Number of responses')

		fig.savefig(
			'google_forms-outs/' + filename + '_' + str(burger_type) + '.pdf',
			format="pdf", bbox_inches='tight'
		)
		plt.close(fig)

# Let's make the plots

In [15]:
data_path = 'data-google_forms/structured/'

# Sorted so the run order does not depend on the filesystem's listing order
for csv_name in sorted(os.listdir(data_path)):

	# The folder may hold entries that are not CSV files; skip them before
	# trying to parse them
	if not csv_name.endswith('.csv'):
		continue

	plot_name = csv_name[:-4]  # file name without the '.csv' extension
	responses = csv_to_dic(os.path.join(data_path, csv_name))

	if csv_name == 'diet.csv':
		pieplot(responses, plot_name, diet_colors)

	elif csv_name.endswith('-prefer.csv'):
		pieplot(responses, plot_name, preferences_colors)

	elif csv_name.endswith('-buy.csv'):
		barplot(responses, plot_name, buy_categories, buy_colors)