In [None]:
# standard library
import os
import warnings

# data analysis
import numpy as np
import pandas as pd

# plotting
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
from plotly.subplots import make_subplots

# Input folder of the CSV files.
# The folder can be overridden with the TESI_DATA_PATH environment variable so
# the notebook is not tied to one machine; when the variable is unset or empty
# the original location is used.
_DEFAULT_DATA_PATH = "C:/Users/ettor/OneDrive/Documenti/Python/Progetti lavoro/Progetto Tesi Barbieri/Input/"
DATA_PATH = os.environ.get("TESI_DATA_PATH") or _DEFAULT_DATA_PATH
# File names are appended with plain string concatenation (DATA_PATH + 'x.csv'),
# so the value must always end with a path separator.
if not DATA_PATH.endswith(("/", "\\")):
    DATA_PATH += "/"
#pd.options.mode.chained_assignment = None 

## ANALISI (pH, Alcol, Acidi, ecc...)

In [None]:
# Load the before-fermentation (bf) and after-fermentation (af) tables from
# the CSV files stored under DATA_PATH.
bf, af = (
    pd.read_csv(f"{DATA_PATH}{csv_name}")
    for csv_name in ('Before Fermentation.csv', 'After Fermentation.csv')
)

## Grafici a barre per pH, alcol, acidi, ecc...

In [None]:
# Dataset cleaning
# Turn every ',' found inside string cells into '.' (presumably decimal commas
# in the CSV export -- confirm) so measurement columns can later be cast to
# float, then order each table by its sample identifier.
bf = bf.replace(',', '.', regex=True).sort_values(by='Sample BF (must)')
af = af.replace(',', '.', regex=True).sort_values(by='Sample AF(wine)')

# Keep only the samples present in both tables.
# `.copy()` makes each filtered table own its data, so the column assignments
# below do not raise SettingWithCopyWarning on a boolean-indexed frame.
bf = bf[bf['Sample BF (must)'].isin(af['Sample AF(wine)'])].copy()
af = af[af['Sample AF(wine)'].isin(bf['Sample BF (must)'])].copy()

# Sample identifiers are used as bar labels, so store them as strings.
bf['Sample BF (must)'] = bf['Sample BF (must)'].astype(str)
af['Sample AF(wine)'] = af['Sample AF(wine)'].astype(str)

In [None]:
# Split each table into its green-skin and red-skin rows
bf_green_skins, bf_red_skins = (
    bf.loc[bf['Skin colour'].eq(skin)] for skin in ('green', 'red')
)
af_green_skins, af_red_skins = (
    af.loc[af['Skin colour'].eq(skin)] for skin in ('green', 'red')
)

In [None]:
def grafici(bf_green_skins, bf_red_skins, af_green_skins, af_red_skins, column):
    """Draw a 2x2 grid of bar charts of ``column``, one bar per sample.

    Top row shows the before-fermentation tables, bottom row the
    after-fermentation ones. The narrow left panels hold the green-skin
    samples (all bars green); the wide right panels hold the red-skin samples,
    each bar coloured with that sample's own 'Flesh colour' value.
    Inside every panel the bars are ordered by ascending value, ties broken by
    sample label.

    Parameters
    ----------
    bf_green_skins, bf_red_skins : pandas.DataFrame
        Before-fermentation rows. Must contain ``column`` and
        'Sample BF (must)'; the red-skin table must also contain
        'Flesh colour'.
    af_green_skins, af_red_skins : pandas.DataFrame
        After-fermentation rows. Must contain ``column`` and
        'Sample AF(wine)'; the red-skin table must also contain
        'Flesh colour'.
    column : str
        Name of the column to plot; its values must be convertible to float.

    Raises
    ------
    KeyError
        If a required column is missing from a non-empty table.
    ValueError
        If a value of ``column`` cannot be converted to float.
    """

    def _draw_panel(ax, frame, label_column, colour=None):
        """Draw the value-sorted bars of one table on ``ax``."""
        if frame.empty:
            # Nothing to draw: leave the panel blank instead of crashing.
            return
        # Sort whole rows, so labels, heights and colours stay aligned.
        ordered = (
            frame.assign(**{column: frame[column].astype(float)})
            .sort_values(by=[column, label_column])
        )
        labels = ordered[label_column].tolist()
        heights = ordered[column].tolist()
        # Per-bar colours must follow the same sorted order as the bars.
        colours = colour if colour is not None else ordered['Flesh colour'].tolist()
        ax.bar(labels, heights, color=colours)
        ax.set_xticks(range(len(labels)))
        ax.set_xticklabels(labels, rotation=45)

    fig = plt.figure(figsize=(20,15))

    ax0 = plt.subplot2grid((2, 6), (0, 0), colspan=2)
    ax1 = plt.subplot2grid((2, 6), (0, 2), colspan=4)
    ax2 = plt.subplot2grid((2, 6), (1, 0), colspan=2)
    ax3 = plt.subplot2grid((2, 6), (1, 2), colspan=4)

    _draw_panel(ax0, bf_green_skins, 'Sample BF (must)', colour='green')
    ax0.set_title('Green Skin')
    ax0.set_ylabel('Before Fermentation')

    _draw_panel(ax1, bf_red_skins, 'Sample BF (must)')
    ax1.set_title('Red Skin')

    _draw_panel(ax2, af_green_skins, 'Sample AF(wine)', colour='green')
    ax2.set_ylabel('After Fermentation')

    _draw_panel(ax3, af_red_skins, 'Sample AF(wine)')

    plt.suptitle(column)
    plt.tight_layout()
    plt.show()
    # This function is called once per column: release the figure explicitly.
    plt.close(fig)

In [None]:
# Plot every measurement column: identifier and category columns are removed
# first so that only columns that can be cast to float reach grafici.
bf_loop = bf.drop(['Sample BF (must)','Skin colour','Flesh colour'], axis=1)
# A later cell inserts a constant 'Juice/Wine' text column into `bf`; drop it
# when present so that re-running this cell afterwards does not try to plot it.
bf_loop = bf_loop.drop(columns=['Juice/Wine'], errors='ignore')

for column in bf_loop.columns:
    grafici(bf_green_skins, bf_red_skins,
            af_green_skins, af_red_skins,column)

## BoxPlot per pH, Acidi, Alcol, ecc...

In [None]:
# bf data preparation: tag every before-fermentation row with the constant
# 'BF' in a new 'Juice/Wine' column placed right before 'Skin colour'.
valore_costante = 'BF'
nuova_colonna_nome = 'Juice/Wine'
colonna_prima = 'Sample BF (must)'  # not used in this cell
colonna_dopo = 'Skin colour'
indice_colonna_dopo = bf.columns.get_loc(colonna_dopo)
# DataFrame.insert raises ValueError when the column already exists, so the
# guard keeps this cell safe to run more than once.
if nuova_colonna_nome not in bf.columns:
    bf.insert(indice_colonna_dopo, nuova_colonna_nome, valore_costante)

In [None]:
# af data preparation: tag every after-fermentation row with the constant
# 'AF' in a new 'Juice/Wine' column placed right before 'Skin colour'.
valore_costante = 'AF'
nuova_colonna_nome = 'Juice/Wine'
colonna_prima = 'Sample AF(wine)'  # not used in this cell
colonna_dopo = 'Skin colour'
indice_colonna_dopo = af.columns.get_loc(colonna_dopo)
# DataFrame.insert raises ValueError when the column already exists, so the
# guard keeps this cell safe to run more than once.
if nuova_colonna_nome not in af.columns:
    af.insert(indice_colonna_dopo, nuova_colonna_nome, valore_costante)


In [None]:
# Merge: remove the sample identifier from each table, then stack the
# before- and after-fermentation rows into one frame with a fresh index.
new_bf = bf.drop(columns=['Sample BF (must)'])
new_af = af.drop(columns=['Sample AF(wine)'])
merged_df = pd.concat((new_bf, new_af), ignore_index=True)
# Show all rows when a frame is displayed.
pd.set_option('display.max_rows', None)

In [None]:
# Split merged_df by skin colour, then each part by flesh colour
merged_df_green_skins = merged_df.loc[merged_df['Skin colour'].eq('green')]
merged_df_red_skins = merged_df.loc[merged_df['Skin colour'].eq('red')]

merged_df_green_skins_green_flesh, merged_df_green_skins_red_flesh = (
    merged_df_green_skins.loc[merged_df_green_skins['Flesh colour'].eq(flesh)]
    for flesh in ('green', 'red')
)
merged_df_red_skins_green_flesh, merged_df_red_skins_red_flesh = (
    merged_df_red_skins.loc[merged_df_red_skins['Flesh colour'].eq(flesh)]
    for flesh in ('green', 'red')
)

In [None]:
# Box-plot function for merged_df
def grafici_box(merged_df_green_skins_green_flesh, merged_df_green_skins_red_flesh, merged_df_red_skins_green_flesh, merged_df_red_skins_red_flesh, column):
    """Show a 2x2 grid of box plots of ``column`` grouped by 'Juice/Wine'.

    Grid columns are the skin colour (green on the left, red on the right);
    grid rows are the flesh colour (green on top, red at the bottom).

    Parameters
    ----------
    merged_df_green_skins_green_flesh, merged_df_green_skins_red_flesh,
    merged_df_red_skins_green_flesh, merged_df_red_skins_red_flesh : pandas.DataFrame
        The four skin/flesh subsets. Each must contain 'Juice/Wine' and
        ``column``. An empty subset leaves its panel blank.
    column : str
        Name of the column to plot; its values must be convertible to float.

    Raises
    ------
    KeyError
        If a required column is missing from a non-empty subset.
    ValueError
        If a value of ``column`` cannot be converted to float.
    """
    # (subset, grid row, grid column), in the order the traces are added.
    panels = (
        (merged_df_green_skins_green_flesh, 1, 1),
        (merged_df_red_skins_green_flesh, 1, 2),
        (merged_df_green_skins_red_flesh, 2, 1),
        (merged_df_red_skins_red_flesh, 2, 2),
    )

    fig = make_subplots(rows=2, cols=2, subplot_titles=('Green Skin', "Red skin"))

    # Add each box plot at its position in the grid
    for subset, row, col in panels:
        if subset.empty:
            continue
        # The values must stay in row order: plotly express attaches array-like
        # arguments to the data frame by position, so sorting them first would
        # pair each value with the wrong 'Juice/Wine' group.
        values = subset[column].astype(float)
        box = px.box(subset, x='Juice/Wine', y=values)
        for trace in box.data:
            fig.add_trace(trace, row=row, col=col)

    # Layout
    fig.update_layout(title_text=column, height = 800, width = 1200, template='plotly_white')
    fig.update_yaxes(title_text="Green Flesh", row=1, col=1)
    fig.update_yaxes(title_text="Red Flesh", row=2, col=1)

    fig.show()

In [None]:
# Call grafici_box once for every measurement column of merged_df
# (the category columns are removed first).
merged_df_loop = merged_df.drop(columns=['Juice/Wine', 'Skin colour', 'Flesh colour'])
for column in merged_df_loop:
    grafici_box(
        merged_df_green_skins_green_flesh,
        merged_df_green_skins_red_flesh,
        merged_df_red_skins_green_flesh,
        merged_df_red_skins_red_flesh,
        column,
    )

## ANALISI MOLECOLE

In [None]:
# Show all rows when a frame is displayed.
pd.set_option('display.max_rows', None)
# Load the molecules table and turn every ',' inside string cells into '.'.
um = pd.read_csv(f"{DATA_PATH}Ultime molecole_nuovo.csv").replace(',', '.', regex=True)

## Grafici a Barre per Molecole

In [None]:
# Data preparation: one table per 'Juice/Wine' value ('BF' / 'AF'), ordered by
# sample number, with the sample number stored as a string.
um_bf, um_af = (
    um.loc[um['Juice/Wine'].eq(stage)]
    .sort_values(by='Sample Number')
    .assign(**{'Sample Number': lambda frame: frame['Sample Number'].astype(str)})
    for stage in ('BF', 'AF')
)

In [None]:
# Split each molecules table into its green-skin and red-skin rows
um_bf_green_skins, um_bf_red_skins = (
    um_bf.loc[um_bf['Skin colour'].eq(skin)] for skin in ('green', 'red')
)
um_af_green_skins, um_af_red_skins = (
    um_af.loc[um_af['Skin colour'].eq(skin)] for skin in ('green', 'red')
)

In [None]:
def grafici_um(um_bf_green_skins, um_bf_red_skins, um_af_green_skins, um_af_red_skins, column):
    """Draw a 2x2 grid of bar charts of ``column``, one bar per sample.

    Top row shows the before-fermentation tables, bottom row the
    after-fermentation ones. The narrow left panels hold the green-skin
    samples (all bars green); the wide right panels hold the red-skin samples,
    each bar coloured with that sample's own 'Flesh colour' value.
    Inside every panel the bars are ordered by ascending value, ties broken by
    sample label.

    Parameters
    ----------
    um_bf_green_skins, um_bf_red_skins : pandas.DataFrame
        Before-fermentation rows. Must contain ``column`` and 'Sample Number';
        the red-skin table must also contain 'Flesh colour'.
    um_af_green_skins, um_af_red_skins : pandas.DataFrame
        After-fermentation rows, with the same columns.
    column : str
        Name of the column to plot; its values must be convertible to float.

    Raises
    ------
    KeyError
        If a required column is missing from a non-empty table.
    ValueError
        If a value of ``column`` cannot be converted to float.
    """

    def _draw_panel(ax, frame, colour=None):
        """Draw the value-sorted bars of one table on ``ax``."""
        if frame.empty:
            # Nothing to draw: leave the panel blank instead of crashing.
            return
        # Sort whole rows, so labels, heights and colours stay aligned.
        ordered = (
            frame.assign(**{column: frame[column].astype(float)})
            .sort_values(by=[column, 'Sample Number'])
        )
        labels = ordered['Sample Number'].tolist()
        heights = ordered[column].tolist()
        # Per-bar colours must follow the same sorted order as the bars.
        colours = colour if colour is not None else ordered['Flesh colour'].tolist()
        ax.bar(labels, heights, color=colours)
        ax.set_xticks(range(len(labels)))
        ax.set_xticklabels(labels, rotation=45)

    fig = plt.figure(figsize=(20,15))

    ax0 = plt.subplot2grid((2, 6), (0, 0), colspan=2)
    ax1 = plt.subplot2grid((2, 6), (0, 2), colspan=4)
    ax2 = plt.subplot2grid((2, 6), (1, 0), colspan=2)
    ax3 = plt.subplot2grid((2, 6), (1, 2), colspan=4)

    _draw_panel(ax0, um_bf_green_skins, colour='green')
    ax0.set_title('Green Skin')
    ax0.set_ylabel('Before Fermentation')

    _draw_panel(ax1, um_bf_red_skins)
    ax1.set_title('Red Skin')

    _draw_panel(ax2, um_af_green_skins, colour='green')
    ax2.set_ylabel('After Fermentation')

    _draw_panel(ax3, um_af_red_skins)

    plt.suptitle(column)
    plt.tight_layout()
    plt.show()
    # This function is called once per column: release the figure explicitly.
    plt.close(fig)

In [None]:
# Call grafici_um once for every measurement column (the identifier and
# category columns are removed first).
um_bf_loop = um_bf.drop(columns=['Sample Number', 'Juice/Wine', 'Skin colour', 'Flesh colour'])
for column in um_bf_loop:
    grafici_um(
        um_bf_green_skins,
        um_bf_red_skins,
        um_af_green_skins,
        um_af_red_skins,
        column,
    )

## BoxPlot per Molecole

In [None]:
# Split the molecules table by skin colour, then each part by flesh colour
um_green_skins = um.loc[um['Skin colour'].eq('green')]
um_red_skins = um.loc[um['Skin colour'].eq('red')]

um_green_skins_green_flesh, um_green_skins_red_flesh = (
    um_green_skins.loc[um_green_skins['Flesh colour'].eq(flesh)]
    for flesh in ('green', 'red')
)
um_red_skins_green_flesh, um_red_skins_red_flesh = (
    um_red_skins.loc[um_red_skins['Flesh colour'].eq(flesh)]
    for flesh in ('green', 'red')
)

In [None]:
def grafici_box_um(um_green_skins_green_flesh, um_green_skins_red_flesh,um_red_skins_green_flesh,um_red_skins_red_flesh, column):
    """Show a 2x2 grid of box plots of ``column`` grouped by 'Juice/Wine'.

    Grid columns are the skin colour (green on the left, red on the right);
    grid rows are the flesh colour (green on top, red at the bottom).

    Parameters
    ----------
    um_green_skins_green_flesh, um_green_skins_red_flesh,
    um_red_skins_green_flesh, um_red_skins_red_flesh : pandas.DataFrame
        The four skin/flesh subsets. Each must contain 'Juice/Wine' and
        ``column``. An empty subset leaves its panel blank.
    column : str
        Name of the column to plot; its values must be convertible to float.

    Raises
    ------
    KeyError
        If a required column is missing from a non-empty subset.
    ValueError
        If a value of ``column`` cannot be converted to float.
    """
    # (subset, grid row, grid column), in the order the traces are added.
    panels = (
        (um_green_skins_green_flesh, 1, 1),
        (um_red_skins_green_flesh, 1, 2),
        (um_green_skins_red_flesh, 2, 1),
        (um_red_skins_red_flesh, 2, 2),
    )

    fig = make_subplots(rows=2, cols=2, subplot_titles=('Green Skin', "Red skin"))

    # Add each box plot at its position in the grid
    for subset, row, col in panels:
        if subset.empty:
            continue
        # The values must stay in row order: plotly express attaches array-like
        # arguments to the data frame by position, so sorting them first would
        # pair each value with the wrong 'Juice/Wine' group.
        values = subset[column].astype(float)
        box = px.box(subset, x='Juice/Wine', y=values)
        for trace in box.data:
            fig.add_trace(trace, row=row, col=col)

    # Layout
    fig.update_layout(title_text=column, height = 800, width = 1200, template = 'plotly_white')
    fig.update_yaxes(title_text="Green Flesh", row=1, col=1)
    fig.update_yaxes(title_text="Red Flesh", row=2, col=1)

    fig.show()

In [None]:
# Call grafici_box_um once for every measurement column (the identifier and
# category columns are removed first).
um_loop = um.drop(columns=['Sample Number', 'Juice/Wine', 'Skin colour', 'Flesh colour'])
for column in um_loop:
    grafici_box_um(
        um_green_skins_green_flesh,
        um_green_skins_red_flesh,
        um_red_skins_green_flesh,
        um_red_skins_red_flesh,
        column,
    )