In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

In [None]:
# Notebook-wide pandas display limits: show wide frames and long cell text
# without truncation, and allow up to 500 rows in a rendered frame.
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_colwidth', 100)
pd.set_option('display.max_rows', 500)

In [None]:
# Per-ingredient table with ingredient counts per year.
# Later cells read its 'counts' column, use the first column as ingredient
# names, and treat positional columns 4-23 as the per-year counts.
# NOTE(review): that column layout is assumed by the reshaping cell below -
# confirm against the CSV header.
df = pd.read_csv('../data/ingredient_data.csv')

In [None]:
# Peek at the first row to check the column layout of the ingredient table.
df.head(1)

In [None]:
# Per-year recipe statistics. Later cells use the columns 'year',
# 'nr_recipes', 'ingredients_avrg', 'ingredients_std', 'calories_median'
# and 'calories_std'.
df1 = pd.read_csv('../data/recipes_per_year.csv')

In [None]:
# Total number of recipes summed over every row of the per-year table.
total = df1['nr_recipes'].sum()

In [None]:
# Share of all recipes that contain each ingredient, in percent (1 decimal).
df['percentages'] = df['counts'].div(total).mul(100).round(1)

In [None]:
# Re-check the first row now that the 'percentages' column exists.
df.head(1)

In [None]:
# Reshape the first five ingredient rows into a year-by-ingredient table.
dff = df.head(5)

# After transposing, each original column becomes a row; row 0 holds the
# ingredient names, which become the new column labels.
ing = dff.transpose()
ing.columns = ing.iloc[0]

# Keep transposed rows 4..23 and expose their labels as a 'year' column.
# NOTE(review): positions 4:24 are assumed to be the per-year count columns
# of the CSV - confirm against the file header.
ing1 = ing.iloc[4:24]
ing1 = ing1.reset_index()
ing1 = ing1.rename(columns={'index': 'year'})

In [None]:
# Inspect the reshaped table: one row per year, one column per ingredient.
ing1

In [None]:
# Build the long-format frame used by the bubble chart: one row per
# (year, ingredient) with the recipe count and the share of that year's recipes.

# Every column except 'year' is an ingredient. Selecting them this way avoids
# depending on 'butter' happening to be the first ingredient column.
# The transposed frame is object-dtype (it once held the ingredient names), so
# convert to float explicitly; otherwise DataFrame.round may skip the columns.
ingredient_counts = ing1.drop(columns='year').astype(float)

# Recipes per year, matched to the ingredient rows by integer index.
# NOTE(review): the last row of df1 is dropped and the match is positional,
# not by year value - presumably df1 has one more year than the ingredient
# table; confirm. Reindexing pins the divisor to ing1's rows so a longer df1
# cannot add rows and shift the positional assignment below.
recipes_per_year = df1.nr_recipes.iloc[:-1].reindex(ingredient_counts.index)

ing_per = (
    ingredient_counts
    .div(recipes_per_year, axis=0)
    .mul(100)
    .fillna(0)   # years without a matching recipe total show as 0 %
    .round(1)
)
ing_per['year'] = ing1.year

# Both frames share the same index and column order, so the two melts are
# row-for-row aligned and the percentage column can be attached directly.
melt = ing1.melt(id_vars='year', value_name='recipes_number')
melt_perc = ing_per.melt(id_vars='year', value_name='recipes_percentage')
melt['recipes_percentage'] = melt_perc['recipes_percentage']
melt['recipes_number'] = melt['recipes_number'].astype(int)

In [None]:
# Optional (disabled): restrict the chart to a hand-picked subset of ingredients.
# melt1 = melt[melt.ingredients.isin(['onion', 'garlic', 'butter', 'banana'])]

In [None]:
# Bubble chart: percentage of recipes per year for each ingredient,
# with bubble size given by the number of recipes.
colors = [
    '#8dd3c7',
    '#bc80bd',
    '#fb8072',
    '#b3de69',
    '#fdb462',
]

fig = px.scatter(
    melt,
    x="year",
    y="recipes_percentage",
    size='recipes_number',
    color='ingredients',
    size_max=40,
    template='ggplot2',
    color_discrete_sequence=colors,
)
fig.update_layout(
    margin=dict(r=0, t=0, l=0, b=0),
    xaxis_title='Year',
    yaxis_title='Recipes %',
)
fig.show()

In [None]:
# First five rows of the year-by-ingredient table.
ing1.head(5)

In [None]:
# First rows of the per-year recipe statistics.
df1.head()

In [None]:

# Area chart of the number of recipes per year.
fig = px.area(
    df1,
    x='year',
    y='nr_recipes',
    template='ggplot2',
    color_discrete_sequence=['#bc80bd'],
)
fig.update_layout(
    margin=dict(r=0, t=0, l=0, b=0),
    xaxis_title='Year',
    yaxis_title='Number of Recipes',
)
fig.show()

In [None]:
# Line with a shaded +/- one-standard-deviation band, following
# https://plotly.com/python/continuous-error-bars/

band_x = df1['year']
band_center = df1['ingredients_avrg']
band_spread = df1['ingredients_std']

# Visible line: average number of ingredients per year.
average_trace = go.Scatter(
    name='Average',
    x=band_x,
    y=band_center,
    mode='lines',
    line={'color': '#fb8072', 'width': 4},
)

# Invisible (zero-width) line marking the top of the band.
upper_trace = go.Scatter(
    name='Upper Bound',
    x=band_x,
    y=band_center + band_spread,
    mode='lines',
    marker={'color': '#444'},
    line={'width': 0},
    showlegend=False,
)

# Invisible line marking the bottom of the band. fill='tonexty' shades up to
# the trace added just before it, so it must directly follow the upper bound.
lower_trace = go.Scatter(
    name='Lower Bound',
    x=band_x,
    y=band_center - band_spread,
    mode='lines',
    marker={'color': '#fb8072'},
    line={'width': 0},
    fill='tonexty',
    fillcolor='rgba(252, 205, 229, 0.4)',
    showlegend=False,
)

fig = go.Figure([average_trace, upper_trace, lower_trace])
fig.update_layout(
    title='Average number of Ingredients per Recipe',
    xaxis_title='Year',
    yaxis_title='Number of Ingredients',
    hovermode="x",
    template='ggplot2',
)
fig.show()

In [None]:
# (rows, columns) of the per-year recipe statistics.
df1.shape

In [None]:
# Median calories per recipe per year with a shaded +/- one-standard-deviation
# band around the median.
fig = go.Figure([
    # Visible line. It plots the median, so it is labelled 'Median' to agree
    # with the data column and the chart title (it was labelled 'Average').
    go.Scatter(
        name='Median',
        x=df1['year'],
        y=df1['calories_median'],
        mode='lines',
        line=dict(color='#fb8072', width=4),
    ),
    # Invisible (zero-width) line marking the top of the band.
    go.Scatter(
        name='Upper Bound',
        x=df1['year'],
        y=df1['calories_median']+df1['calories_std'],
        mode='lines',
        marker=dict(color="#444"),
        line=dict(width=0),
        showlegend=False
    ),
    # Invisible line marking the bottom of the band. fill='tonexty' shades up
    # to the trace added just before it, so it must follow the upper bound.
    go.Scatter(
        name='Lower Bound',
        x=df1['year'],
        y=df1['calories_median']-df1['calories_std'],
        marker=dict(color='#fdb462'),
        line=dict(width=0),
        mode='lines',
        fillcolor='rgba(255, 255, 179, 0.5)',
        fill='tonexty',
        showlegend=False
    )
])
fig.update_layout(
    xaxis_title='Year',
    yaxis_title='Calories',
    title='Median Calories per Recipe',
    hovermode="x", template='ggplot2',
)
fig.show()

In [None]:
# First five ingredient rows plus a text column ('percent') holding the
# percentage followed by ' %', used as the label inside the funnel bars.
data = df.iloc[:5]
perc = data['percentages'].map(lambda value: f'{value!s} %').rename('percent')
data = data.join(perc)

In [None]:
# Show the ingredient table, including the 'percentages' column added above.
# NOTE(review): this renders the whole frame (up to display.max_rows rows);
# consider df.head() if only a spot check is needed.
df

In [None]:
# Twelve-colour palette; the trailing expression previews it as swatches.
colors = [
    '#8dd3c7', '#ffffb3', '#bebada', '#fb8072',
    '#80b1d3', '#fdb462', '#b3de69', '#fccde5',
    '#d9d9d9', '#bc80bd', '#ccebc5', '#ffed6f',
]

sns.color_palette(colors)

In [None]:
# Preview the (ingredient, count, row position) triples that the funnel
# chart's annotation loop iterates over.
for y, (name, x) in enumerate(zip(data.ingredients, data.counts)):
    print(name, x, y)

In [None]:
# Funnel chart of the rows in `data`: bar width is the ingredient's count and
# the text inside each bar is its percentage label.
fig = go.Figure(go.Funnel(
    y=data.ingredients,
    x=data.counts,
    textposition="inside",
    text=data.percent,
    opacity=0.65,
    marker={"color": ['#fb8072', '#bc80bd', '#8dd3c7', '#bebada', '#ffffb3']},
    connector={"line": {"color": "royalblue", "dash": "dot", "width": 1}},
))

# The y tick labels are hidden below, so write each ingredient name inside
# its own bar, left of centre: x is a negative fraction of the bar's count,
# y is the bar's row position.
for position, (name, count) in enumerate(zip(data.ingredients, data.counts)):
    fig.add_annotation(x=-count/2.6, y=position,
                       text=name, showarrow=False)

# Transparent background. The x axis carries the counts, so it is titled
# 'Number of Recipes' (it was titled 'Year', which this chart does not plot).
fig.update_layout({'plot_bgcolor': 'rgba(0, 0, 0, 0)', 'paper_bgcolor': 'rgba(0, 0, 0, 0)'},
                  margin={"r":5,"t":5,"l":5,"b":5}, xaxis_title='Number of Recipes', font={'size':16})
fig.layout.yaxis.showticklabels = False
fig.show()

In [None]:
# Write the table, now including the 'percentages' column, back to disk.
# NOTE(review): this is the same path the notebook loads `df` from, so the
# input file is overwritten on every run - confirm this is intended, or
# write to a separate output file.
df.to_csv('../data/ingredient_data.csv', index=False)

In [None]:
# Final look at the table that was just written to disk.
df