In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import altair as alt

In [2]:
df = pd.read_csv('coffee_value_chain.csv')
df

Unnamed: 0,phase,illustration,commercial_coffee_lb,commercial_coffee_kg,green_coffee_lb,green_coffee_kg,addtl_cost,1,2.204623
0,Farming,,1.01,0.458128,3.24,1.469639,,,
1,Transportation,,1.165,0.528435,3.395,1.539946,0.155,,
2,Processing & milling,,1.872274,0.849249,4.102274,1.86076,0.707274,1.297274,
3,Exporting & importing,,2.027274,0.919556,4.257274,1.931067,0.155,,
4,Roasting,,2.420274,1.097818,4.650274,2.109328,0.393,,
5,Retailing,,5.320274,2.413235,7.550274,3.424746,2.9,,


In [4]:
df = df[['phase', 'commercial_coffee_kg', 'green_coffee_kg', 'addtl_cost']]
df.iloc[0]['addtl_cost'] = 0
df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,phase,commercial_coffee_kg,green_coffee_kg,addtl_cost
0,Farming,0.458128,1.469639,
1,Transportation,0.528435,1.539946,0.155
2,Processing & milling,0.849249,1.86076,0.707274
3,Exporting & importing,0.919556,1.931067,0.155
4,Roasting,1.097818,2.109328,0.393
5,Retailing,2.413235,3.424746,2.9


## Split the df

In [9]:
df_c = df[['phase', 'commercial_coffee_kg']]
df_g = df[['phase', 'green_coffee_kg']]

df_c = df_c.rename(columns={'commercial_coffee_kg': 'usd_kg'})
df_g = df_g.rename(columns={'green_coffee_kg': 'usd_kg'})

df_c['type'] = 'Commodity'
df_g['type'] = 'Green'

df_all = pd.concat([df_c, df_g])
df_all

Unnamed: 0,phase,usd_kg,type
0,Farming,0.458128,Commodity
1,Transportation,0.528435,Commodity
2,Processing & milling,0.849249,Commodity
3,Exporting & importing,0.919556,Commodity
4,Roasting,1.097818,Commodity
5,Retailing,2.413235,Commodity
0,Farming,1.469639,Green
1,Transportation,1.539946,Green
2,Processing & milling,1.86076,Green
3,Exporting & importing,1.931067,Green


## Plot chart

In [10]:
from vega_datasets import data

source = data.iowa_electricity()

alt.Chart(source).mark_area().encode(
    x="year:T",
    y="net_generation:Q",
    color="source:N"
)

In [33]:
order = df_g['phase'].tolist()

In [45]:

base = alt.Chart(df_all).encode(
    x=alt.Y('phase:O', sort=order),
    y="usd_kg:Q",
    color="type:N"
).properties(width=1000, height=300)

base.mark_point() + base.mark_area(interpolate='monotone', opacity=0.3) 

In [54]:

base1 = alt.Chart(df_g).encode(
    x=alt.Y('phase:O', sort=order),
    y="usd_kg:Q",
    color="type:N"
).properties(width=1000, height=300)

base1.mark_point() + base.mark_area(interpolate='monotone', opacity=0.3) 

In [55]:
base2 = alt.Chart(df_c).encode(
    x=alt.Y('phase:O', sort=order),
    y="usd_kg:Q",
    color="type:N"
).properties(width=1000, height=300)

base2.mark_point() + base.mark_area(interpolate='monotone', opacity=0.3) 

In [43]:
import altair as alt
from altair.expr import datum

from vega_datasets import data
stocks = data.stocks.url

base = alt.Chart(stocks).encode(
    x='date:T',
    y='price:Q',
    color='symbol:N'
).transform_filter(
    datum.symbol == 'GOOG'
)

base.mark_line() + base.mark_point()

In [58]:
import altair as alt
from vega_datasets import data

source = data.stocks()

highlight = alt.selection(type='single', on='mouseover',
                          fields=['symbol'], nearest=True)

base = alt.Chart(source).encode(
    x='date:T',
    y='price:Q',
    color='symbol:N'
)

points = base.mark_circle().encode(
    opacity=alt.value(0.5)
).add_selection(
    highlight
).properties(
    width=600
)

lines = base.mark_line().encode(
    size=alt.condition(~highlight, alt.value(1), alt.value(3))
)

points + lines

In [76]:
base = alt.Chart(df_all).encode(
    x=alt.Y('phase:O', sort=order),
    y="usd_kg:Q",
    color="type:N"
)

points = base.mark_point().encode(
    opacity=alt.value(0.7)
).add_selection(
    highlight
).properties(
    width=1000
)

lines = base.mark_line(interpolate="monotone", opacity=0.7)
# points + lines
points + lines

In [79]:
df['diff'] = df['green_coffee_kg'] - df['commercial_coffee_kg']
df

Unnamed: 0,phase,commercial_coffee_kg,green_coffee_kg,addtl_cost,diff
0,Farming,0.458128,1.469639,,1.011511
1,Transportation,0.528435,1.539946,0.155,1.011511
2,Processing & milling,0.849249,1.86076,0.707274,1.011511
3,Exporting & importing,0.919556,1.931067,0.155,1.011511
4,Roasting,1.097818,2.109328,0.393,1.011511
5,Retailing,2.413235,3.424746,2.9,1.011511
