In [11]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt

# Show every column when a DataFrame is displayed (None removes the column limit)
pd.options.display.max_columns = None

In [2]:
# Load the grain table from the CSV export and tidy it: one row per crop,
# one numeric column per reporting period, columns labelled by year.
start_row = 10  # 1-based line of the file that is used as the header row
number_of_rows = 16  # data rows read below the header (file lines 11 to 26)

df_raw = pd.read_csv(
    '3210001301-eng.csv',
    skiprows=range(start_row - 1),  # skip every line above the header
    nrows=number_of_rows,
)

# drop() returns a new frame, so df_raw is left untouched for inspection
df = df_raw.drop(columns=['Supply and disposition of grains', 'March 2024'])


def _strip_commas(column):
    """Remove commas from a text column; return numeric columns unchanged."""
    # Numeric columns have no .str accessor, so calling it would raise
    if pd.api.types.is_numeric_dtype(column):
        return column
    return column.str.replace(',', '', regex=False)


# replace all commas in the dataframe
df = df.apply(_strip_commas)

# Transform all columns with "20**" to numeric; unparseable cells become NaN
year_columns = [col for col in df.columns if '20' in col]
df = df.assign(**{col: pd.to_numeric(df[col], errors='coerce') for col in year_columns})

# Name the first column 'Crop' by position and keep only the second word of
# every other label (labels look like 'March 2024', so this keeps the year)
df.columns = ['Crop'] + [label.split(' ')[1] for label in df.columns[1:]]

# drop the first two rows; .copy() gives an independent frame so the column
# assignment below does not write into a slice (SettingWithCopyWarning)
df = df.iloc[2:].copy()

# Remove digits, then leading and trailing spaces, in the Crop column
df['Crop'] = df['Crop'].str.replace(r'\d+', '', regex=True).str.strip()

# Round all numeric values
df = df.round(0)

df.head()

Unnamed: 0,Crop,2018,2018.1,2018.2,2019,2019.1,2019.2,2020,2020.1,2020.2,2021,2021.1,2021.2,2022,2022.1,2022.2,2023,2023.1,2023.2
2,Durum wheat,4962.0,4962.0,5785.0,5785.0,5785.0,5017.0,5017.0,5017.0,6571.0,6571.0,6571.0,3032.0,3032.0,3032.0,5790.0,5790.0,5790.0,4045.0
3,Wheat excluding durum,25415.0,25415.0,26567.0,26567.0,26567.0,27653.0,27653.0,27653.0,28866.0,28866.0,28866.0,19390.0,19390.0,19390.0,28545.0,28545.0,28545.0,27909.0
4,Oats,3733.0,3733.0,3436.0,3436.0,3436.0,4227.0,4227.0,4227.0,4576.0,4576.0,4576.0,2899.0,2899.0,2899.0,5226.0,5226.0,5226.0,2636.0
5,Barley,7891.0,7891.0,8380.0,8380.0,8380.0,10383.0,10383.0,10383.0,10741.0,10741.0,10741.0,6984.0,6984.0,6984.0,9987.0,9987.0,9987.0,8896.0
6,Rye,341.0,341.0,236.0,236.0,236.0,333.0,333.0,333.0,488.0,488.0,488.0,372.0,372.0,372.0,520.0,520.0,520.0,358.0


In [3]:
# Reshape to long form: one row per (crop, period column), keyed by the year label
df_melted = pd.melt(df, id_vars='Crop', var_name='Year', value_name='Metric_Tonnes')

# Add up the values of every column that shares a year label, per crop
# NOTE(review): several period columns carry the same year label, so their
# values are summed together here - confirm a sum is the intended yearly figure
by_crop_and_year = df_melted.groupby(['Crop', 'Year'], as_index=False)
df_grouped = by_crop_and_year['Metric_Tonnes'].sum()
df_grouped

Unnamed: 0,Crop,Year,Metric_Tonnes
0,Barley,2018,24162.0
1,Barley,2019,27143.0
2,Barley,2020,31507.0
3,Barley,2021,28466.0
4,Barley,2022,23955.0
...,...,...,...
79,Wheat excluding durum,2019,80787.0
80,Wheat excluding durum,2020,84172.0
81,Wheat excluding durum,2021,77122.0
82,Wheat excluding durum,2022,67325.0


In [5]:
# Grouped bar chart: years along the x-axis, one coloured bar per crop
fig = px.bar(
    df_grouped,
    x='Year',
    y='Metric_Tonnes',
    color='Crop',
    barmode='group',
)
# Blank out both axis titles, fix the canvas size, then render
fig.update_xaxes(title='').update_yaxes(title="")
fig.update_layout(width=800, height=600).show()

# Diverging Color

Diverging color scales are appropriate for continuous data that has a natural midpoint (for example, values that fall above and below zero or an average).

In [20]:
# Bubble chart: Metric_Tonnes drives both marker size and colour (diverging RdBu scale)
fig = px.scatter(
    df_grouped,
    x='Year',
    y='Crop',
    size='Metric_Tonnes',
    color='Metric_Tonnes',
    color_continuous_scale=px.colors.diverging.RdBu,
)

# Same light-gray grid and empty title on both axes
for update_axis in (fig.update_yaxes, fig.update_xaxes):
    update_axis(showgrid=True, gridwidth=1, gridcolor='LightGray', title='')

# Human-readable colour bar heading instead of the raw column name
fig.update_coloraxes(colorbar=dict(title='Metric Tonnes'))

# White canvas at a fixed 800x600 size
fig.update_layout(paper_bgcolor="white", plot_bgcolor="white", width=800, height=600)
fig.show()

- Diverging reversed

In [17]:
# Bubble chart with the reversed diverging RdBu scale (the `_r` suffix flips
# the colour order). The scale is taken from px.colors.diverging, like the
# other cells in this section, instead of the px.colors.sequential namespace.
fig = px.scatter(
    df_grouped, y='Crop', x='Year', color='Metric_Tonnes', size='Metric_Tonnes',
    color_continuous_scale=px.colors.diverging.RdBu_r,
)
# White paper and plot background
fig.update_layout(
    paper_bgcolor="white",
    plot_bgcolor="white",
)
# Light-gray grid lines on both axes
fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='LightGray')
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='LightGray')
# Human-readable colour bar heading instead of the raw column name
fig.update_coloraxes(colorbar=dict(title='Metric Tonnes'))
# Hide both axis titles
fig.update_yaxes(title="")
fig.update_xaxes(title='')
# Fixed 800x600 canvas
fig.update_layout(width=800, height=600)
fig.show()

In [21]:
# Bubble chart coloured with the diverging "balance" scale
scatter_args = dict(
    x='Year',
    y='Crop',
    size='Metric_Tonnes',
    color='Metric_Tonnes',
    color_continuous_scale=px.colors.diverging.balance,
)
fig = px.scatter(df_grouped, **scatter_args)

# Light-gray grid and no title, applied to the y-axis and then the x-axis
axis_style = dict(showgrid=True, gridwidth=1, gridcolor='LightGray', title='')
fig.update_yaxes(**axis_style)
fig.update_xaxes(**axis_style)

# Human-readable colour bar heading instead of the raw column name
fig.update_coloraxes(colorbar=dict(title='Metric Tonnes'))

# White background, fixed 800x600 canvas
fig.update_layout(
    paper_bgcolor="white",
    plot_bgcolor="white",
    width=800,
    height=600,
)
fig.show()

# Sequential Color

Sequential color scales are appropriate for most continuous data: the color changes steadily from the lowest to the highest value.

In [18]:
# Bubble chart coloured with the sequential Reds scale
fig = px.scatter(
    df_grouped,
    x='Year',
    y='Crop',
    size='Metric_Tonnes',
    color='Metric_Tonnes',
    color_continuous_scale=px.colors.sequential.Reds,
)

# Chain the styling calls: each update_* method returns the figure itself
(
    fig.update_layout(paper_bgcolor="white", plot_bgcolor="white", width=800, height=600)
    .update_yaxes(showgrid=True, gridwidth=1, gridcolor='LightGray', title="")
    .update_xaxes(showgrid=True, gridwidth=1, gridcolor='LightGray', title='')
    .update_coloraxes(colorbar=dict(title='Metric Tonnes'))
)
fig.show()