In [None]:
import pandas as pd
import numpy as np
import json
from pathlib import Path
from itables import init_notebook_mode, show
import matplotlib.pyplot as plt
import seaborn as sns
def _format_float(value):
    """Format a float for DataFrame display.

    Values whose magnitude exceeds 1e5 get thousands separators and no
    decimals; everything else is shown with two decimals. Both forms are
    right-aligned in a 15-character field.
    """
    if abs(value) > 1e5:
        return f'{value:15,.0f}'
    return f'{value:15.2f}'


pd.set_option('display.float_format', _format_float)

# --- Run parameters: reporting period and trade flow ---------------------
# `flow` and `year` are interpolated into plot titles and the count report.
year, quarter = 2021, 1
flow = 'import'

import itables

# Switch itables to interactive display mode (all_interactive=True).
itables.init_notebook_mode(all_interactive=True)

# --- Thresholds ------------------------------------------------------------
# The comments describe how apply_thresholds() compares each row against them.

# Not referenced in the cells shown here - presumably used elsewhere; confirm.
share_total = 0.05
n_transactions_year = 30

# Month coverage: a row needs no_of_months >= threshold. Rows whose `section`
# equals `section_seasons` are additionally checked against
# `no_of_months_seasons`.
no_of_months = 5
no_of_months_seasons = 3
section_seasons = 'II'

# Price volatility: the row value must be strictly below each threshold.
price_cv = 0.5
max_by_min = 10
max_by_median = 5
median_by_min = 5

# Share of small transactions: the row value must be strictly above this.
share_small = 0.0001

In [None]:
#### Visualization of thresholds and accepted HS

In [None]:
# Columns offered in the X / Y dropdowns of the interactive scatter plot.
# Adjust the names if the columns of the plotted DataFrame differ.
axis_variables = [
    'HS_sum',
    'n_transactions_year',
    'price',
    'no_of_months',
    'price_max',
    'price_min',
    'price_median',
    'price_mean',
    'price_sd',
    'price_cv',
    'share_total',
    'T_sum_small',
    'share_small',
    'max_by_min',
    'max_by_median',
    'median_by_min',
]

# Imports for the interactive threshold explorer (pandas, matplotlib and seaborn are already imported in the first cell)
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from ipywidgets import interact, FloatSlider, Dropdown, Checkbox, Output, VBox, FloatText
from IPython.display import display

# Module-level holder for the most recently flagged dataset. update_plot()
# overwrites it (via `global`) with the unfiltered result of
# update_accepted_HS() on every widget change; it stays None until then.
current_data = None

def apply_thresholds(row, no_of_months, no_of_months_seasons, price_cv, max_by_min, max_by_median, median_by_min, share_small):
    """Decide whether a single row passes every acceptance threshold.

    Parameters
    ----------
    row : mapping-like (e.g. a pandas Series)
        Must provide 'section', 'no_of_months', 'price_cv', 'max_by_min',
        'max_by_median', 'median_by_min' and 'share_small'.
    no_of_months : number
        Minimum 'no_of_months' for rows outside the seasonal section.
    no_of_months_seasons : number
        Minimum 'no_of_months' for rows whose 'section' equals the
        module-level ``section_seasons``.
    price_cv, max_by_min, max_by_median, median_by_min : number
        Exclusive upper bounds for the row values of the same name.
    share_small : number
        Exclusive lower bound for the row's 'share_small'.

    Returns
    -------
    bool
        True only when all checks hold.

    Notes
    -----
    Previously the seasonal check was AND-ed with the general month check,
    so a seasonal threshold lower than the general one never had any effect.
    The seasonal section is now judged by its own threshold *instead of* the
    general one.
    """
    # Pick the month threshold that applies to this row's section.
    is_seasonal = row['section'] == section_seasons
    required_months = no_of_months_seasons if is_seasonal else no_of_months

    checks = (
        row['no_of_months'] >= required_months,
        row['price_cv'] < price_cv,
        row['max_by_min'] < max_by_min,
        row['max_by_median'] < max_by_median,
        row['median_by_min'] < median_by_min,
        row['share_small'] > share_small,
    )
    return all(checks)

# Flag every row of a dataset as accepted / rejected for the given thresholds
def update_accepted_HS(basedata1, no_of_months, no_of_months_seasons, price_cv, max_by_min, max_by_median, median_by_min, share_small):
    """Return a copy of ``basedata1`` with a fresh 'accepted_HS' column.

    Each row is passed to ``apply_thresholds`` together with the threshold
    values; the result is stored in 'accepted_HS'. The input frame itself is
    left untouched because the work is done on a copy.
    """
    threshold_values = (
        no_of_months,
        no_of_months_seasons,
        price_cv,
        max_by_min,
        max_by_median,
        median_by_min,
        share_small,
    )

    # Work on a copy so the caller's frame is not modified
    flagged = basedata1.copy()
    flagged['accepted_HS'] = flagged.apply(apply_thresholds, axis=1, args=threshold_values)
    return flagged

# Draw the accepted / rejected scatter plot on a given axis
def plot_transactions(ax, data, x_var, y_var):
    """Scatter ``y_var`` against ``x_var`` on ``ax``, coloured by 'accepted_HS'.

    Accepted rows (True) are drawn in blue, rejected rows (False) in red.
    The title uses the module-level ``flow`` and ``year``; the axis labels
    are the selected column names.
    """
    acceptance_colours = {True: 'blue', False: 'red'}
    scatter_options = dict(
        data=data,
        x=x_var,
        y=y_var,
        hue='accepted_HS',
        palette=acceptance_colours,
        ax=ax,
    )
    sns.scatterplot(**scatter_options)

    heading = f'{flow.capitalize()} {year}. Acceptable volatility and HS'
    ax.set_title(heading)
    ax.set_xlabel(x_var)
    ax.set_ylabel(y_var)

# Callback for the widgets: re-flag the data, refresh the report and redraw
def update_plot(x_var, y_var, no_of_months, no_of_months_seasons, price_cv, max_by_min, max_by_median, median_by_min, share_small, filter_false):
    """Recompute 'accepted_HS' for the current thresholds and redraw the plot.

    Parameters
    ----------
    x_var, y_var : str
        Column names plotted on the x and y axes.
    no_of_months, no_of_months_seasons, price_cv, max_by_min, max_by_median,
    median_by_min, share_small : number
        Threshold values forwarded unchanged to ``update_accepted_HS``.
    filter_false : bool
        When true, rejected rows are dropped before counting and plotting.

    Side effects
    ------------
    * Reads the module-level ``basedata1``, ``report_output``, ``flow`` and
      ``year``.
    * Stores the full (unfiltered) flagged dataset in the module-level
      ``current_data``.
    * Rewrites the contents of ``report_output`` and shows a new figure.
    """
    global current_data

    # Re-flag the base data and keep the unfiltered result for later cells
    updated_data = update_accepted_HS(basedata1, no_of_months, no_of_months_seasons, price_cv, max_by_min, max_by_median, median_by_min, share_small)
    current_data = updated_data

    # Optionally keep only the accepted rows
    if filter_false:
        updated_data = updated_data[updated_data['accepted_HS'].eq(True)]

    # The counts describe the data that is plotted, i.e. after the optional filter
    accepted_count = updated_data['accepted_HS'].value_counts()
    total_count = updated_data.shape[0]

    # wait=True postpones clearing until the new text arrives, which avoids flicker
    report_output.clear_output(wait=True)
    with report_output:
        print(f"**{flow.capitalize()} {year}. Accepted HS Count Report**")
        print(f"Total Entries: {total_count}")
        print(f"Accepted HS (True): {accepted_count.get(True, 0)}")
        print(f"Rejected HS (False): {accepted_count.get(False, 0)}")
        print("******************************")

    # A fresh figure per call. The earlier plt.clf() only cleared (or
    # implicitly created) an unrelated "current" figure, so it was dropped.
    fig, ax = plt.subplots(figsize=(10, 6))
    plot_transactions(ax, updated_data, x_var, y_var)

    plt.tight_layout()
    plt.show()

    # Release the figure so repeated slider moves do not accumulate open figures
    plt.close(fig)

# Dropdowns choosing which columns go on the x and y axes
x_var_dropdown = Dropdown(options=axis_variables, description="X Variable", value='HS_sum')
y_var_dropdown = Dropdown(options=axis_variables, description="Y Variable", value='price_cv')

# Let long descriptions (e.g. 'No of Months Seasons') use their natural width
# instead of being truncated to the default label width.
_wide_label = {'description_width': 'initial'}

# Threshold sliders. The initial values come from the configuration constants
# defined in the first cell, so the widgets cannot drift from the configured
# thresholds. Month counts are whole numbers, hence the '.0f' readout.
no_of_months_slider = FloatSlider(value=no_of_months, min=1, max=12, step=1, description='No of Months', readout_format='.0f', style=_wide_label)
no_of_months_seasons_slider = FloatSlider(value=no_of_months_seasons, min=1, max=12, step=1, description='No of Months Seasons', readout_format='.0f', style=_wide_label)
price_cv_slider = FloatSlider(value=price_cv, min=0, max=10, step=0.1, description='Price CV', style=_wide_label)
max_by_min_slider = FloatSlider(value=max_by_min, min=1, max=40, step=0.1, description='Max/Min', style=_wide_label)
max_by_median_slider = FloatSlider(value=max_by_median, min=1, max=40, step=0.1, description='Max/Median', style=_wide_label)
median_by_min_slider = FloatSlider(value=median_by_min, min=1, max=40, step=0.1, description='Median/Min', style=_wide_label)

# Free-text input for share_small (the value is too small for a practical slider)
share_small_text = FloatText(value=share_small, description='Share Small:', layout={'width': '200px'})

# When ticked, only rows with accepted_HS == True are counted and plotted
filter_false_checkbox = Checkbox(value=False, description="Accepted HS", layout={'width': '250px', 'height': '30px'})

# Output area that update_plot() fills with the count report
report_output = Output()
display(report_output)

# Wire the widgets to update_plot. The trailing semicolon suppresses the
# function repr that interact() would otherwise echo as the cell result.
interact(
    update_plot,
    x_var=x_var_dropdown,
    y_var=y_var_dropdown,
    no_of_months=no_of_months_slider,
    no_of_months_seasons=no_of_months_seasons_slider,
    price_cv=price_cv_slider,
    max_by_min=max_by_min_slider,
    max_by_median=max_by_median_slider,
    median_by_min=median_by_min_slider,
    share_small=share_small_text,
    filter_false=filter_false_checkbox
);

In [None]:


# Display labels for the one-digit series codes
series_labels = {
    "0": "0 - Food and live animals",
    "1": "1 - Beverages and tobacco",
    "2": "2 - Crude materials, inedible, except fuels",
    "3": "3 - Mineral fuels, lubricants and related materials",
    "4": "4 - Animal and vegetable oils, fats and waxes",
    "5": "5 - Chemicals and related products n.e.s.",
    "6": "6 - Manufactured goods classified chiefly by material",
    "7": "7 - Machinery and transport equipment",
    "8": "8 - Miscellaneous manufactured articles",
    "9": "9 - Commodities and transactions"
}

# Keep only the rows at level 'Sitc1'
data = index_chained.loc[index_chained['level'] == 'Sitc1']

# sort_values returns a new frame, so adding the 'periode' column below cannot
# leak back into `data` / `index_chained`. Sorting chronologically also makes
# "last row of a series" mean "latest period" for the end-of-line labels.
df = data.sort_values(['year', 'quarter'])

# Period label such as '2021-Q1'
df["periode"] = df["year"].astype(str) + "-Q" + df["quarter"].astype(str)

# Series to draw (edit this list to choose other series)
selected_series = ['6', '7']
df_selected = df[df['series'].isin(selected_series)]

# Style must be set before the axes are created to take effect
sns.set_style("whitegrid")
fig, ax = plt.subplots(figsize=(12, 6))

# One line per series
sns.lineplot(
    data=df_selected,
    x="periode",
    y="index_chained",
    hue="series",
    marker="o",
    ax=ax
)

# Annotate the end of each line with its descriptive label
for series in selected_series:
    series_data = df_selected[df_selected['series'] == series]

    if series_data.empty:
        print(f"Warning: No data for series {series}")
        continue

    last_point = series_data.iloc[-1]
    label = series_labels.get(series, series)  # fall back to the bare code
    ax.text(last_point['periode'], last_point['index_chained'], label,
            horizontalalignment='left', size='medium', color='black', weight='semibold')

# Formatting
ax.set_xlabel("Period")
ax.set_ylabel("Index (Chained)")
ax.set_title(f'Price index (2022=100) - {flow}')
ax.tick_params(axis='x', labelrotation=45)
ax.legend(title="Series")
fig.tight_layout()

# Show the plot
plt.show()
