# Main Backend Code for the Analytical Platform

## Functions

### Database collector

This collects all of the dataframes that were generated from the previous matching score results.

In [1]:
import gc
import pandas as pd
import os
import psutil
import numpy as np
import time
from tkinter import Tk
from tkinter.filedialog import askdirectory

# Every integer score that gets its own row in the frequency tables (0..9999).
desired_indices = range(10000)


def df_count_collector():
    """Create the zero-filled mated / non-mated score accumulators.

    Rebinds the module-level ``df_true_save`` and ``df_false_save`` to fresh
    frames holding one row per value in ``desired_indices`` (column ``score``)
    and an integer ``count`` column initialised to 0.

    Returns:
        tuple: ``(df_true_save, df_false_save)``.
    """
    global df_true_save
    global df_false_save

    def _zeroed():
        # Built directly instead of reindexing an empty frame and calling
        # ``df['count'].fillna(..., inplace=True)``: that chained in-place call
        # works on a temporary under pandas copy-on-write, which would leave
        # NaN behind for the integer cast.
        return pd.DataFrame({'score': list(desired_indices), 'count': 0})

    df_true_save = _zeroed()
    df_false_save = _zeroed()
    return (df_true_save, df_false_save)



### Browser folder

This allows the user to browse to the folder where the matching score results are located.

In [None]:
def browse_button():
    """Ask the user for the folder that holds the matching-score files.

    Shows a folder-selection dialog, stores the selected path in the
    module-level ``path_read`` and prints it.
    """
    global path_read
    root = Tk()
    try:
        root.withdraw()  # hide the empty root window; only the dialog is wanted
        root.wm_attributes('-topmost', 1)  # keep the dialog above other windows
        path_read = askdirectory(title='Select Folder')  # shows dialog box and returns the path
    finally:
        # Destroy the hidden root so each call does not leave a Tk instance behind.
        root.destroy()
    print(path_read)

### Process the files

This step processes the files and writes CSV files containing each matching score and the number of occurrences of that score. These can be used to plot the FMR and FAR curves.

In [None]:
def _read_score_records(file_path):
    """Return one ``{'ID1', 'ID2', 'Score'}`` dict per record found in *file_path*."""
    records = []
    with open(file_path, 'r') as f:
        for line in f:
            # Every line is parsed (not only the last one read).
            for value in line.strip().split(','):
                if not value:
                    # A trailing comma or a blank line produces an empty field,
                    # which is not a record.
                    continue
                # Fixed-width record: chars 0-6 are ID1, 7-13 are ID2, 14-17 the score.
                records.append({
                    'ID1': value[:7],
                    'ID2': value[7:14],
                    'Score': value[14:18],
                })
    return records


def _score_frequency(scores):
    """Count each integer score, zero-filled over every value in ``desired_indices``."""
    # Cast before counting so differently formatted strings for the same
    # number land in one bin instead of producing duplicate index labels.
    counts = scores.astype(int).value_counts().reindex(desired_indices, fill_value=0)
    return pd.DataFrame({'score': counts.index, 'count': counts.values})


def processing():
    """Tally mated / non-mated score frequencies for every ``.txt`` file under ``path_read``.

    A record is "mated" when its two IDs are equal. For each file the
    per-file frequency tables are written to ``score_counts_true.csv`` and
    ``score_counts_false.csv`` (overwritten on every file), then added to the
    module-level running totals ``df_true_save`` / ``df_false_save``, which
    are written to ``score_counts_true_total.csv`` and
    ``score_counts_false_total.csv``.

    Reads the module-level ``path_read`` and ``desired_indices``; the totals
    must have been created beforehand.

    Raises:
        ValueError: if a record's score field is not an integer.
    """
    for root, dirs, files in os.walk(path_read):
        for file in files:
            if os.path.splitext(file)[1] != '.txt':
                continue
            print("Reading: "+file)
            records = _read_score_records(os.path.join(root, file))
            # Explicit columns keep the frame usable when a file has no records.
            df = pd.DataFrame(records, columns=['ID1', 'ID2', 'Score'])

            is_mated = df['ID1'] == df['ID2']
            result_df_true = _score_frequency(df.loc[is_mated, 'Score'])
            result_df_false = _score_frequency(df.loc[~is_mated, 'Score'])

            # Per-file snapshots
            result_df_true.to_csv('score_counts_true.csv', index=False)
            result_df_false.to_csv('score_counts_false.csv', index=False)

            ram_usage = round(psutil.Process(os.getpid()).memory_info().rss / 1024 ** 3,1)
            print(f"Ram Usage (GB): {ram_usage}")

            # Both sides have one row per desired index in the same order, so
            # the addition lines up row by row.
            df_true_save['count'] = df_true_save['count'] + result_df_true['count']
            df_true_save.to_csv('score_counts_true_total.csv', index=False)
            df_false_save['count'] = df_false_save['count'] + result_df_false['count']
            df_false_save.to_csv('score_counts_false_total.csv', index=False)

            # Clearing memory to prevent RAM bottleneck
            del records, df, is_mated, result_df_true, result_df_false
            gc.collect()
            print(file + ' is done')

In [None]:
def browse_button():
    """Ask the user for the folder that holds the matching-score files.

    Shows a folder-selection dialog, stores the selected path in the
    module-level ``path_read`` and prints it.
    """
    global path_read
    root = Tk()
    try:
        root.withdraw()  # hide the empty root window; only the dialog is wanted
        root.wm_attributes('-topmost', 1)  # keep the dialog above other windows
        path_read = askdirectory(title='Select Folder')  # shows dialog box and returns the path
    finally:
        # Destroy the hidden root so each call does not leave a Tk instance behind.
        root.destroy()
    print(path_read)

def _read_score_records(file_path):
    """Return one ``{'ID1', 'ID2', 'Score'}`` dict per record found in *file_path*."""
    records = []
    with open(file_path, 'r') as f:
        for line in f:
            # Every line is parsed (not only the last one read).
            for value in line.strip().split(','):
                if not value:
                    # A trailing comma or a blank line produces an empty field,
                    # which is not a record.
                    continue
                # Fixed-width record: chars 0-6 are ID1, 7-13 are ID2, 14-17 the score.
                records.append({
                    'ID1': value[:7],
                    'ID2': value[7:14],
                    'Score': value[14:18],
                })
    return records


def _score_frequency(scores):
    """Count each integer score, zero-filled over every value in ``desired_indices``."""
    # Cast before counting so differently formatted strings for the same
    # number land in one bin instead of producing duplicate index labels.
    counts = scores.astype(int).value_counts().reindex(desired_indices, fill_value=0)
    return pd.DataFrame({'score': counts.index, 'count': counts.values})


def processing():
    """Tally mated / non-mated score frequencies for every ``.txt`` file under ``path_read``.

    A record is "mated" when its two IDs are equal. For each file the
    per-file frequency tables are written to ``score_counts_true.csv`` and
    ``score_counts_false.csv`` (overwritten on every file), then added to the
    module-level running totals ``df_true_save`` / ``df_false_save``, which
    are written to ``score_counts_true_total.csv`` and
    ``score_counts_false_total.csv``.

    Reads the module-level ``path_read`` and ``desired_indices``; the totals
    must have been created beforehand.

    Raises:
        ValueError: if a record's score field is not an integer.
    """
    for root, dirs, files in os.walk(path_read):
        for file in files:
            if os.path.splitext(file)[1] != '.txt':
                continue
            print("Reading: "+file)
            records = _read_score_records(os.path.join(root, file))
            # Explicit columns keep the frame usable when a file has no records.
            df = pd.DataFrame(records, columns=['ID1', 'ID2', 'Score'])

            is_mated = df['ID1'] == df['ID2']
            result_df_true = _score_frequency(df.loc[is_mated, 'Score'])
            result_df_false = _score_frequency(df.loc[~is_mated, 'Score'])

            # Per-file snapshots
            result_df_true.to_csv('score_counts_true.csv', index=False)
            result_df_false.to_csv('score_counts_false.csv', index=False)

            ram_usage = round(psutil.Process(os.getpid()).memory_info().rss / 1024 ** 3,1)
            print(f"Ram Usage (GB): {ram_usage}")

            # Both sides have one row per desired index in the same order, so
            # the addition lines up row by row.
            df_true_save['count'] = df_true_save['count'] + result_df_true['count']
            df_true_save.to_csv('score_counts_true_total.csv', index=False)
            df_false_save['count'] = df_false_save['count'] + result_df_false['count']
            df_false_save.to_csv('score_counts_false_total.csv', index=False)

            # Clearing memory to prevent RAM bottleneck
            del records, df, is_mated, result_df_true, result_df_false
            gc.collect()
            print(file + ' is done')


# Run the pipeline in order: create the zeroed running totals, ask for the
# input folder, then tally every .txt file found under that folder.
df_count_collector()
browse_button()
processing()

# Implementing the histogram

In [None]:
import gc
import pandas as pd
import os
import psutil
import numpy as np
import time
from tkinter import Tk
from tkinter.filedialog import askdirectory
from tkinter import filedialog as fd
import matplotlib.pyplot as plt
from sqlalchemy import create_engine

# Every integer score that gets its own row in the frequency tables (0..9999).
desired_indices = range(10000)

def database_connection():
    """Build the SQLAlchemy engine used by the ``to_sql`` calls in this notebook.

    Sets the module-level ``connection_string`` (the URL text) and ``engine``.
    """
    # NOTE(review): the credentials are hard-coded in source; consider loading
    # them from the environment or a config file that is not committed.
    DB_USER = 'root'
    DB_PASSWORD = 'HTXB&P'
    DB_HOST = '127.0.0.1'
    DB_PORT = '3306'
    DB_NAME = 'iNspectoratedb'
    global connection_string
    global engine
    # The format is 'mariadb://user:password@host:port/database'
    # NOTE(review): the password is interpolated unescaped; a password
    # containing URL-reserved characters such as '@' or '/' would presumably
    # need URL-encoding first - confirm against the SQLAlchemy URL docs.
    connection_string = f'mariadb://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}'
    engine = create_engine(connection_string)
# Create the engine as soon as the cell runs, so later cells can use `engine`.
database_connection()
def df_collector():
    """Create the zero-filled mated / non-mated score accumulators.

    Rebinds the module-level ``df_true_save`` and ``df_false_save`` to fresh
    frames holding one row per value in ``desired_indices`` (column ``score``)
    and an integer ``count`` column initialised to 0.

    Returns:
        tuple: ``(df_true_save, df_false_save)``.
    """
    global df_true_save
    global df_false_save

    def _zeroed():
        # Built directly instead of reindexing an empty frame and calling
        # ``df['count'].fillna(..., inplace=True)``: that chained in-place call
        # works on a temporary under pandas copy-on-write, which would leave
        # NaN behind for the integer cast.
        return pd.DataFrame({'score': list(desired_indices), 'count': 0})

    df_true_save = _zeroed()
    df_false_save = _zeroed()
    return (df_true_save, df_false_save)

def browse_button():
    """Ask the user for the folder that holds the matching-score files.

    Shows a folder-selection dialog, stores the selected path in the
    module-level ``path_read`` and prints it.
    """
    global path_read
    root = Tk()
    try:
        root.withdraw()  # hide the empty root window; only the dialog is wanted
        root.wm_attributes('-topmost', 1)  # keep the dialog above other windows
        path_read = askdirectory(title='Select Folder')  # shows dialog box and returns the path
    finally:
        # Destroy the hidden root so each call does not leave a Tk instance behind.
        root.destroy()
    print(path_read)


def _read_score_records(file_path):
    """Return one ``{'ID1', 'ID2', 'Score'}`` dict per record found in *file_path*."""
    records = []
    with open(file_path, 'r') as f:
        for line in f:
            # Every line is parsed (not only the last one read).
            for value in line.strip().split(','):
                if not value:
                    # A trailing comma or a blank line produces an empty field,
                    # which is not a record.
                    continue
                # Fixed-width record: chars 0-6 are ID1, 7-13 are ID2, 14-17 the score.
                records.append({
                    'ID1': value[:7],
                    'ID2': value[7:14],
                    'Score': value[14:18],
                })
    return records


def _score_frequency(scores):
    """Count each integer score, zero-filled over every value in ``desired_indices``."""
    # Cast before counting so differently formatted strings for the same
    # number land in one bin instead of producing duplicate index labels.
    counts = scores.astype(int).value_counts().reindex(desired_indices, fill_value=0)
    return pd.DataFrame({'score': counts.index, 'count': counts.values})


def processing():
    """Tally score frequencies for every ``.txt`` file under ``path_read`` and store the raw scores.

    A record is "mated" when its two IDs are equal. For each file:

    * the per-file frequency tables are written to ``score_counts_true.csv``
      and ``score_counts_false.csv`` (overwritten on every file);
    * they are added to the module-level running totals ``df_true_save`` /
      ``df_false_save``, which are written to ``score_counts_true_total.csv``
      and ``score_counts_false_total.csv``;
    * each record's ``Score`` and ``Same_IDs`` flag (``'Y'`` mated, ``'N'``
      non-mated) are appended to the ``all data`` table through ``engine``.

    Reads the module-level ``path_read``, ``desired_indices`` and ``engine``;
    the totals must have been created beforehand.

    Raises:
        ValueError: if a record's score field is not an integer.
    """
    for root, dirs, files in os.walk(path_read):
        for file in files:
            if os.path.splitext(file)[1] != '.txt':
                continue
            print("Reading: "+file)
            records = _read_score_records(os.path.join(root, file))
            # Explicit columns keep the frame usable when a file has no records.
            df = pd.DataFrame(records, columns=['ID1', 'ID2', 'Score'])

            is_mated = df['ID1'] == df['ID2']
            result_df_true = _score_frequency(df.loc[is_mated, 'Score'])
            result_df_false = _score_frequency(df.loc[~is_mated, 'Score'])

            # Per-file snapshots
            result_df_true.to_csv('score_counts_true.csv', index=False)
            result_df_false.to_csv('score_counts_false.csv', index=False)

            ram_usage = round(psutil.Process(os.getpid()).memory_info().rss / 1024 ** 3,1)
            print(f"Ram Usage (GB): {ram_usage}")

            # Both sides have one row per desired index in the same order, so
            # the addition lines up row by row.
            df_true_save['count'] = df_true_save['count'] + result_df_true['count']
            df_true_save.to_csv('score_counts_true_total.csv', index=False)
            df_false_save['count'] = df_false_save['count'] + result_df_false['count']
            df_false_save.to_csv('score_counts_false_total.csv', index=False)

            # Only the score and the mated flag are stored; the IDs are left out.
            # NOTE: rows are appended and the table is never cleared here, so
            # re-running on the same folder duplicates rows.
            df['Same_IDs'] = is_mated.map({True: 'Y', False: 'N'})
            df[['Score', 'Same_IDs']].to_sql('all data', con=engine, index=False, if_exists='append')

            # Clearing memory to prevent RAM bottleneck
            del records, df, is_mated, result_df_true, result_df_false
            gc.collect()
            print(file + ' is done')


# Run the pipeline in order: create the zeroed running totals, ask for the
# input folder, then tally every .txt file found under that folder.
df_collector()
browse_button()
processing()
# Store the accumulated totals; if_exists='replace' overwrites any existing table.
df_true_save.to_sql('mated frequency', con=engine, index=False, if_exists='replace')
df_false_save.to_sql('nonmated frequency', con=engine, index=False, if_exists='replace')

def browse_quality():
    """Ask the user for the quality-score file.

    Shows a file-selection dialog, stores the selected path in the
    module-level ``quality_read`` and prints it.
    """
    global quality_read
    root = Tk()
    try:
        root.withdraw()  # hide the empty root window; only the dialog is wanted
        root.wm_attributes('-topmost', 1)  # keep the dialog above other windows
        quality_read = fd.askopenfilename(title='Select Quality Score File')  # shows dialog box and returns the path
    finally:
        # Destroy the hidden root so each call does not leave a Tk instance behind.
        root.destroy()
    print(quality_read)


# NOTE(review): nothing in the visible code reads `quality` after this
# function creates it - confirm whether it is still needed.
def quality_score():
    """Create the empty module-level ``quality`` frame.

    The frame has no rows and no columns; its index is the float-typed
    ``qualityscore`` column.
    """
    global quality
    empty_scores = pd.DataFrame(columns=['qualityscore'])
    # Cast the (empty) column to float before turning it into the index.
    typed_scores = empty_scores.astype({'qualityscore': float})
    quality = typed_scores.set_index('qualityscore')



def quality_collector():
    """Load the quality-score file, plot it and export summary statistics.

    Reads the file named by the module-level ``quality_read``. On every
    non-blank line the text from character 10 onwards is taken as the score.
    The scores are stored in the module-level ``df_quality`` (column
    ``Quality Score``), shown as a box plot, summarised into the module-level
    ``quality_stats`` and written to ``quality_score_stats.txt`` and
    ``quality_score_test.csv`` (scores sorted ascending).

    Raises:
        ValueError: if the file contains no scores, or a score is not a number.
    """
    global df_quality
    global quality_stats

    quality_list = []
    with open(quality_read, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) carry no score and
                # would otherwise fail the float conversion below.
                continue
            quality_list.append({
                'Quality Score' : line[10:]
            })

    if not quality_list:
        raise ValueError(f"no quality scores found in {quality_read!r}")

    df_quality = pd.DataFrame(quality_list)
    df_quality['Quality Score'] = df_quality['Quality Score'].astype(float)
    df_quality['Quality Score'].plot(kind='box', title='Overall Quality Score')
    plt.show()

    quality_stats = df_quality.describe()
    # Readable labels for the eight rows describe() produces, in its order.
    quality_stats.insert(0, 'Item', value = ["Count", "Mean", "Standard Deviation","Min", "25% Percentile", "50% Percentile", "75% Percentile", "Max"])
    quality_stats.to_csv('quality_score_stats.txt', index=False)
    df_quality.sort_values(by='Quality Score', inplace=True)
    df_quality.to_csv('quality_score_test.csv', index=False)
    gc.collect()

# Quality-score pipeline: create the empty `quality` frame, ask for the
# quality-score file, then load, plot and summarise it.
quality_score()
browse_quality()
quality_collector()

# Store the scores and their summary statistics in the database
# (if_exists='replace' overwrites any existing table) and as CSV files.
df_quality.to_sql('quality table', con=engine, index=False, if_exists='replace')
quality_stats.to_sql('quality stats', con=engine, index=False, if_exists='replace')
df_quality.to_csv('quality_scores.csv', index=False)
quality_stats.to_csv('quality_stats.csv', index=False)

In [7]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.widgets import Slider
import matplotlib
# Demo: matplotlib histogram whose bin count is driven by a slider widget.
matplotlib.use('TkAgg')
# %%system
# Generate some random data for demonstration
data = np.random.randn(1000)

# Create initial histogram
fig, ax = plt.subplots()
ax.hist(data, bins=20, color='blue', alpha=0.7)

# Add a slider for changing the number of bins
ax_slider = plt.axes([0.25, 0.01, 0.65, 0.03], facecolor='lightgoldenrodyellow')
slider = Slider(ax_slider, 'Bins', valmin=1, valmax=50, valinit=20)
# Save the initial 20-bin figure before any slider interaction.
plt.savefig('histogram.png')
def update(val):
    """Redraw the histogram with the slider's current bin count and re-save it."""
    # Update histogram when slider value changes
    # The slider value is truncated to an integer bin count.
    num_bins = int(slider.val)
    ax.clear()
    ax.hist(data, bins=num_bins, color='blue', alpha=0.7)
    plt.draw()
    # Overwrites the image saved above on every slider change.
    plt.savefig('histogram.png')
    #fig.write_json("histogram.json")

slider.on_changed(update)

plt.show()


In [1]:
import matplotlib.pyplot as plt
import numpy as np

from matplotlib.widgets import Button, Slider
import matplotlib
# Demo: sine wave whose amplitude and frequency are controlled by two sliders,
# with a button that resets both sliders to their initial values.
matplotlib.use('TkAgg')

# The parametrized function to be plotted
def f(t, amplitude, frequency):
    """Return ``amplitude * sin(2*pi*frequency*t)``."""
    return amplitude * np.sin(2 * np.pi * frequency * t)

t = np.linspace(0, 1, 1000)

# Define initial parameters
init_amplitude = 5
init_frequency = 3

# Create the figure and the line that we will manipulate
fig, ax = plt.subplots()
line, = ax.plot(t, f(t, init_amplitude, init_frequency), lw=2)
ax.set_xlabel('Time [s]')

# adjust the main plot to make room for the sliders
fig.subplots_adjust(left=0.25, bottom=0.25)

# Make a horizontal slider to control the frequency.
axfreq = fig.add_axes([0.25, 0.1, 0.65, 0.03])
freq_slider = Slider(
    ax=axfreq,
    label='Frequency [Hz]',
    valmin=0.1,
    valmax=30,
    valinit=init_frequency,
)

# Make a vertically oriented slider to control the amplitude
axamp = fig.add_axes([0.1, 0.25, 0.0225, 0.63])
amp_slider = Slider(
    ax=axamp,
    label="Amplitude",
    valmin=0,
    valmax=10,
    valinit=init_amplitude,
    orientation="vertical"
)


# The function to be called anytime a slider's value changes
def update(val):
    """Recompute the curve from both sliders' current values and redraw."""
    # `val` is ignored: both sliders are read directly, so one callback serves both.
    line.set_ydata(f(t, amp_slider.val, freq_slider.val))
    fig.canvas.draw_idle()


# register the update function with each slider
freq_slider.on_changed(update)
amp_slider.on_changed(update)

# Create a `matplotlib.widgets.Button` to reset the sliders to initial values.
resetax = fig.add_axes([0.8, 0.025, 0.1, 0.04])
button = Button(resetax, 'Reset', hovercolor='0.975')


def reset(event):
    """Button callback: reset both sliders."""
    freq_slider.reset()
    amp_slider.reset()
button.on_clicked(reset)

plt.show()

In [2]:
import plotly.graph_objects as go
from ipywidgets import interact

# Demo: two-bar chart ('x' and '1-x') driven by an ipywidgets slider.
fig = go.FigureWidget()
# add_bar returns the figure itself; the new bar trace is fig.data[0].
bar = fig.add_bar(x=['x', '1-x'])
fig.layout = dict(yaxis=dict(range=[0,1]), height=600)

@interact(x=(0, 1, 0.01))
def update(x=0.3):
    """Set the two bar heights to ``x`` and ``1 - x``."""
    with fig.batch_update():
        # Update the trace through fig.data; the figure has no `bar` attribute,
        # so `fig.bar.y = ...` fails with AttributeError.
        fig.data[0].y = [x, 1 - x]
fig

interactive(children=(FloatSlider(value=0.3, description='x', max=1.0, step=0.01), Output()), _dom_classes=('w…

AttributeError: type object 'DOMWidget' has no attribute '_ipython_display_'

In [2]:
import numpy as np
import plotly.graph_objects as go
import ipywidgets as widgets
from ipywidgets import interactive
from IPython.display import display
import matplotlib.pyplot as plt
# Demo: plotly histogram exported to files whenever the bin slider changes.
# Generate some random data for demonstration
data = np.random.randn(1000)

def update_histogram(num_bins):
    """Build a histogram of ``data`` with ``num_bins`` bins and export it.

    A new figure is created on every call and written to ``histogram.png``,
    ``histogram.json`` and ``histogram.html``; nothing is displayed.
    """
    # Update histogram based on the number of bins
    fig = go.Figure(data=[go.Histogram(x=data, nbinsx=num_bins)])
    fig.update_layout(title_text=f'Histogram with {num_bins} Bins')
    fig.write_image("histogram.png")
    fig.write_json("histogram.json")
    fig.write_html("histogram.html")
    #fig.show()

# Create interactive slider
bin_slider = widgets.IntSlider(value=20, min=1, max=50, step=1, description='Bins')
# The string literal below is disabled code kept as a bare expression; it has no effect.
'''
# Create interactive widget
interactive_plot = interactive(update_histogram, num_bins=bin_slider)

# Display the interactive widget
display(interactive_plot)
'''

# Create output area for the slider
# NOTE(review): nothing is ever written into this Output widget in this cell.
output_slider = widgets.Output()

# Create interactive widget
interactive_plot = interactive(update_histogram, num_bins=bin_slider)

# Display the interactive widget and outputs
display(output_slider, interactive_plot)

In [1]:
import numpy as np
import plotly.graph_objects as go
from IPython.display import display
import ipywidgets as widgets
from ipywidgets import interactive


# Demo: a single plotly FigureWidget histogram updated in place by a bin slider.
# Generate some random data for demonstration
data = np.random.randn(1000)

# Create initial histogram
fig = go.FigureWidget([go.Histogram(x=data, nbinsx=20)])
fig.update_layout(title_text='Histogram with 20 Bins')

def update_histogram(num_bins):
    """Set the existing histogram trace to ``num_bins`` bins and update the title."""
    # Update histogram based on the number of bins
    fig.data[0].x = data
    fig.data[0].nbinsx = num_bins
    fig.update_layout(title_text=f'Histogram with {num_bins} Bins')

# Create interactive slider
bin_slider = widgets.IntSlider(value=20, min=1, max=50, step=1, description='Bins')

# Create interactive widget
interactive_plot = interactive(update_histogram, num_bins=bin_slider)

# Display the interactive widget
display(fig, interactive_plot)


AttributeError: type object 'DOMWidget' has no attribute '_ipython_display_'

interactive(children=(IntSlider(value=20, description='Bins', max=50, min=1), Output()), _dom_classes=('widget…

In [5]:
import numpy as np
import plotly.graph_objects as go
import ipywidgets as widgets
from ipywidgets import interactive
from IPython.display import display, clear_output

# Demo: plotly FigureWidget histogram re-displayed inside an Output widget
# every time the bin slider changes.
# Generate some random data for demonstration
data = np.random.randn(1000)

# Create initial histogram
fig = go.FigureWidget([go.Histogram(x=data, nbinsx=20)])
fig.update_layout(title_text='Histogram with 20 Bins')

# Create output area for the Plotly figure
output_fig = widgets.Output()

def update_histogram(num_bins):
    """Update the histogram to ``num_bins`` bins and re-display it in ``output_fig``."""
    with output_fig:
        # Clear the previous output
        clear_output(wait=True)

        # Update histogram based on the number of bins
        fig.data[0].x = data
        fig.data[0].nbinsx = num_bins
        fig.update_layout(title_text=f'Histogram with {num_bins} Bins')

        # Display the updated figure
        display(fig)
        #fig.write_image("histogram.png")
        #fig.write_json("histogram.json")
        #fig.write_html("histogram.html")

# Create interactive slider
bin_slider = widgets.IntSlider(value=20, min=1, max=50, step=1, description='Bins')

# Create output area for the slider
# NOTE(review): nothing is ever written into this Output widget in this cell.
output_slider = widgets.Output()

# Create interactive widget
interactive_plot = interactive(update_histogram, num_bins=bin_slider)

# Display the interactive widget and outputs
display(output_fig, output_slider, interactive_plot)
#fig.write_image("histogram.png")

Output()

Output()

interactive(children=(IntSlider(value=20, description='Bins', max=50, min=1), Output()), _dom_classes=('widget…

: 

In [None]:
from functools import lru_cache
import scipy.stats as ss
import matplotlib.pyplot as plt
import matplotlib.widgets as widgets
import matplotlib
# Demo: distribution of the sample mean for a sample size chosen with a slider.
matplotlib.use('TkAgg')
fig, ax = plt.subplots()
plt.subplots_adjust(bottom=0.25)  # leave room under the plot for the slider

weight = ss.lognorm(0.23, 0, 70.8)

@lru_cache
def sampling(n):
    """Return 1000 sample means, each computed from ``n`` draws of ``weight``.

    Results are cached per ``n``, so revisiting a slider position shows the
    same histogram instead of a fresh random one.
    """
    # Draw n values per sample; a bare rvs() draws a single value, which
    # made the result independent of n.
    return [ weight.rvs(n).mean() for i in range(1000) ]

theme = {
    'color' : "#1f77b4",
    'alpha' : 0.7,
}

t = ax.hist(sampling(100), **theme)

slider = widgets.Slider(
    ax      = plt.axes([0.25, 0.1, 0.5, 0.03]),
    label   = "n",
    valmin  = 10,
    valmax  = 1000,
    valinit = 100,
    valstep = 1)

def update(val):
    """Redraw the histogram for the sample size selected on the slider."""
    global t
    # Clear first: otherwise every slider move stacks another histogram on
    # top of the previous ones.
    ax.clear()
    t = ax.hist(sampling(int(val)), **theme)
    # clear() also removes the title, so set it again.
    ax.set_title('Distribution of Sample Size Mean')
    fig.canvas.draw_idle()

slider.on_changed(update)
ax.set_title('Distribution of Sample Size Mean')
plt.show()

: 