## Exercise 2-5

In [31]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import math

# Raw observations for this part of the exercise.
data = [8, 6, 11, 14, 10, 11, 9, 7, 2, 8, 9, 5, 5, 5, 12, 7, 8, 4, 17, 8, 12, 7, 8, 8, 7, 10, 8, 6, 9, 9, 11, 16, 2, 7, 4, 8, 4, 4, 5, 5, 9, 9, 6, 6, 7, 7, 9, 5, 4, 5, 14, 2, 9, 0, 6, 1, 1, 12, 11, 4]

n = len(data)

# Calculate the smallest k such that 2^k >= n
k = math.ceil(math.log2(n))

# Determine the class boundaries
min_value = min(data)
max_value = max(data)

# Determine class width: the smallest whole number STRICTLY greater than
# range / k.  The classes below are half-open ([left, right)), so the last
# boundary must lie above the maximum; a plain ceil() would put it exactly on
# the maximum whenever the range is a multiple of k, and pd.cut would then
# leave the largest observation unclassified (NaN).
class_width = math.floor((max_value - min_value) / k) + 1

class_intervals = [min_value + i * class_width for i in range(k + 1)]

# Create a pandas DataFrame
df = pd.DataFrame(data, columns=['Value'])

# Create frequency distribution using pd.cut with left-closed, right-open bins
df['Class Interval'] = pd.cut(df['Value'], bins=class_intervals, right=False, include_lowest=True)

# Guard against silently dropped observations: every value must land in a class.
assert df['Class Interval'].notna().all(), "some observations fall outside the class boundaries"

# value_counts() orders by count; sort_index() restores ascending class order.
frequency_distribution = df['Class Interval'].value_counts().sort_index()

# Text labels for the x axis, e.g. "[0, 3)".
x_labels = [f"[{interval.left}, {interval.right})" for interval in frequency_distribution.index]

# Create Plotly figure
fig = go.Figure()

fig.add_trace(go.Bar(
    x=x_labels,
    y=frequency_distribution.values,
    text=frequency_distribution.values,
    textposition='auto',
    marker_color='#3366cc',
    marker_line_color='#1a3366',
    marker_line_width=1.5,
    opacity=0.8,
    width=0.8
))

fig.update_layout(
    title='Frequency Distribution with k Intervals',
    xaxis_title='Class Intervals',
    yaxis_title='Frequency',
    bargap=0.1,  # Reduced gap between bars
)

fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='#e6e6e6')  # Light grey gridlines

fig.show()

print(frequency_distribution)

[0, 3)       6
[3, 6)      13
[6, 9)      20
[9, 12)     14
[12, 15)     5
[15, 18)     2
Name: Class Interval, dtype: int64


In [32]:
# Express each class count as a share of the n observations.
relative_frequency = frequency_distribution / n

# Re-use the bar trace already on `fig`: replace its heights with the
# proportions and label every bar as a percentage with two decimals.
fig.data[0].update(dict(
    y=relative_frequency.values,
    text=list(map('{:.2%}'.format, relative_frequency.values)),
))

# Retitle the figure and switch the y axis to whole-percent tick labels.
fig.update_layout(
    title='Relative Frequency Distribution with k Intervals',
    yaxis=dict(title='Relative Frequency', tickformat=',.0%'),
)

fig.show()

print(relative_frequency)


elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison



[0, 3)      0.100000
[3, 6)      0.216667
[6, 9)      0.333333
[9, 12)     0.233333
[12, 15)    0.083333
[15, 18)    0.033333
Name: Class Interval, dtype: float64


In [35]:
# Running total of the class counts, in class order.
cumulative_frequency = frequency_distribution.cumsum()

# One bar per class; the bar height (and its label) is the running total.
fig_cumulative = go.Figure(data=[
    go.Bar(
        x=x_labels,
        y=cumulative_frequency.values,
        text=cumulative_frequency.values,
        textposition='auto',
        marker=dict(
            color='#3366cc',
            line=dict(color='#1a3366', width=1.5),
        ),
        opacity=0.8,
        width=0.8,
    )
])

fig_cumulative.update_layout(
    title='Cumulative Frequency Distribution',
    xaxis=dict(title='Class Intervals'),
    yaxis=dict(title='Cumulative Frequency'),
    bargap=0.1,
)

# Light grey horizontal gridlines.
fig_cumulative.update_yaxes(showgrid=True, gridwidth=1, gridcolor='#e6e6e6')

fig_cumulative.show()

print("Cumulative Frequency Distribution:", cumulative_frequency, sep="\n")

Cumulative Frequency Distribution:
[0, 3)       6
[3, 6)      19
[6, 9)      39
[9, 12)     53
[12, 15)    58
[15, 18)    60
Name: Class Interval, dtype: int64


In [37]:
# Running totals expressed as a share of the n observations.
relative_cumulative_frequency = cumulative_frequency / n

# One bar per class, labelled with the cumulative share as a percentage.
fig_relative_cumulative = go.Figure(data=[
    go.Bar(
        x=x_labels,
        y=relative_cumulative_frequency.values,
        text=list(map('{:.2%}'.format, relative_cumulative_frequency.values)),
        textposition='auto',
        marker=dict(
            color='#3366cc',
            line=dict(color='#1a3366', width=1.5),
        ),
        opacity=0.8,
        width=0.8,
    )
])

fig_relative_cumulative.update_layout(
    title='Relative Cumulative Frequency Distribution',
    xaxis=dict(title='Class Intervals'),
    yaxis=dict(title='Relative Cumulative Frequency', tickformat=',.0%'),
    bargap=0.1,
)

# Light grey horizontal gridlines.
fig_relative_cumulative.update_yaxes(showgrid=True, gridwidth=1, gridcolor='#e6e6e6')

fig_relative_cumulative.show()

print("\nRelative Cumulative Frequency Distribution:", relative_cumulative_frequency, sep="\n")


Relative Cumulative Frequency Distribution:
[0, 3)      0.100000
[3, 6)      0.316667
[6, 9)      0.650000
[9, 12)     0.883333
[12, 15)    0.966667
[15, 18)    1.000000
Name: Class Interval, dtype: float64


In [38]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import math

# Data
data = [6, 10, 6, 4, 9, 5, 5, 5, 5, 7, 6, 2, 5, 5, 5, 4, 5, 7, 6, 7, 8, 6, 8, 4, 7, 5, 5, 5, 5, 7, 8, 7, 6, 7, 5, 4, 6, 4, 4, 7, 4, 6, 6, 7, 8, 6, 7, 6, 7, 8, 5, 6, 5, 7, 3, 6, 4, 7, 4, 4]

n = len(data)

# Number of classes by the square-root rule: k = floor(sqrt(n)).
k = math.floor(math.sqrt(n))

min_value = min(data)
max_value = max(data)

# Class width: the smallest whole number STRICTLY greater than range / k.
# The classes are half-open ([left, right)), so the last boundary must lie
# above the maximum; a plain ceil() would put it exactly on the maximum
# whenever the range is a multiple of k, and pd.cut would then leave the
# largest observation unclassified (NaN).
class_width = math.floor((max_value - min_value) / k) + 1

# NOTE: for this data k = 7 and class_width = 2, so the boundaries run from 2
# to 16 while the observations stop at 10 -- the last two classes are empty.
class_intervals = [min_value + i * class_width for i in range(k + 1)]

df = pd.DataFrame(data, columns=['Value'])

df['Class Interval'] = pd.cut(df['Value'], bins=class_intervals, right=False, include_lowest=True)

# Guard against silently dropped observations: every value must land in a class.
assert df['Class Interval'].notna().all(), "some observations fall outside the class boundaries"

# value_counts() orders by count; sort_index() restores ascending class order.
frequency_distribution = df['Class Interval'].value_counts().sort_index()

relative_frequency = frequency_distribution / n

cumulative_frequency = frequency_distribution.cumsum()

relative_cumulative_frequency = cumulative_frequency / n

# Text labels for the x axis, e.g. "[2.0, 4.0)".
x_labels = [f"[{interval.left:.1f}, {interval.right:.1f})" for interval in relative_frequency.index]


def _bar_figure(y, text, title, yaxis_title, percent_axis=False, bar_width=0.8, bargap=0.1):
    """Build one bar chart over the class intervals in ``x_labels``.

    Args:
        y: Bar heights, one per class interval.
        text: Labels drawn on the bars, one per class interval.
        title: Figure title.
        yaxis_title: Y-axis title.
        percent_axis: When True, format y-axis ticks as whole percentages.
        bar_width: Width of each bar in x-axis units (1.0 leaves no gap).
        bargap: Layout-level gap between bars.

    Returns:
        The configured ``go.Figure`` (not yet shown).
    """
    figure = go.Figure()
    figure.add_trace(go.Bar(
        x=x_labels,
        y=y,
        text=text,
        textposition='auto',
        marker_color='#3366cc',
        marker_line_color='#1a3366',
        marker_line_width=1.5,
        opacity=0.8,
        width=bar_width
    ))
    layout_options = dict(
        title=title,
        xaxis_title='Class Intervals',
        yaxis_title=yaxis_title,
        bargap=bargap,
    )
    if percent_axis:
        layout_options['yaxis_tickformat'] = ',.0%'
    figure.update_layout(**layout_options)
    # Light grey horizontal gridlines.
    figure.update_yaxes(showgrid=True, gridwidth=1, gridcolor='#e6e6e6')
    return figure


# Percentage labels shared by the two relative-frequency charts.
relative_frequency_labels = [f'{val:.2%}' for val in relative_frequency.values]

# Relative frequency distribution (bar chart with gaps between classes)
fig_relative = _bar_figure(
    relative_frequency.values,
    relative_frequency_labels,
    title='Relative Frequency Distribution',
    yaxis_title='Relative Frequency',
    percent_axis=True,
)
fig_relative.show()

# Cumulative frequency distribution
fig_cumulative = _bar_figure(
    cumulative_frequency.values,
    cumulative_frequency.values,
    title='Cumulative Frequency Distribution',
    yaxis_title='Cumulative Frequency',
)
fig_cumulative.show()

# c. Relative Frequency Histogram
fig_histogram = _bar_figure(
    relative_frequency.values,
    relative_frequency_labels,
    title='Relative Frequency Histogram',
    yaxis_title='Relative Frequency',
    percent_axis=True,
    bar_width=1.0,  # Set width to 1.0 to remove gaps between bars
    bargap=0,  # Remove gap between bars
)
fig_histogram.show()

# d. Ogive (Cumulative Frequency Curve)
# Each point sits at a class's upper boundary, at the running total up to it.
fig_ogive = go.Figure()
fig_ogive.add_trace(go.Scatter(
    x=[interval.right for interval in cumulative_frequency.index],
    y=cumulative_frequency.values,
    mode='lines+markers',
    name='Cumulative Frequency',
    line=dict(color='#3366cc', width=2),
    marker=dict(color='#3366cc', size=8)
))
fig_ogive.update_layout(
    title='Ogive (Cumulative Frequency Curve)',
    xaxis_title='Upper Class Boundaries',
    yaxis_title='Cumulative Frequency',
)
fig_ogive.update_yaxes(showgrid=True, gridwidth=1, gridcolor='#e6e6e6')
fig_ogive.show()

# Print the distributions
print("Relative Frequency Distribution:")
print(relative_frequency)
print("\nCumulative Frequency Distribution:")
print(cumulative_frequency)

Relative Frequency Distribution:
[2, 4)      0.033333
[4, 6)      0.416667
[6, 8)      0.433333
[8, 10)     0.100000
[10, 12)    0.016667
[12, 14)    0.000000
[14, 16)    0.000000
Name: Class Interval, dtype: float64

Cumulative Frequency Distribution:
[2, 4)       2
[4, 6)      27
[6, 8)      53
[8, 10)     59
[10, 12)    60
[12, 14)    60
[14, 16)    60
Name: Class Interval, dtype: int64


In [4]:
import plotly.graph_objects as go

# Share of total sales (in percent) for each global business segment;
# the two lists are parallel.
segments = ['Beauty', 'Grooming', 'Health Care', 'Fabric Care and Home Care', 'Baby Care and Family Care']
percentages = [24, 10, 9, 32, 25]

# One colour per segment, shared by the bar chart and the pie chart.
segment_colors = ['#3366cc', '#dc3912', '#ff9900', '#109618', '#990099']

# Bar chart: one bar per segment, labelled with its percentage.
bar_fig = go.Figure()
bar_fig.add_trace(go.Bar(
    x=segments,
    y=percentages,
    text=percentages,
    textposition='auto',
    marker_color=segment_colors,
))
bar_fig.update_layout(
    title='Procter & Gamble: Percentage of Total Sales by Global Segment (2014)',
    xaxis=dict(title='Global Business Segment'),
    yaxis=dict(title='Percentage of Total Sales'),
)

# Pie chart of the same figures, with label + percent shown on each slice.
pie_fig = go.Figure()
pie_fig.add_trace(go.Pie(
    labels=segments,
    values=percentages,
    textinfo='label+percent',
    insidetextorientation='radial',
    marker_colors=segment_colors,
    textfont_size=12,
    hoverinfo='label+percent',
))

# Horizontal legend centred just below the pie.
legend_below = {
    'orientation': "h",
    'yanchor': "bottom",
    'y': -0.1,
    'xanchor': "center",
    'x': 0.5,
}
pie_fig.update_layout(
    title='Procter & Gamble: Global Business Segment’s Percentage of Total Sales (2014)',
    legend=legend_below,
)

# Show both figures
bar_fig.show()
pie_fig.show()


Stem | Leaf
------------
  2  | 2 4 8 9
  3  | 0 1 2 3 3 4 5 5 5 6 6 7 8 8 9
  4  | 1 3 5 7 8
  5  | 5 6 6 6
  6  | 0 5 6 6


Most payments are collected in the 30–39 day range: that stem holds 15 of the 32 values in the stem-and-leaf display above, more than any other class (the next largest, 40–49 days, holds only 5).