In [None]:
"""
Created on Tue. Dec. 3rd, 2024

authors: Christian D. Powell
email: cpowell74@gatech.edu
"""
__version__ = '0.0a0'
from docx import Document
import numpy as np
import pandas as pd
from randomgen.monte_carlo_rv import monte_carlo_rv, STATS
from scipy import stats

# Notebook for Creating Dataset and Figures for Report

## Normal Distribution

In [None]:
# Normal distribution, 100 iterations: construct the Monte Carlo object,
# run it with verbose=False, and finish the cell on the plot call.
norm_100 = monte_carlo_rv(
    stats.norm,
    dist_name='Normal',
    iterations=100,
)
norm_100.run(verbose=False)
norm_100.plot()

In [None]:
# Normal distribution, 1,000 iterations. dist_name is passed explicitly so this
# run is labelled the same way as the 100-iteration Normal run and as every
# other distribution in this notebook.
norm_1_000 = monte_carlo_rv(
    stats.norm,
    dist_name='Normal',
    iterations=1_000,
)
norm_1_000.run(verbose=False)
norm_1_000.plot()

In [None]:
# Normal distribution, 10,000 iterations. dist_name is passed explicitly so this
# run is labelled the same way as the 100-iteration Normal run and as every
# other distribution in this notebook.
norm_10_000 = monte_carlo_rv(
    stats.norm,
    dist_name='Normal',
    iterations=10_000,
)
norm_10_000.run(verbose=False)
norm_10_000.plot()

In [None]:
# Normal distribution, 100,000 iterations. dist_name is passed explicitly so
# this run is labelled the same way as the 100-iteration Normal run and as
# every other distribution in this notebook.
norm_100_000 = monte_carlo_rv(
    stats.norm,
    dist_name='Normal',
    iterations=100_000,
)
norm_100_000.run(verbose=False)
norm_100_000.plot()

### Plot Statistics

In [None]:
# Go through every entry of STATS: echo it, then draw that statistic for the
# 10,000-iteration Normal run.
for stat in STATS:
    print(stat)
    norm_10_000.plot_statistic(stat)

In [None]:
# Quantile summary for the Normal runs: one row per (type, count) pair.
# Rows are assembled as plain records so pandas keeps `count` and the quantile
# columns numeric; appending floats onto a string array with np.append turned
# every value in the table into text.
summary_columns = ['type', 'count', 'min', '0.01', '0.05', '0.10', '0.25', '0.50', '0.75', '0.90', '0.95', '0.99', 'max']
quantile_rows = [
    ('theoretical', 100_000, norm_100_000.theoretical_quantiles()[0]),
    ('simulated', 100, norm_100.simulated_quantiles()[0]),
    ('simulated', 1_000, norm_1_000.simulated_quantiles()[0]),
    ('simulated', 10_000, norm_10_000.simulated_quantiles()[0]),
    ('simulated', 100_000, norm_100_000.simulated_quantiles()[0]),
]
statistics = pd.DataFrame(
    [
        # np.ravel keeps the flattening that np.append applied to the values.
        [kind, count, *np.ravel(np.round(values, 4)).tolist()]
        for kind, count, values in quantile_rows
    ],
    columns=summary_columns,
)
statistics


In [None]:
# Start the Word document that collects one quantile table per distribution.
document = Document()
# A heading in front of the table labels it in the exported file; without it
# the tables added by the later cells cannot be told apart.
document.add_heading('Normal Distribution', level=2)
# The table starts with a single row, which becomes the header.
table = document.add_table(rows=1, cols=len(statistics.columns))
table.style = 'Table Grid'  # Apply a table style
for header_cell, col_name in zip(table.rows[0].cells, statistics.columns):
    header_cell.text = col_name
# One appended row per DataFrame row; each value is written as its str() form.
for row_values in statistics.itertuples(index=False, name=None):
    for cell, value in zip(table.add_row().cells, row_values):
        cell.text = str(value)

## Exponential Distribution

In [None]:
# Exponential distribution, 100 iterations: construct, run with verbose=False,
# then plot as the cell's final expression.
expo_100 = monte_carlo_rv(
    stats.expon,
    dist_name="Exponential",
    iterations=100,
)
expo_100.run(verbose=False)
expo_100.plot()

In [None]:
# Exponential distribution, 1,000 iterations: construct, run with
# verbose=False, then plot as the cell's final expression.
expo_1_000 = monte_carlo_rv(
    stats.expon,
    dist_name="Exponential",
    iterations=1_000,
)
expo_1_000.run(verbose=False)
expo_1_000.plot()

In [None]:
# Exponential distribution, 10,000 iterations: construct, run with
# verbose=False, then plot as the cell's final expression.
expo_10_000 = monte_carlo_rv(
    stats.expon,
    dist_name="Exponential",
    iterations=10_000,
)
expo_10_000.run(verbose=False)
expo_10_000.plot()

In [None]:
# Exponential distribution, 100,000 iterations: construct, run with
# verbose=False, then plot as the cell's final expression.
expo_100_000 = monte_carlo_rv(
    stats.expon,
    dist_name="Exponential",
    iterations=100_000,
)
expo_100_000.run(verbose=False)
expo_100_000.plot()

### Statistics

In [None]:
# Go through every entry of STATS: echo it, then draw that statistic for the
# 10,000-iteration Exponential run.
for stat in STATS:
    print(stat)
    expo_10_000.plot_statistic(stat)

In [None]:
# Quantile summary for the Exponential runs: one row per (type, count) pair.
# Rows are assembled as plain records so pandas keeps `count` and the quantile
# columns numeric; appending floats onto a string array with np.append turned
# every value in the table into text.
summary_columns = ['type', 'count', 'min', '0.01', '0.05', '0.10', '0.25', '0.50', '0.75', '0.90', '0.95', '0.99', 'max']
quantile_rows = [
    ('theoretical', 100_000, expo_100_000.theoretical_quantiles()[0]),
    ('simulated', 100, expo_100.simulated_quantiles()[0]),
    ('simulated', 1_000, expo_1_000.simulated_quantiles()[0]),
    ('simulated', 10_000, expo_10_000.simulated_quantiles()[0]),
    ('simulated', 100_000, expo_100_000.simulated_quantiles()[0]),
]
statistics = pd.DataFrame(
    [
        # np.ravel keeps the flattening that np.append applied to the values.
        [kind, count, *np.ravel(np.round(values, 4)).tolist()]
        for kind, count, values in quantile_rows
    ],
    columns=summary_columns,
)
statistics

In [None]:
# A heading in front of the table labels it in the exported file and puts a
# paragraph between it and the table appended by the earlier cell.
document.add_heading('Exponential Distribution', level=2)
# The table starts with a single row, which becomes the header.
table = document.add_table(rows=1, cols=len(statistics.columns))
table.style = 'Table Grid'  # Apply a table style
for header_cell, col_name in zip(table.rows[0].cells, statistics.columns):
    header_cell.text = col_name
# One appended row per DataFrame row; each value is written as its str() form.
for row_values in statistics.itertuples(index=False, name=None):
    for cell, value in zip(table.add_row().cells, row_values):
        cell.text = str(value)

## Kolmogorov-Smirnov Two-sided Test Statistic Distribution

In [None]:
# Kolmogorov-Smirnov statistic, 100 iterations with samples=1_000: construct,
# run with verbose=False, then plot as the cell's final expression.
kolm_100 = monte_carlo_rv(
    'kolmogorov',
    dist_name="Kolmogorov-Smirnov",
    iterations=100,
    samples=1_000,
)
kolm_100.run(verbose=False)
kolm_100.plot()

In [None]:
# Kolmogorov-Smirnov statistic, 1,000 iterations with samples=1_000: construct,
# run with verbose=False, then plot as the cell's final expression.
kolm_1_000 = monte_carlo_rv(
    'kolmogorov',
    dist_name="Kolmogorov-Smirnov",
    iterations=1_000,
    samples=1_000,
)
kolm_1_000.run(verbose=False)
kolm_1_000.plot()

In [None]:
# Kolmogorov-Smirnov statistic, 10,000 iterations with samples=1_000:
# construct, run with verbose=False, then plot as the cell's final expression.
kolm_10_000 = monte_carlo_rv(
    'kolmogorov',
    dist_name="Kolmogorov-Smirnov",
    iterations=10_000,
    samples=1_000,
)
kolm_10_000.run(verbose=False)
kolm_10_000.plot()

In [None]:
# Kolmogorov-Smirnov statistic, 100,000 iterations with samples=1_000:
# construct, run with verbose=False, then plot as the cell's final expression.
kolm_100_000 = monte_carlo_rv(
    'kolmogorov',
    dist_name="Kolmogorov-Smirnov",
    iterations=100_000,
    samples=1_000,
)
kolm_100_000.run(verbose=False)
kolm_100_000.plot()

### Statistics

In [None]:
# Go through every entry of STATS: echo it, then draw that statistic for the
# 10,000-iteration Kolmogorov-Smirnov run.
for stat in STATS:
    print(stat)
    kolm_10_000.plot_statistic(stat)

In [None]:
# Quantile summary for the Kolmogorov-Smirnov runs (simulated rows only).
# Rows are assembled as plain records so pandas keeps `count` and the quantile
# columns numeric; appending floats onto a string array with np.append turned
# every value in the table into text.
summary_columns = ['type', 'count', 'min', '0.01', '0.05', '0.10', '0.25', '0.50', '0.75', '0.90', '0.95', '0.99', 'max']
quantile_rows = [
    ('simulated', 100, kolm_100.simulated_quantiles()[0]),
    ('simulated', 1_000, kolm_1_000.simulated_quantiles()[0]),
    ('simulated', 10_000, kolm_10_000.simulated_quantiles()[0]),
    ('simulated', 100_000, kolm_100_000.simulated_quantiles()[0]),
]
statistics = pd.DataFrame(
    [
        # np.ravel keeps the flattening that np.append applied to the values.
        [kind, count, *np.ravel(np.round(values, 4)).tolist()]
        for kind, count, values in quantile_rows
    ],
    columns=summary_columns,
)
statistics

In [None]:
# A heading in front of the table labels it in the exported file and puts a
# paragraph between it and the table appended by the earlier cell.
document.add_heading('Kolmogorov-Smirnov Two-sided Test Statistic Distribution', level=2)
# The table starts with a single row, which becomes the header.
table = document.add_table(rows=1, cols=len(statistics.columns))
table.style = 'Table Grid'  # Apply a table style
for header_cell, col_name in zip(table.rows[0].cells, statistics.columns):
    header_cell.text = col_name
# One appended row per DataFrame row; each value is written as its str() form.
for row_values in statistics.itertuples(index=False, name=None):
    for cell, value in zip(table.add_row().cells, row_values):
        cell.text = str(value)

## Durbin–Watson Statistic Distribution

In [None]:
# Durbin–Watson statistic, 100 iterations with samples=1_000: construct,
# run with verbose=False, then plot as the cell's final expression.
durb_100 = monte_carlo_rv(
    'durbin',
    dist_name="Durbin–Watson",
    iterations=100,
    samples=1_000,
)
durb_100.run(verbose=False)
durb_100.plot()

In [None]:
# Durbin–Watson statistic, 1,000 iterations with samples=1_000: construct,
# run with verbose=False, then plot as the cell's final expression.
durb_1_000 = monte_carlo_rv(
    'durbin',
    dist_name="Durbin–Watson",
    iterations=1_000,
    samples=1_000,
)
durb_1_000.run(verbose=False)
durb_1_000.plot()

In [None]:
# Durbin–Watson statistic, 10,000 iterations with samples=1_000: construct,
# run with verbose=False, then plot as the cell's final expression.
durb_10_000 = monte_carlo_rv(
    'durbin',
    dist_name="Durbin–Watson",
    iterations=10_000,
    samples=1_000,
)
durb_10_000.run(verbose=False)
durb_10_000.plot()

In [None]:
# Durbin–Watson statistic, 100,000 iterations with samples=1_000: construct,
# run with verbose=False, then plot as the cell's final expression.
durb_100_000 = monte_carlo_rv(
    'durbin',
    dist_name="Durbin–Watson",
    iterations=100_000,
    samples=1_000,
)
durb_100_000.run(verbose=False)
durb_100_000.plot()

### Statistics

In [None]:
# Go through every entry of STATS: echo it, then draw that statistic for the
# 10,000-iteration Durbin–Watson run.
for stat in STATS:
    print(stat)
    durb_10_000.plot_statistic(stat)

In [None]:
# Quantile summary for the Durbin–Watson runs (simulated rows only).
# Rows are assembled as plain records so pandas keeps `count` and the quantile
# columns numeric; appending floats onto a string array with np.append turned
# every value in the table into text.
summary_columns = ['type', 'count', 'min', '0.01', '0.05', '0.10', '0.25', '0.50', '0.75', '0.90', '0.95', '0.99', 'max']
quantile_rows = [
    ('simulated', 100, durb_100.simulated_quantiles()[0]),
    ('simulated', 1_000, durb_1_000.simulated_quantiles()[0]),
    ('simulated', 10_000, durb_10_000.simulated_quantiles()[0]),
    ('simulated', 100_000, durb_100_000.simulated_quantiles()[0]),
]
statistics = pd.DataFrame(
    [
        # np.ravel keeps the flattening that np.append applied to the values.
        [kind, count, *np.ravel(np.round(values, 4)).tolist()]
        for kind, count, values in quantile_rows
    ],
    columns=summary_columns,
)
statistics

In [None]:
# A heading in front of the table labels it in the exported file and puts a
# paragraph between it and the table appended by the earlier cell.
document.add_heading('Durbin–Watson Statistic Distribution', level=2)
# The table starts with a single row, which becomes the header.
table = document.add_table(rows=1, cols=len(statistics.columns))
table.style = 'Table Grid'  # Apply a table style
for header_cell, col_name in zip(table.rows[0].cells, statistics.columns):
    header_cell.text = col_name
# One appended row per DataFrame row; each value is written as its str() form.
for row_values in statistics.itertuples(index=False, name=None):
    for cell, value in zip(table.add_row().cells, row_values):
        cell.text = str(value)

## Anderson-Darling Statistic Distribution

In [None]:
# Anderson-Darling statistic, 100 iterations with samples=1_000: construct,
# run with verbose=False, then plot as the cell's final expression.
ande_100 = monte_carlo_rv(
    'anderson',
    dist_name="Anderson-Darling",
    iterations=100,
    samples=1_000,
)
ande_100.run(verbose=False)
ande_100.plot()

In [None]:
# Anderson-Darling statistic, 1,000 iterations with samples=1_000: construct,
# run with verbose=False, then plot as the cell's final expression.
ande_1_000 = monte_carlo_rv(
    'anderson',
    dist_name="Anderson-Darling",
    iterations=1_000,
    samples=1_000,
)
ande_1_000.run(verbose=False)
ande_1_000.plot()

In [None]:
# Anderson-Darling statistic, 10,000 iterations with samples=1_000: construct,
# run with verbose=False, then plot as the cell's final expression.
ande_10_000 = monte_carlo_rv(
    'anderson',
    dist_name="Anderson-Darling",
    iterations=10_000,
    samples=1_000,
)
ande_10_000.run(verbose=False)
ande_10_000.plot()

In [None]:
# Anderson-Darling statistic, 100,000 iterations with samples=1_000: construct,
# run with verbose=False, then plot as the cell's final expression.
ande_100_000 = monte_carlo_rv(
    'anderson',
    dist_name="Anderson-Darling",
    iterations=100_000,
    samples=1_000,
)
ande_100_000.run(verbose=False)
ande_100_000.plot()

### Statistics

In [None]:
# Go through every entry of STATS: echo it, then draw that statistic for the
# 10,000-iteration Anderson-Darling run.
for stat in STATS:
    print(stat)
    ande_10_000.plot_statistic(stat)

In [None]:
# Quantile summary for the Anderson-Darling runs (simulated rows only).
# Rows are assembled as plain records so pandas keeps `count` and the quantile
# columns numeric; appending floats onto a string array with np.append turned
# every value in the table into text.
summary_columns = ['type', 'count', 'min', '0.01', '0.05', '0.10', '0.25', '0.50', '0.75', '0.90', '0.95', '0.99', 'max']
quantile_rows = [
    ('simulated', 100, ande_100.simulated_quantiles()[0]),
    ('simulated', 1_000, ande_1_000.simulated_quantiles()[0]),
    ('simulated', 10_000, ande_10_000.simulated_quantiles()[0]),
    ('simulated', 100_000, ande_100_000.simulated_quantiles()[0]),
]
statistics = pd.DataFrame(
    [
        # np.ravel keeps the flattening that np.append applied to the values.
        [kind, count, *np.ravel(np.round(values, 4)).tolist()]
        for kind, count, values in quantile_rows
    ],
    columns=summary_columns,
)
statistics

In [None]:
# A heading in front of the table labels it in the exported file and puts a
# paragraph between it and the table appended by the earlier cell.
document.add_heading('Anderson-Darling Statistic Distribution', level=2)
# The table starts with a single row, which becomes the header.
table = document.add_table(rows=1, cols=len(statistics.columns))
table.style = 'Table Grid'  # Apply a table style
for header_cell, col_name in zip(table.rows[0].cells, statistics.columns):
    header_cell.text = col_name
# One appended row per DataFrame row; each value is written as its str() form.
for row_values in statistics.itertuples(index=False, name=None):
    for cell, value in zip(table.add_row().cells, row_values):
        cell.text = str(value)

## Save Table Document

In [None]:
# Write the accumulated tables to disk; the path is relative, so the file is
# created in the current working directory.
output_path = 'my_table.docx'
document.save(output_path)