# Imports.

In [256]:
import pandas as pd
import numpy as np

import secrets

import plotly.graph_objects as go
import plotly.io as pio

from scipy.optimize import fsolve

import os

from IPython import get_ipython
from IPython.display import Image, display


from comp_econ import comp_econ as ce

# Problem 1.

## 1a. Function to calculate y using a for loop.

In [257]:
def y_by_loop(_alpha: np.ndarray, _sigma: np.ndarray, _x: np.ndarray, _p: np.ndarray) -> tuple:
    
    r"""
    Per the problem instructions, all inputs are supposed to be drawn from a random uniform distribution,
    and all input ndarrays must have same length.

    Parameters
    ----------
    _alpha : np.ndarray
        ndarray of scalars drawn from random uniform distribution.
    _sigma : np.ndarray
        ndarray of scalars drawn from random uniform distribution.
    _x : np.ndarray
        ndarray of scalars drawn from random uniform distribution.
    _p : np.ndarray
        ndarray of scalars drawn from random uniform distribution.

    Returns
    -------
    3-tuple
        A float y_ that is the sum of all y_i's multiplied by (1/ N), where y_i = p_i * alpha_i * (x_i ** sigma_i).

        An integer N_, the length each of the input ndarrays.

        A float loop_duration_seconds_ that is the number of seconds the function took to run.
    """

    # We are told every input ndarray is of length N, but we are not told to have N as an input parameter.
    assert _alpha.shape == _sigma.shape == _x.shape == _p.shape, "All input ndarray must have same length."
    # Since all input ndarrays are of same length, pick N = _alpha.shape[0].
    N_ = _alpha.shape[0]

    # Per the instructions, run the calculation in a loop.
    y_results_list = []
    loop_start_ts = pd.Timestamp.now()
    for i in range(0, N_):
        # rng.uniform(...) returns an array of arrays. Using .flatten() here results in a float being
        # returned instead of an ndarray.
        y_i = _p.flatten()[i] * _alpha.flatten()[i] * (_x.flatten()[i] ** _sigma.flatten()[i])
        y_results_list.append(y_i)
    loop_completion_ts = pd.Timestamp.now()

    loop_duration_seconds_ = (loop_completion_ts - loop_start_ts).total_seconds()

    # (1 / N) multiplied by the sum of all y_i's.
    y_ = (1 / N_) * sum(y_results_list)
    y_ = np.round(y_, 4)



    print(f"Successfully calculated y using a for loop, then rounded to 4 digits:\n{y_}")
    print(f"Calculation in a loop took {loop_duration_seconds_} seconds.")

    return y_, N_, loop_duration_seconds_


## 1b. Draw from random uniform distribution using seeds for reproducible results, then compute y using y_by_loop(...).

In [258]:
num_draws = int(1e4)
# num_draws = 2

# Assign seeds for reproducible results.
seed_alpha = secrets.randbits(128)
print(f"seed_alpha: {seed_alpha}")

seed_sigma = secrets.randbits(128) 
print(f"seed_sigma: {seed_sigma}")

seed_x = secrets.randbits(128)
print(f"seed_x: {seed_x}")

seed_p = secrets.randbits(128)
print(f"seed_p: {seed_p}")

print(f"Performing 4 draws of legnth {num_draws} from random uniform distribution.")
# rng = np.random.default_rng()
# Numpy docs for seeds of random numbers.
# https://numpy.org/doc/2.3/reference/random/index.html#random-quick-start
arr_alpha = np.random.default_rng(seed_alpha).uniform(size=(num_draws, 1))
arr_sigma = np.random.default_rng(seed_sigma).uniform(size=(num_draws, 1))
arr_x = np.random.default_rng(seed_x).uniform(size=(num_draws, 1))
arr_p = np.random.default_rng(seed_p).uniform(size=(num_draws, 1))

y_1, N, loop_duration_seconds = y_by_loop(_alpha=arr_alpha, _sigma=arr_sigma, _x=arr_x, _p=arr_p)


seed_alpha: 223033632890419264390639833736888440783
seed_sigma: 205932006462777418980820876980130146401
seed_x: 292700590553462864305833764075011370226
seed_p: 13058703999572845220044003845056428894
Performing 4 draws of legnth 10000 from random uniform distribution.
Successfully calculated y using a for loop, then rounded to 4 digits:
0.1725
Calculation in a loop took 0.106075 seconds.


## 1c. Calculate y using only vector operations.

In [259]:
# Per the numpy documentation, the np.matrix class is no longer recommended for use. Instead, use regular arrays.
# https://numpy.org/doc/stable/reference/generated/numpy.matrix.html
# Per the instructions, y_2 is indeed calculated in 1 line using vector operations.
vector_ops_start_ts = pd.Timestamp.now()
y_2 = np.round(((1 / N) * sum((arr_p * arr_alpha * (arr_x ** arr_sigma)))), 4)[0]
vector_ops_completion_ts = pd.Timestamp.now()
vector_ops_duration_seconds = (vector_ops_completion_ts - vector_ops_start_ts).total_seconds()

print(f"Successfully calculated y using vector operations, then rounded to 4 digits:\n{y_2}")
print(f"Vector operations took {vector_ops_duration_seconds} seconds.")


Successfully calculated y using vector operations, then rounded to 4 digits:
0.1725
Vector operations took 0.007568 seconds.


## Bonus: Check in a pandas DataFrame.

In [260]:
df_problem_1 = pd.DataFrame(data={
    'y_i': [f'y_{i}' for i in range(0, N)],
    'p': arr_p.flatten(),
    'alpha': arr_alpha.flatten(),
    'x': arr_x.flatten(),
    'sigma': arr_sigma.flatten()
    }
)
df_problem_1['y_i'] = df_problem_1['y_i'].astype('string')

df_problem_1 = df_problem_1.assign(
    y_i_value = df_problem_1['p']\
        .mul(df_problem_1['alpha'], fill_value=np.nan)\
        .mul(
            (df_problem_1['x'].pow(df_problem_1['sigma'], fill_value=np.nan)),
            fill_value=np.nan
        )
)

print("df_problem_1 info and shape:")
df_problem_1.info()
print(df_problem_1.shape)

y_3 = (1 / N) * (df_problem_1['y_i_value'].sum())
y_3 = np.round(y_3, 4)
print(f"Successfully calculated y using columns in a pandas DataFrame:\n{y_3}")

df_problem_1 info and shape:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   y_i        10000 non-null  string 
 1   p          10000 non-null  float64
 2   alpha      10000 non-null  float64
 3   x          10000 non-null  float64
 4   sigma      10000 non-null  float64
 5   y_i_value  10000 non-null  float64
dtypes: float64(5), string(1)
memory usage: 468.9 KB
(10000, 6)
Successfully calculated y using columns in a pandas DataFrame:
0.1725


In [261]:
# Display some sampled rows.
df_problem_1.sample(n=10)

Unnamed: 0,y_i,p,alpha,x,sigma,y_i_value
4384,y_4384,0.919078,0.228217,0.692207,0.072867,0.204202
244,y_244,0.385574,0.839852,0.965842,0.568545,0.317489
7686,y_7686,0.686826,0.586175,0.509303,0.477044,0.291802
3687,y_3687,0.240301,0.638456,0.764507,0.840032,0.12244
358,y_358,0.992619,0.664055,0.086472,0.695697,0.120054
4533,y_4533,0.895685,0.603154,0.408887,0.14665,0.473833
2442,y_2442,0.443152,0.17078,0.364505,0.119273,0.067098
9383,y_9383,0.598521,0.433296,0.905162,0.423265,0.248627
6835,y_6835,0.895095,0.218932,0.167121,0.713592,0.054669
4629,y_4629,0.172737,0.885125,0.026982,0.236987,0.06495


## Confirm that all 3 calculation procedures produced the same result.

In [262]:
print(f"y_1 = {y_1}")
print(f"y_2 = {y_2}")
print(f"y_3 = {y_3}")
assert y_1 == y_2 == y_3, "The calculations did not all produce the same result."

y_1 = 0.1725
y_2 = 0.1725
y_3 = 0.1725


## 1d. Timing for loop vs vector operations. We know that vector operations are faster.

In [265]:
loop_diff_vector_seconds = np.round((loop_duration_seconds - vector_ops_duration_seconds), 4)
print(f"Calculating y in a for loop took {loop_diff_vector_seconds} seconds longer than calculating y using vector operations.")
# seconds_pct_diff is negative because vector_ops_duration_seconds < loop_duration_seconds
# ie the vector operations were faster than the loop calculations.
seconds_pct_diff = np.round(np.abs(
    ((vector_ops_duration_seconds - loop_duration_seconds) / loop_duration_seconds) * 100
    )
)
print(f"The vector operations were {seconds_pct_diff}% faster than the loop calculation.")

Calculating y in a for loop took 0.0985 seconds longer than calculating y using vector operations.
The vector operations were 93.0% faster than the loop calculation.


# Problem 2.

# 2a.