In [1]:
import csv
import numpy as np

In [2]:
def read_data(filename) -> np.array:
    data = []
    with open(filename, 'r') as file:
        csv_reader = csv.reader(file)
        for row in csv_reader:
            float_row = [float(value) for value in row]
            data.append(float_row)
    return np.array(data)

In [3]:
data = read_data("data.csv")

In [4]:
data[:5]

array([[0.749648  , 0.12819048, 0.5076381 , 0.05633512],
       [0.4482618 , 0.47509401, 0.4806783 , 0.46893454],
       [0.28352025, 0.19877752, 0.26540263, 0.21699514],
       [0.44636991, 0.45871148, 0.28401327, 0.54549307],
       [0.31940479, 0.42255275, 0.2595175 , 0.4024669 ]])

In [5]:
previous_videos = data[:, 0:2]
next_videos = data[:, 2:4]

In [6]:
previous_videos[:5], next_videos[:5]

(array([[0.749648  , 0.12819048],
        [0.4482618 , 0.47509401],
        [0.28352025, 0.19877752],
        [0.44636991, 0.45871148],
        [0.31940479, 0.42255275]]),
 array([[0.5076381 , 0.05633512],
        [0.4806783 , 0.46893454],
        [0.26540263, 0.21699514],
        [0.28401327, 0.54549307],
        [0.2595175 , 0.4024669 ]]))

In [7]:
def create_transformation_matrix(p_videos, n_videos):
    """
    Calculate the transformation matrix from the generated data points
    using least squares regression.
    
    Parameters:
    p_videos : numpy.ndarray
        Array of shape (n_points, 2) containing current video metrics
    n_videos : numpy.ndarray
        Array of shape (n_points, 2) containing next video metrics
        
    Returns:
    numpy.ndarray : Calculated transformation matrix
    """
    # Solve for the transformation matrix using least squares regression
    X, Y = p_videos, n_videos
    
    XtX = X.T @ X
    XtY = X.T @ Y
    
    return np.linalg.inv(XtX) @ XtY

In [8]:
tm = create_transformation_matrix(previous_videos, next_videos)

In [9]:
print(tm)

[[0.70500624 0.19902547]
 [0.09087316 0.89926622]]


In [10]:
def predict(A, x0, k):
    eigenvalues, eigenvectors = np.linalg.eig(A)
    return eigenvectors @ np.diag(eigenvalues ** k) @ np.linalg.inv(eigenvectors) @ x0

In [11]:
creators = read_data("creators.csv")

In [12]:
creators[:10]

array([[0.81030159, 0.03150058],
       [0.86074837, 0.14886164],
       [1.02041179, 0.27279776],
       [0.7283803 , 0.10593427],
       [0.75933921, 0.14846288],
       [0.7395112 , 0.37027072],
       [0.92225941, 0.47581368],
       [1.07497934, 0.27082828],
       [0.63578949, 0.2707897 ],
       [0.87478257, 0.18797036]])

In [13]:
weeks = 4

In [14]:
final_state = np.array(list(map(lambda x: predict(tm, x, weeks), creators)))

In [15]:
np.argmax(final_state[:, 0])

np.int64(6)

In [16]:
np.argmax(final_state[:, 1])

np.int64(26)

In [17]:
for i, (oc, nc) in enumerate(zip(creators, final_state)):
    if np.argmin(oc) != np.argmin(nc):
        print(i, oc, nc)

5 [0.7395112  0.37027072] [0.38929419 0.41499934]
6 [0.92225941 0.47581368] [0.49151281 0.52780568]
8 [0.63578949 0.2707897 ] [0.3143209  0.32207848]
14 [0.65759568 0.33829511] [0.35004501 0.37562937]
